In [131]:
import pandas as pd
import numpy as np
from scrapper import Scrapper

In [132]:

url = 'https://in.tradingview.com/markets/stocks-india/news/'

scrapper = Scrapper(url)
titles = scrapper.get_all_article_titles()
titles_array = np.array(titles)
# titles_array.shape
titles

["India's Power Grid's profit slips on subdued transmission demand",
 "India's Petronet eyes lower prices under renewed long-term deal with Qatar",
 'Reliance Retail to launch $200-laptop to replicate phone success',
 "India's Adani Group plans $1.82-bln fundraise through bond market - Bloomberg News",
 "India's Heritage Foods posts two-fold surge in Q1 profit on higher milk demand, prices",
 "After Sequoia split, India's Peak XV to hold CEOs meet for first time",
 'Sri Lankan shares end higher as industrials, financials rise',
 "State Bank of India's infra bond response likely to lure other banks into tapping mkt - bankers",
 'Indian Equities Close Higher as US, Eurozone Inflation Cools',
 'ICICI Securities to issue 3-month CP - bankers',
 'Fund managers sceptical of infra stocks opportunity because of past experience: Nilesh Shah',
 'XOM: Exxon Mobil Profits Drop 55% Dragged by Falling Oil Prices, Shares Resist Selloff',
 'Grasim Industries accepts bids for 3-year bonds - bankers',
 

In [133]:
import spacy
from spacy import displacy

In [134]:
nlp = spacy.load('en_core_web_sm')

processed_titles = []
for title in titles:
    doc = nlp(title)
    processed_title = []
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        processed_title.append(token.lemma_.lower())
    processed_titles.append(processed_title)
processed_titles = np.array(processed_titles)
print(type(processed_titles))
processed_titles[:3]

<class 'numpy.ndarray'>


  processed_titles = np.array(processed_titles)


array([list(['india', 'power', 'grid', 'profit', 'slip', 'subdued', 'transmission', 'demand']),
       list(['india', 'petronet', 'eye', 'lower', 'price', 'renew', 'long', 'term', 'deal', 'qatar']),
       list(['reliance', 'retail', 'launch', '$', '200', 'laptop', 'replicate', 'phone', 'success'])],
      dtype=object)

In [135]:
df = pd.DataFrame(processed_titles)
df['Original'] = titles
df.head()

Unnamed: 0,0,Original
0,"[india, power, grid, profit, slip, subdued, tr...",India's Power Grid's profit slips on subdued t...
1,"[india, petronet, eye, lower, price, renew, lo...",India's Petronet eyes lower prices under renew...
2,"[reliance, retail, launch, $, 200, laptop, rep...",Reliance Retail to launch $200-laptop to repli...
3,"[india, adani, group, plan, $, 1.82, bln, fund...",India's Adani Group plans $1.82-bln fundraise ...
4,"[india, heritage, foods, post, fold, surge, q1...",India's Heritage Foods posts two-fold surge in...


In [136]:
df.rename(columns={0: 'Text'}, inplace=True)
df.head() 

Unnamed: 0,Text,Original
0,"[india, power, grid, profit, slip, subdued, tr...",India's Power Grid's profit slips on subdued t...
1,"[india, petronet, eye, lower, price, renew, lo...",India's Petronet eyes lower prices under renew...
2,"[reliance, retail, launch, $, 200, laptop, rep...",Reliance Retail to launch $200-laptop to repli...
3,"[india, adani, group, plan, $, 1.82, bln, fund...",India's Adani Group plans $1.82-bln fundraise ...
4,"[india, heritage, foods, post, fold, surge, q1...",India's Heritage Foods posts two-fold surge in...


In [137]:
def list_to_string(lst):
    return ' '.join(map(str, lst))

In [138]:
df['Text'] = df['Text'].apply(list_to_string)
df

Unnamed: 0,Text,Original
0,india power grid profit slip subdued transmiss...,India's Power Grid's profit slips on subdued t...
1,india petronet eye lower price renew long term...,India's Petronet eyes lower prices under renew...
2,reliance retail launch $ 200 laptop replicate ...,Reliance Retail to launch $200-laptop to repli...
3,india adani group plan $ 1.82 bln fundraise bo...,India's Adani Group plans $1.82-bln fundraise ...
4,india heritage foods post fold surge q1 profit...,India's Heritage Foods posts two-fold surge in...
5,sequoia split india peak xv hold ceo meet time,"After Sequoia split, India's Peak XV to hold C..."
6,sri lankan share end higher industrial financi...,"Sri Lankan shares end higher as industrials, f..."
7,state bank india infra bond response likely lu...,State Bank of India's infra bond response like...
8,indian equities close high eurozone inflation ...,"Indian Equities Close Higher as US, Eurozone I..."
9,icici securities issue 3 month cp banker,ICICI Securities to issue 3-month CP - bankers


In [139]:
import joblib

vectorizer = joblib.load('tfidf_vectorizer.joblib')
df_text_transformed = vectorizer.transform(df['Text'])
df_text_transformed 

<59x9946 sparse matrix of type '<class 'numpy.float64'>'
	with 339 stored elements in Compressed Sparse Row format>

In [141]:
import joblib

loaded_model = joblib.load('model.joblib')


predictions = loaded_model.predict(df_text_transformed)
df['predictions'] = predictions
df['predictions'] = df['predictions'].apply(lambda x: 'Buy' if x == 1 else 'Sell')
df.head(10)

Unnamed: 0,Text,Original,predictions
0,india power grid profit slip subdued transmiss...,India's Power Grid's profit slips on subdued t...,Buy
1,india petronet eye lower price renew long term...,India's Petronet eyes lower prices under renew...,Buy
2,reliance retail launch $ 200 laptop replicate ...,Reliance Retail to launch $200-laptop to repli...,Buy
3,india adani group plan $ 1.82 bln fundraise bo...,India's Adani Group plans $1.82-bln fundraise ...,Buy
4,india heritage foods post fold surge q1 profit...,India's Heritage Foods posts two-fold surge in...,Sell
5,sequoia split india peak xv hold ceo meet time,"After Sequoia split, India's Peak XV to hold C...",Buy
6,sri lankan share end higher industrial financi...,"Sri Lankan shares end higher as industrials, f...",Buy
7,state bank india infra bond response likely lu...,State Bank of India's infra bond response like...,Buy
8,indian equities close high eurozone inflation ...,"Indian Equities Close Higher as US, Eurozone I...",Buy
9,icici securities issue 3 month cp banker,ICICI Securities to issue 3-month CP - bankers,Buy
