In [2]:
import pandas as pd
import numpy as np
from scrapper import Scrapper

In [3]:

url = 'https://in.tradingview.com/markets/stocks-india/news/'

scrapper = Scrapper(url)
titles = scrapper.get_all_article_titles()
titles_array = np.array(titles)
# titles_array.shape
titles

['Accumulate ITC; target of Rs 478: Prabhudas Lilladher',
 'Accumulate Westlife Foodworld; target of Rs 932: Prabhudas Lilladher',
 'Accumulate Nestle India; target of Rs 23,585: Prabhudas Lilladher',
 'Buy Praj Industries; target of Rs 475: Prabhudas Lilladher',
 'Societe Generale buys Bandhan Bank shares worth Rs 382 crore, Goldman Sachs picks 1.2% stake in Spandana Sphoorty',
 'Supreme Petrochem Limited (SPL) raises Polystyrene (PS) prices in the domestic markets of India',
 'Buy HDFC Bank; target of Rs 2070: Motilal Oswal',
 'Buy Tata Consumer Products; target of Rs 985: Motilal Oswal',
 'Buy Indian Bank; target of Rs 380: Motilal Oswal',
 'Buy Vedant Fashions; target of Rs 1400: Motilal Oswal',
 'Buy Mahindra Lifespaces; target of Rs 575: Motilal Oswal',
 'Buy SIS; target of Rs 510: Motilal Oswal',
 'Buy Shriram Finance; target of Rs 2100: Motilal Oswal',
 'Buy Indian Hotels; target of Rs 440: Motilal Oswal',
 'Buy Ajanta Pharma; target of Rs 1800: Motilal Oswal',
 'Buy Laurus Lab

In [4]:
import spacy
from spacy import displacy

In [5]:
nlp = spacy.load('en_core_web_sm')

processed_titles = []
for title in titles:
    doc = nlp(title)
    processed_title = []
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        processed_title.append(token.lemma_.lower())
    processed_titles.append(processed_title)
processed_titles = np.array(processed_titles)
print(type(processed_titles))
processed_titles[:3]

<class 'numpy.ndarray'>


  processed_titles = np.array(processed_titles)


array([list(['accumulate', 'itc', 'target', 'r', '478', 'prabhudas', 'lilladher']),
       list(['accumulate', 'westlife', 'foodworld', 'target', 'r', '932', 'prabhudas', 'lilladher']),
       list(['accumulate', 'nestle', 'india', 'target', 'r', '23,585', 'prabhudas', 'lilladher'])],
      dtype=object)

In [6]:
df = pd.DataFrame(processed_titles)
df['Header'] = titles
df.head()

Unnamed: 0,0,Header
0,"[accumulate, itc, target, r, 478, prabhudas, l...",Accumulate ITC; target of Rs 478: Prabhudas Li...
1,"[accumulate, westlife, foodworld, target, r, 9...",Accumulate Westlife Foodworld; target of Rs 93...
2,"[accumulate, nestle, india, target, r, 23,585,...","Accumulate Nestle India; target of Rs 23,585: ..."
3,"[buy, praj, industries, target, r, 475, prabhu...",Buy Praj Industries; target of Rs 475: Prabhud...
4,"[societe, generale, buy, bandhan, bank, share,...",Societe Generale buys Bandhan Bank shares wort...


In [7]:
df.rename(columns={0: 'Text'}, inplace=True)
df.head() 

Unnamed: 0,Text,Header
0,"[accumulate, itc, target, r, 478, prabhudas, l...",Accumulate ITC; target of Rs 478: Prabhudas Li...
1,"[accumulate, westlife, foodworld, target, r, 9...",Accumulate Westlife Foodworld; target of Rs 93...
2,"[accumulate, nestle, india, target, r, 23,585,...","Accumulate Nestle India; target of Rs 23,585: ..."
3,"[buy, praj, industries, target, r, 475, prabhu...",Buy Praj Industries; target of Rs 475: Prabhud...
4,"[societe, generale, buy, bandhan, bank, share,...",Societe Generale buys Bandhan Bank shares wort...


In [8]:
def list_to_string(lst):
    return ' '.join(map(str, lst))

In [9]:
df['Text'] = df['Text'].apply(list_to_string)
df

Unnamed: 0,Text,Header
0,accumulate itc target r 478 prabhudas lilladher,Accumulate ITC; target of Rs 478: Prabhudas Li...
1,accumulate westlife foodworld target r 932 pra...,Accumulate Westlife Foodworld; target of Rs 93...
2,"accumulate nestle india target r 23,585 prabhu...","Accumulate Nestle India; target of Rs 23,585: ..."
3,buy praj industries target r 475 prabhudas lil...,Buy Praj Industries; target of Rs 475: Prabhud...
4,societe generale buy bandhan bank share worth ...,Societe Generale buys Bandhan Bank shares wort...
5,supreme petrochem limited spl raise polystyren...,Supreme Petrochem Limited (SPL) raises Polysty...
6,buy hdfc bank target r 2070 motilal oswal,Buy HDFC Bank; target of Rs 2070: Motilal Oswal
7,buy tata consumer products target r 985 motila...,Buy Tata Consumer Products; target of Rs 985: ...
8,buy indian bank target r 380 motilal oswal,Buy Indian Bank; target of Rs 380: Motilal Oswal
9,buy vedant fashions target r 1400 motilal oswal,Buy Vedant Fashions; target of Rs 1400: Motila...


In [10]:
import joblib

vectorizer = joblib.load('tfidf_vectorizer.joblib')
df_text_transformed = vectorizer.transform(df['Text'])
df_text_transformed 

<59x9946 sparse matrix of type '<class 'numpy.float64'>'
	with 287 stored elements in Compressed Sparse Row format>

In [11]:
import joblib

loaded_model = joblib.load('model.joblib')


predictions = loaded_model.predict(df_text_transformed)
df['Predictions'] = predictions
df['Predictions'] = df['Predictions'].apply(lambda x: 'Buy' if x == 1 else 'Sell')
df.head(10)

Unnamed: 0,Text,Header,Predictions
0,accumulate itc target r 478 prabhudas lilladher,Accumulate ITC; target of Rs 478: Prabhudas Li...,Buy
1,accumulate westlife foodworld target r 932 pra...,Accumulate Westlife Foodworld; target of Rs 93...,Buy
2,"accumulate nestle india target r 23,585 prabhu...","Accumulate Nestle India; target of Rs 23,585: ...",Buy
3,buy praj industries target r 475 prabhudas lil...,Buy Praj Industries; target of Rs 475: Prabhud...,Buy
4,societe generale buy bandhan bank share worth ...,Societe Generale buys Bandhan Bank shares wort...,Buy
5,supreme petrochem limited spl raise polystyren...,Supreme Petrochem Limited (SPL) raises Polysty...,Buy
6,buy hdfc bank target r 2070 motilal oswal,Buy HDFC Bank; target of Rs 2070: Motilal Oswal,Buy
7,buy tata consumer products target r 985 motila...,Buy Tata Consumer Products; target of Rs 985: ...,Buy
8,buy indian bank target r 380 motilal oswal,Buy Indian Bank; target of Rs 380: Motilal Oswal,Buy
9,buy vedant fashions target r 1400 motilal oswal,Buy Vedant Fashions; target of Rs 1400: Motila...,Buy


In [13]:
df.to_csv('output.csv', columns=['Header', 'Predictions'])