In [None]:
# Install the yfinance package, which this notebook imports as `yf` to download stock price history.
!pip install yfinance

In [None]:
# import libraries: yfinance for prices, pandas for tables, Kaggle secrets for the API token,
# stdlib helpers for files/dates/HTTP, and transformers for the sentiment model
import yfinance as yf
import pandas as pd
from kaggle_secrets import UserSecretsClient
import os
import json
import datetime
from datetime import date,timedelta
import warnings
import http.client, urllib.parse
# silence all warnings for the rest of the session
warnings.filterwarnings("ignore")
from transformers import AutoTokenizer, AutoModelForSequenceClassification,pipeline
# let pandas show up to 500 columns and 500 rows before truncating displayed output
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# ticker symbol of the company tracked by this notebook
company_symbol = "RELIANCE.NS"

# run date and the previous calendar day, both kept as strings
today = str(date.today())
yesterday = str(date.today() - timedelta(days=1))

# flag: becomes True once news dated `yesterday` is found in the stored news file
news_inserted = False

# mediastack API token, read from the Kaggle secret store
user_secrets = UserSecretsClient()
mediastack_api_token = user_secrets.get_secret("mediastack-token")

# previously saved datasets (read-only inputs)
stock_history_file_path = '../input/reliancestockandnewsdata/reliance_stock_history.csv'
news_file_path = '../input/reliancestockandnewsdata/reliance_news.json'
news_sentiment_file_path = '../input/reliancestockandnewsdata/reliance_news_sentiment.csv'

# refreshed datasets written by this run
output_stock_history_file_path = './reliance_stock_history.csv'
output_news_file_path = './reliance_news.json'
output_news_sentiment_file_path = './reliance_news_sentiment.csv'

# mediastack request: connection object and URL-encoded query string
search_query = 'reliance'
conn = http.client.HTTPConnection('api.mediastack.com')
params = urllib.parse.urlencode(
    {
        'keywords': search_query,
        'access_key': mediastack_api_token,
        'sort': 'published_desc',
        'limit': 10,
        'languages': 'en',
        'country': 'in',
        'date': yesterday,
    }
)

In [None]:
def create_stock_history_dataset():
    """Build the initial stock history table from the ticker's "1d" history.

    Reads the notebook-level ``ticker_object`` and returns its history with the
    index moved into a regular column.
    """
    return ticker_object.history(period="1d").reset_index()

def update_stock_history_dataset(history_path=None, ticker=None):
    """Refresh the stored stock history with the most recent daily bar.

    The saved CSV is loaded, the latest "1d" bar is requested from the ticker,
    and that bar either overwrites the last stored row (same calendar date) or
    is appended as a new row.

    Parameters
    ----------
    history_path : str, optional
        CSV file holding the stored history; it must contain a ``Date`` column.
        Defaults to the notebook-level ``stock_history_file_path``.
    ticker : object, optional
        Object exposing ``history(period=...)`` that returns a DataFrame whose
        index becomes a ``Date`` column after ``reset_index()``. Defaults to the
        notebook-level ``ticker_object``.

    Returns
    -------
    pandas.DataFrame
        The stored history including the latest bar. It is returned unchanged
        when the ticker yields no rows.
    """
    if history_path is None:
        history_path = stock_history_file_path
    if ticker is None:
        ticker = ticker_object

    reliance_stock_history = pd.read_csv(history_path)
    # No explicit format: the CSV written by this notebook stores ISO-style
    # dates, which a hard-coded "%Y/%m/%d" does not match under strict parsing.
    reliance_stock_history['Date'] = pd.to_datetime(reliance_stock_history['Date'])

    today_reliance_stock_data = ticker.history(period="1d").reset_index()
    if today_reliance_stock_data.empty:
        # Nothing was returned, so there is nothing to merge.
        return reliance_stock_history

    # Take both the date and the values from the same (last) fetched row.
    latest_bar = today_reliance_stock_data.iloc[-1]
    latest_date = str(latest_bar['Date']).split()[0]
    # Align by column name so a different column order cannot shift values.
    latest_values = latest_bar.reindex(reliance_stock_history.columns).tolist()

    stored_date = None
    if len(reliance_stock_history) > 0:
        stored_date = str(reliance_stock_history['Date'].iloc[-1]).split()[0]

    if latest_date == stored_date:
        # Same day already stored: overwrite it with the fresher values.
        reliance_stock_history.iloc[-1:, :] = latest_values
    else:
        last_position = len(reliance_stock_history)
        reliance_stock_history.loc[last_position] = latest_values
    return reliance_stock_history

In [None]:
# build the stock history: extend the saved CSV when it exists, otherwise start a new table
ticker_object = yf.Ticker(company_symbol)
if os.path.exists(stock_history_file_path):
    reliance_stock_history = update_stock_history_dataset()
else:
    reliance_stock_history = create_stock_history_dataset()

# persist the refreshed table without the row index
reliance_stock_history.to_csv(output_stock_history_file_path, index=False)

In [None]:
def create_news_dataset():
    """Request news from the mediastack endpoint and return its "data" entry.

    Uses the notebook-level ``conn`` connection and the pre-encoded ``params``
    query string; the response body is decoded as UTF-8 JSON.
    """
    request_path = '/v1/news?{}'.format(params)
    conn.request('GET', request_path)
    response = conn.getresponse()
    payload = json.loads(response.read().decode('utf-8'))
    return payload["data"]

def update_news_dataset(news_path=None, target_date=None, fetch_news=None):
    """Extend the stored news with freshly fetched articles when needed.

    The stored JSON file is read and its ``"articles"`` list is checked for an
    entry whose ``published_at`` date part equals ``target_date``. Only when no
    such entry exists are new articles fetched and appended.

    Parameters
    ----------
    news_path : str, optional
        JSON file with an ``"articles"`` list. Defaults to the notebook-level
        ``news_file_path``.
    target_date : str, optional
        Date string compared with the part of ``published_at`` before ``'T'``.
        Defaults to the notebook-level ``yesterday``.
    fetch_news : callable, optional
        Zero-argument callable returning a list of new articles. Defaults to
        ``create_news_dataset``.

    Returns
    -------
    tuple
        ``(all_articles, new_articles)``; ``new_articles`` is ``None`` when
        nothing was fetched.

    Side effects
    ------------
    Sets the module-level ``news_inserted`` flag to whether news for
    ``target_date`` was already stored. The flag is recomputed on every call,
    so a value left over from an earlier run cannot suppress fetching.
    """
    global news_inserted
    if news_path is None:
        news_path = news_file_path
    if target_date is None:
        target_date = yesterday
    if fetch_news is None:
        fetch_news = create_news_dataset

    # Read the file fully and release it before any network request is made.
    with open(news_path, 'r') as file:
        stored_articles = json.load(file)['articles']

    news_inserted = any(
        (news.get('published_at') or '').split('T')[0] == target_date
        for news in stored_articles
    )

    current_reliance_news = None
    if not news_inserted:
        current_reliance_news = fetch_news()
        stored_articles += current_reliance_news
    return stored_articles, current_reliance_news

In [None]:
# build the news dataset: extend the saved file when it exists, otherwise fetch from scratch
if os.path.exists(news_file_path):
    reliance_news, current_reliance_news = update_news_dataset()
else:
    reliance_news = create_news_dataset()
    current_reliance_news = reliance_news.copy()

# persist every article under the "articles" key
with open(output_news_file_path, 'w') as file:
    json.dump({"articles": reliance_news}, file)

In [None]:
# show the full list of news articles collected above as the cell output
reliance_news

In [None]:
# load the FinBERT checkpoint once and wrap it in a sentiment-analysis pipeline
finbert_checkpoint = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)
classifier = pipeline('sentiment-analysis', tokenizer=tokenizer, model=model)

In [None]:
def create_news_sentiment_dataset(news_sentiments, articles=None, sentiment_classifier=None, output_path=None):
    """Score new articles, append them to ``news_sentiments`` and save the table.

    Each article's title and description are joined into one text and passed to
    the classifier. One row per article is then appended to ``news_sentiments``
    **in place**, and the whole table is written to CSV. The CSV is written even
    when there are no new articles.

    Parameters
    ----------
    news_sentiments : pandas.DataFrame
        Table to extend. New rows are labelled from ``len(news_sentiments)``
        upwards. Values are matched to its columns by name from the keys
        ``published_at``, ``title``, ``description``, ``url``, ``sentiment``
        and ``sentiment_score``.
    articles : list of dict, optional
        Articles to score. Defaults to the notebook-level
        ``current_reliance_news``; ``None`` or an empty list means nothing new.
    sentiment_classifier : callable, optional
        Called with a list of texts; must return one dict per text with
        ``'label'`` and ``'score'`` keys. Defaults to the notebook-level
        ``classifier``.
    output_path : str, optional
        Destination CSV. Defaults to the notebook-level
        ``output_news_sentiment_file_path``.

    Returns
    -------
    None
    """
    if articles is None:
        articles = current_reliance_news
    if output_path is None:
        output_path = output_news_sentiment_file_path

    # Truthiness covers both "nothing fetched" (None) and an empty result list,
    # so the classifier is never called with no texts.
    if articles:
        if sentiment_classifier is None:
            sentiment_classifier = classifier

        # Skip missing parts so a null description does not break concatenation.
        title_description = [
            ' '.join(part for part in (article.get('title'), article.get('description')) if part)
            for article in articles
        ]
        news_label_and_scores = sentiment_classifier(title_description)

        last_position = len(news_sentiments)
        for offset, (article, pred) in enumerate(zip(articles, news_label_and_scores)):
            record = {
                'published_at': article.get('published_at'),
                'title': article.get('title'),
                'description': article.get('description'),
                'url': article.get('url'),
                'sentiment': pred['label'],
                'sentiment_score': pred['score'],
            }
            # Assign the complete row at once via .loc (row enlargement); the
            # scalar accessor .at is not meant for slices or whole-row updates.
            news_sentiments.loc[last_position + offset] = [
                record.get(column) for column in news_sentiments.columns
            ]

    news_sentiments.to_csv(output_path, index=False)

In [None]:
# start from the saved sentiment table when available, otherwise from an empty one,
# then score and append the newly fetched articles
news_sentiments = None
if not os.path.exists(news_sentiment_file_path):
    news_sentiments = pd.DataFrame(
        columns=['published_at', 'title', 'description', 'url', 'sentiment', 'sentiment_score']
    )
else:
    news_sentiments = pd.read_csv(news_sentiment_file_path, index_col=None)
create_news_sentiment_dataset(news_sentiments)

In [None]:
# display the sentiment table, including any rows added by create_news_sentiment_dataset
news_sentiments