In [18]:
import json
import time

import numpy as np
import pandas as pd
import requests

## Data Extracts

In [3]:
def get_df(data):
    """Flatten a news-sentiment API response into one row per (article, ticker).

    Parameters
    ----------
    data : response-like object
        Anything exposing ``.json()`` that returns the decoded payload; the
        payload is expected to hold the list of articles under ``'feed'``.

    Returns
    -------
    pandas.DataFrame
        The selected article columns repeated once for every entry of that
        article's ``ticker_sentiment`` list, followed by one column per key
        of those entries. ``time_published`` is parsed into datetimes.
        An empty DataFrame is returned when the payload cannot be processed
        (for example it has no ``'feed'`` key or an expected column is
        missing).

    Notes
    -----
    ``data.json()`` is called outside the ``try`` block, so decoding errors
    propagate to the caller.
    """
    payload = data.json()
    try:
        articles = pd.DataFrame(payload['feed'])[[
            'title',
            'url',
            'time_published',
            'summary',
            'source',
            'category_within_source',
            'source_domain',
            'overall_sentiment_score',
            'overall_sentiment_label',
            'ticker_sentiment',
        ]]

        # Explode once and reuse it: each element of the per-article list
        # becomes its own row (the article's index label is repeated).
        exploded = articles.explode('ticker_sentiment')

        # Expand every per-ticker dict into columns; the index matches
        # `exploded`, so the column-wise concat lines the rows up.
        per_ticker = exploded['ticker_sentiment'].apply(pd.Series)

        df = pd.concat(
            [exploded.drop(columns=['ticker_sentiment']), per_ticker],
            axis=1,
        )
        df['time_published'] = pd.to_datetime(df['time_published'], format='%Y%m%dT%H%M%S')
        return df
    except Exception:
        # Best-effort: any processing failure yields an empty frame. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through so a long-running download loop can still be stopped.
        return pd.DataFrame()

In [None]:
# Download news-sentiment rows for every ticker, a few tickers at a time.
# NOTE(review): get_snp_tickers and TOKEN are not defined in the cells shown
# here; they must already exist in the kernel before this cell runs.
CHUNK_SIZE = 5      # number of tickers requested between pauses
PAUSE_SECONDS = 70  # presumably chosen to stay under the API's rate limit -- TODO confirm

tickers = get_snp_tickers()
df = pd.DataFrame()

chunks = [tickers[i:i + CHUNK_SIZE] for i in range(0, len(tickers), CHUNK_SIZE)]

# Collect the per-ticker frames and concatenate once at the end; growing `df`
# with pd.concat on every request re-copies all previous rows each time.
# If the loop is interrupted, the partial results are still in `frames`.
frames = []
for chunk in chunks:
    for ticker in chunk:
        print(ticker)
        url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={ticker}&apikey={TOKEN}'
        r = requests.get(url)
        frame = get_df(r)
        # get_df returns an empty frame when a response could not be parsed;
        # those contribute no rows, so they are skipped.
        if not frame.empty:
            frames.append(frame)
    time.sleep(PAUSE_SECONDS)

if frames:
    df = pd.concat(frames, ignore_index=True)

In [121]:
# Persist the collected rows to CSV, leaving the DataFrame index out of the file.
df.to_csv(path_or_buf='data/alpha_vantage_news.csv', index=False)

## Exploration

In [3]:
# Reload the rows saved by the extract section from the CSV file.
df = pd.read_csv(filepath_or_buffer='data/alpha_vantage_news.csv')

In [4]:
# Remove rows that are exact duplicates of an earlier row (first occurrence is kept).
df = df.drop_duplicates()


In [5]:
# Convert the publication timestamps to datetimes; the column keeps its position.
df = df.assign(time_published=pd.to_datetime(df['time_published']))

In [6]:
# Preview the first five rows.
df.head(5)

Unnamed: 0,title,url,time_published,summary,source,category_within_source,source_domain,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
0,"Digital Signage Market size to grow by USD 8,1...",https://www.prnewswire.com/news-releases/digit...,2023-06-09 22:30:00,"Digital Signage Market size to grow by USD 8,1...",PR Newswire,,www.prnewswire.com,0.184721,Somewhat-Bullish,SNEJF,0.031864,0.027423,Neutral
1,"Digital Signage Market size to grow by USD 8,1...",https://www.prnewswire.com/news-releases/digit...,2023-06-09 22:30:00,"Digital Signage Market size to grow by USD 8,1...",PR Newswire,,www.prnewswire.com,0.184721,Somewhat-Bullish,SSNLF,0.031864,0.027423,Neutral
2,"Digital Signage Market size to grow by USD 8,1...",https://www.prnewswire.com/news-releases/digit...,2023-06-09 22:30:00,"Digital Signage Market size to grow by USD 8,1...",PR Newswire,,www.prnewswire.com,0.184721,Somewhat-Bullish,CSCO,0.031864,0.027423,Neutral
3,"Digital Signage Market size to grow by USD 8,1...",https://www.prnewswire.com/news-releases/digit...,2023-06-09 22:30:00,"Digital Signage Market size to grow by USD 8,1...",PR Newswire,,www.prnewswire.com,0.184721,Somewhat-Bullish,HNHPF,0.031864,0.027423,Neutral
4,"Digital Signage Market size to grow by USD 8,1...",https://www.prnewswire.com/news-releases/digit...,2023-06-09 22:30:00,"Digital Signage Market size to grow by USD 8,1...",PR Newswire,,www.prnewswire.com,0.184721,Somewhat-Bullish,INTC,0.031864,0.027423,Neutral


In [23]:
# Keep only the rows whose ticker is TSLA.
tsla = df.loc[df['ticker'] == "TSLA"]

In [30]:
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.offline as py
from statsmodels.nonparametric.smoothers_lowess import lowess


# Plot the TSLA ticker sentiment score over time together with three trend
# overlays: a LOWESS smooth, a straight-line fit and a rolling mean.

# Order rows chronologically so the line traces are drawn left to right.
tsla = tsla.sort_values(by="time_published")
# x: seconds elapsed since the earliest TSLA article (numeric axis for the fits).
x = (tsla['time_published'] - tsla['time_published'].min()).dt.total_seconds()

# y: the per-ticker sentiment score being smoothed and fitted.
y = tsla['ticker_sentiment_score']
# frac is the smoothing span passed to lowess (larger = smoother); column 1 of
# the result is what gets plotted below as the smoothed series.
loess_smoothed = lowess(y, x, frac=0.1) # try adjusting frac to change the amount of smoothing

# Rolling mean over 7 consecutive rows (a row count, not 7 calendar days), so
# the first 6 values are NaN.
y_moving_avg = y.rolling(window=7).mean()


# Degree-1 least-squares fit of score against elapsed seconds, evaluated at
# every x to give the regression line.
slope, intercept = np.polyfit(x, y, 1)
reg_line = slope*x + intercept

# Base figure: raw sentiment score per article; each overlay is added as its
# own trace against the same timestamps.
fig = px.line(tsla, x="time_published", y="ticker_sentiment_score", title='$TSLA Over Time')
fig.add_trace(go.Scatter(x=tsla['time_published'], y=loess_smoothed[:,1], mode='lines', name='LOESS Smoothed'))
fig.add_trace(go.Scatter(x=tsla['time_published'], y=reg_line, mode='lines', name='Regression Line'))
fig.add_trace(go.Scatter(x=tsla['time_published'], y=y_moving_avg, mode='lines', name='Moving Average'))

# Render with plotly's offline plotter.
# NOTE(review): the recorded cell output is 'temp-plot.html', which suggests the
# chart is written to that file rather than shown inline -- confirm.
py.plot(fig)


'temp-plot.html'