In [26]:
import json
import os
import sqlite3
import time
from datetime import datetime, timedelta

import pandas as pd
import pytz
import requests
from dateutil import rrule
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

# Fetching the top 100 public companies by market cap

In [27]:
# Load the market-cap listing, rank it from largest to smallest company,
# and keep only the columns used later in the notebook.
symbols_df = (
    pd.read_csv('marketcap.csv')
    .sort_values(by='Market Cap', ascending=False)
    .loc[:, ['Symbol', 'Name', 'Market Cap', 'IPO Year', 'Country']]
)
symbols_df.head(3)


Unnamed: 0,Symbol,Name,Market Cap,IPO Year,Country
22,AAPL,Apple Inc. Common Stock,2727235000000.0,1980.0,United States
4519,MSFT,Microsoft Corporation Common Stock,2460812000000.0,1986.0,United States
3034,GOOG,Alphabet Inc. Class C Capital Stock,1741555000000.0,2004.0,United States


In [28]:
# Keep companies whose IPO year is before 2020. Rows with a missing IPO year
# fail the comparison and are therefore dropped as well.
symbols_df = symbols_df[symbols_df['IPO Year'].lt(2020)]

In [29]:
# Tickers removed by hand: other share classes of companies already listed.
symbols_to_drop = ['GOOG', 'TBC']
# Exclude those tickers, then keep the 100 largest remaining companies
# with a fresh 0..99 index.
symbols_df = (
    symbols_df[~symbols_df['Symbol'].isin(symbols_to_drop)]
    .iloc[:100]
    .reset_index(drop=True)
)

In [30]:
# Plain Python list of the selected ticker symbols, in market-cap order.
symbols_list = list(symbols_df['Symbol'])
print(symbols_list)

['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'TSLA', 'META', 'TSM', 'MA', 'AVGO', 'ORCL', 'ABBV', 'ADBE', 'ASML', 'CSCO', 'BABA', 'CRM', 'ACN', 'NFLX', 'TBB', 'INTU', 'AMGN', 'UPS', 'PDD', 'QCOM', 'HDB', 'NOW', 'AMAT', 'GS', 'RYAAY', 'SBUX', 'RIO', 'BLK', 'ISRG', 'UBER', 'GILD', 'VRTX', 'UBS', 'REGN', 'ZTS', 'LRCX', 'BX', 'SHOP', 'CME', 'BTI', 'EQIX', 'VMW', 'HCA', 'MELI', 'SNPS', 'RELX', 'ICE', 'KLAC', 'STLA', 'RACE', 'ANET', 'BN', 'ORLY', 'EL', 'ROP', 'CTAS', 'CMG', 'TEAM', 'ING', 'NXPI', 'TAK', 'DELL', 'JD', 'MET', 'TDG', 'FTNT', 'RSG', 'BIDU', 'LULU', 'MRVL', 'GM', 'ODFL', 'MRNA', 'ABEV', 'MCHP', 'PAYX', 'CPRT', 'ALC', 'TTD', 'BSBR', 'HLT', 'ROST', 'COF', 'LNG', 'CRWD', 'DLR', 'SGEN', 'IQV', 'KMI', 'DOW', 'CTVA', 'DXCM', 'IDXX', 'LVS', 'PRU']


## Saving symbols to the SQLite database (constants.db)

In [31]:
def save_symbols_to_sqlite(symbols=None, db_path='../database/constants.db'):
    """Rebuild the ``stock_indices`` table with one row per ticker symbol.

    Any existing ``stock_indices`` table is dropped and recreated, so the
    table always reflects exactly the symbols passed in.

    Args:
        symbols: Iterable of ticker strings to store. When omitted, the
            notebook-level ``symbols_list`` is used.
        db_path: Path of the SQLite database file to write to.
    """
    if symbols is None:
        symbols = symbols_list
    conn = sqlite3.connect(db_path)
    try:
        # ``with conn`` commits when the block succeeds and rolls back if any
        # statement raises, so a failed rebuild is not committed half-done.
        with conn:
            conn.execute('DROP TABLE IF EXISTS stock_indices')
            conn.execute('''CREATE TABLE stock_indices (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                stock_index TEXT NOT NULL)''')
            conn.executemany(
                "INSERT INTO stock_indices (stock_index) VALUES (?)",
                ((symbol,) for symbol in symbols),
            )
    finally:
        # The original never closed the connection; release it explicitly.
        conn.close()
#save_symbols_to_sqlite()


# Saving Alpha Vantage and Finnhub data to local files

In [32]:
def get_daily_news_sentiments(symbol:str,start_date:datetime.date,end_date:datetime.date,api_key:str,
                              max_retries:int=8,retry_wait:float=15.0,timeout:float=30.0):
    """Download Alpha Vantage NEWS_SENTIMENT data for each weekday in a range.

    One request is made per weekday from ``start_date`` to ``end_date``
    (inclusive); Saturdays and Sundays are skipped. Each response is written
    to ``news_sentiment/<symbol>_<YYYYMMDD>.json``.

    Args:
        symbol: Ticker symbol to query.
        start_date: First day to fetch.
        end_date: Last day to fetch.
        api_key: Alpha Vantage API key.
        max_retries: How many times a throttled request is retried.
        retry_wait: Seconds to sleep before retrying a throttled request.
        timeout: Per-request timeout in seconds.

    Raises:
        RuntimeError: If the API still answers with its rate-limit note after
            ``max_retries`` retries (e.g. the daily quota is exhausted).
    """
    os.makedirs('news_sentiment', exist_ok=True)
    for date in rrule.rrule(rrule.DAILY, dtstart=start_date, until=end_date):
        if date.weekday() in (5, 6):
            # Saturday / Sunday: nothing is fetched.
            continue
        day_str = date.strftime("%Y%m%d")
        params = {
            "function": "NEWS_SENTIMENT",
            "apikey": api_key,
            "tickers": symbol,
            "time_from": day_str + "T0000",
            "time_to": day_str + "T2359",
            "sort": "RELEVANCY",
            "limit": 1000,
        }
        retries = 0
        while True:
            r = requests.get("https://www.alphavantage.co/query", params=params, timeout=timeout)
            data = r.json()
            # A throttled call is answered with a JSON object whose only key
            # is "Note"; anything else is treated as the real payload.
            if not (isinstance(data, dict) and list(data) == ["Note"]):
                break
            retries += 1
            if retries > max_retries:
                raise RuntimeError(
                    "Alpha Vantage rate limit still active after {} retries for {} on {}".
                    format(max_retries, symbol, day_str))
            # Back off instead of immediately re-sending the request.
            time.sleep(retry_wait)

        filename = 'news_sentiment/' + symbol + '_' + day_str + '.json'
        with open(filename, 'w') as f:
            json.dump(data, f)

In [33]:
def get_daily_social_media_sentiment(symbol:str,start_date:datetime.date,end_date:datetime.date,api_key:str,
                                     max_retries:int=8,retry_wait:float=15.0,timeout:float=30.0):
    """Download Finnhub social-sentiment data in weekly steps.

    Starting at ``start_date`` and stepping one week at a time up to
    ``end_date``, each request covers the step date through five days later.
    Each response is written to
    ``social_sentiment/<symbol>_<from>_<to>.json`` with ``YYYY-MM-DD`` dates.

    Args:
        symbol: Ticker symbol to query.
        start_date: Date of the first weekly window.
        end_date: No window starts after this date.
        api_key: Finnhub API token.
        max_retries: How many times a request answered with HTTP 429 is retried.
        retry_wait: Seconds to sleep before retrying after HTTP 429.
        timeout: Per-request timeout in seconds.

    Raises:
        RuntimeError: If the API still answers HTTP 429 after ``max_retries``
            retries.
    """
    os.makedirs('social_sentiment', exist_ok=True)
    for date in rrule.rrule(rrule.WEEKLY, dtstart=start_date, until=end_date):
        from_date_str = date.strftime("%Y-%m-%d")
        to_date_str = (date + timedelta(days=5)).strftime("%Y-%m-%d")
        # The original put the raw datetime object into the "to" parameter,
        # producing "YYYY-MM-DD 00:00:00"; send the formatted date instead.
        params = {
            "token": api_key,
            "symbol": symbol,
            "from": from_date_str,
            "to": to_date_str,
        }
        retries = 0
        while True:
            r = requests.get("https://finnhub.io/api/v1/stock/social-sentiment",
                             params=params, timeout=timeout)
            if r.status_code != 429:
                break
            retries += 1
            if retries > max_retries:
                raise RuntimeError(
                    "Finnhub still returned HTTP 429 after {} retries for {} ({} to {})".
                    format(max_retries, symbol, from_date_str, to_date_str))
            # Back off instead of immediately re-sending the request.
            time.sleep(retry_wait)

        data = r.json()
        filename = 'social_sentiment/' + symbol + '_' + from_date_str + '_' + to_date_str + '.json'
        with open(filename, 'w') as f:
            json.dump(data, f)

In [36]:
def get_daily_stock_values(symbol:str,start_date:datetime.date,end_date:datetime.date,interval:int,api_key:str,
                           max_retries:int=8,retry_wait:float=15.0,timeout:float=30.0):
    """Download Alpha Vantage TIME_SERIES_INTRADAY data one month at a time.

    Every calendar month from the month of ``start_date`` through the month
    of ``end_date`` is requested, and each response is written to
    ``interday/<symbol>_<YYYY>_<MM>.json``.

    Args:
        symbol: Ticker symbol to query.
        start_date: Any day in the first month to fetch.
        end_date: Any day in the last month to fetch.
        interval: Bar size in minutes; one of 1, 5, 15, 30 or 60.
        api_key: Alpha Vantage API key.
        max_retries: How many times a throttled request is retried.
        retry_wait: Seconds to sleep before retrying a throttled request.
        timeout: Per-request timeout in seconds.

    Raises:
        ValueError: If ``interval`` is not one of the supported values.
        RuntimeError: If the API still answers with its rate-limit note after
            ``max_retries`` retries (e.g. the daily quota is exhausted).
    """
    if interval not in {1,5,15,30,60}:
        raise ValueError('Interval must be 1min, 5min, 15min, 30min or 60min')
    os.makedirs('interday', exist_ok=True)
    # Anchor the monthly recurrence on the 1st. Anchoring on start_date's own
    # day-of-month drops the final month whenever end_date falls earlier in
    # the month, and skips short months for days 29-31.
    first_of_month = start_date.replace(day=1)
    for date in rrule.rrule(rrule.MONTHLY, dtstart=first_of_month, until=end_date):
        year_str = str(date.year)
        month_str = '{:02d}'.format(date.month)
        params = {
            "function": "TIME_SERIES_INTRADAY",
            "apikey": api_key,
            "symbol": symbol,
            "interval": "{}min".format(interval),
            "month": "{}-{}".format(year_str, month_str),
            "outputsize": "full",
            "extended_hours": "false",
        }
        retries = 0
        while True:
            r = requests.get("https://www.alphavantage.co/query", params=params, timeout=timeout)
            data = r.json()
            # A throttled call is answered with a JSON object whose only key
            # is "Note"; anything else is treated as the real payload.
            if not (isinstance(data, dict) and list(data) == ["Note"]):
                break
            retries += 1
            if retries > max_retries:
                raise RuntimeError(
                    "Alpha Vantage rate limit still active after {} retries for {} in {}-{}".
                    format(max_retries, symbol, year_str, month_str))
            # Back off instead of immediately re-sending the request.
            time.sleep(retry_wait)

        filename = 'interday/' + symbol + '_' + year_str + '_' + month_str + '.json'
        with open(filename, 'w') as f:
            json.dump(data, f)

In [39]:
# API credentials are read from the environment (populated from .env above).
alpha_vantage_apikey=os.getenv("AlphaVantage_API_KEY")
finnhub_apikey=os.getenv("FinnHub_API_KEY")
if not alpha_vantage_apikey or not finnhub_apikey:
    # Fail fast: a missing key would otherwise be sent as the literal text
    # "None" in every request.
    raise RuntimeError("AlphaVantage_API_KEY and FinnHub_API_KEY must be set in the environment")
interval=15

start_date = datetime(2022, 3, 7).date()
# Take both the hour and the calendar date from the same US/Eastern clock;
# the original mixed an Eastern hour with the machine's local date.
now_eastern = datetime.now(tz=pytz.timezone('US/Eastern'))
end_date = now_eastern.date()
if now_eastern.hour < 17:
    # Before 17:00 Eastern the current day is excluded
    # (presumably because its data is not complete yet - confirm).
    end_date -= timedelta(days=1)

# Fetch news sentiment, social sentiment and intraday prices for every symbol.
for symbol in symbols_list:
    get_daily_news_sentiments(symbol,start_date,end_date,alpha_vantage_apikey)
    get_daily_social_media_sentiment(symbol,start_date,end_date,finnhub_apikey)
    get_daily_stock_values(symbol,start_date,end_date,interval,alpha_vantage_apikey)