In [49]:
import os
import time
import warnings
from datetime import datetime as dt
from datetime import timedelta as td

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import requests
import yfinance as yf

In [21]:
## Section 1: Importing Stock Data Using Polygon.io API


In [2]:
# Ancillary generator that yields each day in a date range (end date excluded)
def daterange(start_date, end_date):
    '''Yield consecutive datetimes one day apart, from start_date up to but excluding end_date.

        start_date: datetime (first value yielded)
        end_date: datetime (exclusive upper bound)
        return: generator of datetime objects; yields nothing when end_date
                is not at least one whole day after start_date

    The total number of days is printed once, when iteration starts.
    '''
    span_days = int((end_date - start_date).days)
    print('Total days in daterange', span_days)
    yield from (start_date + td(days=offset) for offset in range(span_days))


In [3]:
## Extraction Function
def extract(start_date, end_date, api_key=None):
    '''Download Polygon.io grouped daily bars for every day in a date range.

    One GET request is issued per calendar day, from start_date up to but
    not including end_date. Days that do not return HTTP 200 or that carry
    no results are skipped.

        start_date: str in "%Y-%m-%d" format (first day requested)
        end_date: str in "%Y-%m-%d" format (exclusive upper bound)
        api_key: optional str, the Polygon.io API key. When omitted, the
                 POLYGON_API_KEY environment variable is used instead.
        return: pd.DataFrame built from the 'results' records of every day
                that returned data (empty DataFrame when no day did)
        raises: ValueError when no API key is available

    '''
    # Resolve the credential before doing anything else so a missing key fails
    # immediately. The key is deliberately not stored in the notebook.
    if api_key is None:
        api_key = os.environ.get("POLYGON_API_KEY")
    if not api_key:
        raise ValueError(
            "Polygon API key missing: pass api_key or set the POLYGON_API_KEY environment variable"
        )

    print("Extracting")
    # Type conversions so the range can be walked one day at a time
    s_date = dt.strptime(start_date, "%Y-%m-%d")
    e_date = dt.strptime(end_date, "%Y-%m-%d")

    base_url = "https://api.polygon.io/v2/aggs/grouped/locale/us/market/stocks/"
    query = {"adjusted": "true", "include_otc": "true", "apiKey": api_key}

    # Per-day frames are collected and concatenated once at the end;
    # DataFrame.append was removed in pandas 2.0 and re-copied the data on every call.
    daily_frames = []
    call_limit = 0
    for single_date in daterange(s_date, e_date):

        current_day_str = single_date.strftime("%Y-%m-%d")
        # The timeout keeps one stalled connection from hanging the whole run.
        result = requests.get(base_url + current_day_str, params=query, timeout=30)
        # Pause after every request to stay under the API rate limit.
        print("Villager Sleeps for 15 seconds")
        time.sleep(15)
        print("Mafia Wakes Up")

        if result.status_code != 200:
            continue

        # Parse the body once; tolerate payloads missing either key.
        payload = result.json()
        if payload.get('resultsCount', 0) == 0 or not payload.get('results'):
            continue

        print("Appending for date: ", current_day_str)
        daily_frames.append(pd.DataFrame(payload['results']))
        call_limit += 1

        # Extra pause after every fourth successful download.
        if call_limit == 4:
            print("Mafia has killed max number of villagers. Entire City Sleeps")
            time.sleep(5)
            print("City Wakes Up")
            call_limit = 0

    # pd.concat raises on an empty list, so fall back to an empty frame.
    if not daily_frames:
        return pd.DataFrame()
    return pd.concat(daily_frames)


In [4]:
# Transform and Load Function
def transform_load(df, ticker=None,
                   output_path="C:/Users/abhij/Git Repos/DeepLearningLabs/RNNs_in_Action/training.pkl"):
    '''
       Rename the extracted columns, keep only the requested tickers, convert the
       millisecond timestamps to "%Y-%m-%d" strings and store the result in a
       pickle file (.pkl). The input DataFrame is not modified.
       df: pd.DataFrame with the short column names 'T', 'v', 'vw', 'o', 'c',
           'h', 'l', 't', 'n'
       ticker: ticker symbol (str) or list of ticker symbols to keep;
               omitted or None keeps no rows
       output_path: where the pickle file is written
       return: None

    '''

    print("Performing Necessary Transformations")

    # Normalise the filter: no tickers -> empty selection, single symbol -> one-item list
    # (Series.isin rejects a bare string).
    if ticker is None:
        ticker = []
    elif isinstance(ticker, str):
        ticker = [ticker]

    #Renaming Columns
    column_dict = {'T':'Ticker','v':'Volume','vw':'VWAP','o':'Open','c':'Close','h':'High','l':'Low','t':'Timestamp','n':'TransCount'}
    df = df.rename(columns = column_dict)

    # Filtering Specific Tickers; copy so the assignment below writes to an
    # independent frame instead of a slice of the renamed data
    df = df[df['Ticker'].isin(ticker)].copy()

    # Converting epoch-millisecond (UTC) timestamps to date strings, vectorised
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='ms').dt.strftime("%Y-%m-%d")

    #Print rowcount in training dataset
    print(len(df))

    #Saving to a Pickle File
    df.to_pickle(output_path)
     

In [None]:
# Download the grouped daily bars for 2018-10-09 up to (not including) 2020-10-08,
# then keep only the tickers below and pickle the result.
# extract pauses after every request, so this cell is slow.
transform_load(
    extract("2018-10-09", "2020-10-08"),
    [
        'AAPL', 'NVDA', 'AMZN', 'MSFT', 'TSLA', 'GOOG', 'AMD', 'META',
        'INTC', 'TTD', 'MTCH', 'PYPL', 'ATVI', 'EA', 'ZG', 'AMAT',
        'ADBE', 'GOOGL', 'ADI', 'AKAM', 'ASML', 'AERC', 'ALKT', 'AKTS',
        'AEY', 'AMOT', 'AMKR', 'ARRY', 'ANSS', 'QCOM', 'ALGN', 'APCX',
    ],
)

In [25]:
# Reload the pickled Polygon training data written by transform_load
results_df = pd.read_pickle(
    "C:/Users/abhij/Git Repos/DeepLearningLabs/RNNs_in_Action/training.pkl"
)

'2022-08-08'

In [27]:
## Section 2: Importing Stock Data Using Yahoo Finance API

In [35]:
# Symbols whose price history is downloaded from Yahoo Finance
ticker_list = ['AAPL',
'NVDA','AMZN','MSFT','TSLA','GOOG','AMD','META','INTC','TTD','MTCH','PYPL','ATVI','EA',
'ZG','AMAT','ADBE','GOOGL','ADI','AKAM','ASML','AERC','ALKT','AKTS','AEY','AMOT','AMKR',
'ARRY','ANSS','QCOM','ALGN','APCX']

# Collect one history frame per symbol and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the data on every call.
ticker_frames = []
for symbol in ticker_list:
    ticker = yf.Ticker(symbol)
    ticker_df = ticker.history(period="5y")
    # Tag every row with its symbol so the stacked frame stays separable
    ticker_df["Ticker"] = symbol
    ticker_frames.append(ticker_df)
final_df = pd.concat(ticker_frames)

print(final_df.shape[0])
final_df.to_pickle("C:/Users/abhij/Git Repos/DeepLearningLabs/RNNs_in_Action/training_yfinance.pkl")

37454


In [36]:
# Reload the pickled Yahoo Finance history written by the download cell
training_yfinance = pd.read_pickle(
    "C:/Users/abhij/Git Repos/DeepLearningLabs/RNNs_in_Action/training_yfinance.pkl"
)

In [51]:
# Keep only the rows that have an opening price
training_yfinance = training_yfinance.loc[training_yfinance["Open"].notna()]

In [85]:
# Display the rows whose index label lies between 2020-08-28 and 2020-08-31 (both ends included),
# across all tickers. In the saved output, 2020-08-31 carries non-zero 'Stock Splits' for AAPL and TSLA.
training_yfinance.loc["2020-08-28":"2020-08-31"]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-08-28,124.509492,124.934364,123.091611,123.318871,187630000.0,0.0,0.0,AAPL
2020-08-31,126.058315,129.437522,124.497158,127.500893,225702700.0,0.0,4.0,AAPL
2020-08-28,126.515815,131.237071,126.191418,131.234573,53716000.0,0.0,0.0,NVDA
2020-08-31,131.583926,135.499179,130.136608,133.497879,50084000.0,0.0,0.0,NVDA
2020-08-28,171.149994,171.668503,169.324997,170.089996,57940000.0,0.0,0.0,AMZN
2020-08-31,170.449493,174.75,170.25,172.548004,83718000.0,0.0,0.0,AMZN
2020-08-28,224.703262,227.125786,223.12765,225.42215,26292900.0,0.0,0.0,MSFT
2020-08-31,223.541262,225.215356,220.892246,222.093658,28774200.0,0.0,0.0,MSFT
2020-08-28,459.023987,463.697998,437.303986,442.679993,100406000.0,0.0,0.0,TSLA
2020-08-31,444.609985,500.140015,440.109985,498.320007,118374400.0,0.0,5.0,TSLA


In [None]:
# Plot one closing-price line per ticker over the date index.
# The original x="" / y="" placeholders are not column names and raise KeyError.
# NOTE(review): closing price per ticker is an assumed intent; change the column if another view was meant.
close_by_ticker = training_yfinance.pivot(columns="Ticker", values="Close")
ax = close_by_ticker.plot(figsize=(12, 6), title="Daily closing price by ticker")
ax.set(xlabel="Date", ylabel="Close");