In [1]:
import os
import time
import warnings
from datetime import datetime as dt
from datetime import timedelta as td

import pandas as pd
import requests
warnings.filterwarnings('ignore')

In [2]:
# Ancillary generator that yields each day in a date range (end date excluded)
def daterange(start_date, end_date):
    '''Lazily yield every day from start_date up to, but not including, end_date.
        start_date: datetime, first day produced
        end_date: datetime, exclusive upper bound
        return: generator of datetime objects, one per whole day in the span

        The total day count is printed when iteration starts (not at call time),
        because this is a generator function.
    '''
    span_in_days = int((end_date - start_date).days)
    print('Total days in daterange', span_in_days)
    offset = 0
    # A zero or negative span produces nothing, exactly like range() would.
    while offset < span_in_days:
        yield start_date + td(days=offset)
        offset += 1


In [3]:
## Extraction Function
def extract(start_date,end_date,api_key=None):
    '''Download Polygon "grouped daily" stock bars for every day in a date range.
        start_date: str, first day to fetch, formatted YYYY-MM-DD
        end_date: str, formatted YYYY-MM-DD; this day itself is not fetched
        api_key: str or None, Polygon API key; when None it is read from the
                 POLYGON_API_KEY environment variable
        return: pd.DataFrame holding the concatenated "results" rows of every day
                that returned data (an empty DataFrame when no day did)
        raises: ValueError when no API key is available or a date string does not
                match YYYY-MM-DD

    '''
    # Credentials must never live in the notebook source; take them from the
    # caller or from the environment instead.
    if api_key is None:
        api_key = os.environ.get("POLYGON_API_KEY")
    if not api_key:
        raise ValueError(
            "No Polygon API key supplied: pass api_key=... or set the "
            "POLYGON_API_KEY environment variable"
        )

    print("Extracting")
    # Type conversions for parsing string as argument to "get"
    s_date = dt.strptime(start_date,"%Y-%m-%d")
    e_date = dt.strptime(end_date,"%Y-%m-%d")

    base_url = "https://api.polygon.io/v2/aggs/grouped/locale/us/market/stocks/"
    query = {"adjusted": "true", "include_otc": "true", "apiKey": api_key}

    # Collect one frame per day and concatenate once at the end; growing a
    # DataFrame inside the loop is quadratic and DataFrame.append no longer
    # exists in pandas >= 2.0.
    daily_frames = []
    call_limit = 0
    for single_date in daterange(s_date, e_date):

        current_day_str = single_date.strftime("%Y-%m-%d")
        # The timeout keeps a stalled connection from hanging the whole run.
        result = requests.get(base_url + current_day_str, params=query, timeout=30)
        # Fixed pause after every request -- presumably to stay under the API's
        # rate limit; confirm the value against your Polygon plan.
        print("Villager Sleeps for 15 seconds")
        time.sleep(15)
        print("Mafia Wakes Up")
        if result.status_code == 200:
            payload = result.json()
            rows = payload.get('results') or []
            # Days without trading data report resultsCount == 0 and are skipped.
            if payload.get('resultsCount', 0) != 0 and rows:
                print("Appending for date: ",current_day_str)
                daily_frames.append(pd.DataFrame(rows))
                call_limit+=1
        else:
            # Make failed days visible instead of leaving silent gaps in the data.
            print("Skipping", current_day_str, "- HTTP status", result.status_code)

        # Extra pause after every 4 days that actually produced data.
        if call_limit==4:
            print("Mafia has killed max number of villagers. Entire City Sleeps")
            time.sleep(5)
            print("City Wakes Up")
            call_limit = 0

    if not daily_frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(daily_frames)


In [4]:
# Transform and Load Function
def transform_load(df,ticker,output_path="C:/Users/abhij/Git Repos/DeepLearningLabs/RNNs_in_Action/training.pkl"):
    '''
       Rename the raw API columns, keep only the rows of one ticker, convert the
       millisecond epoch timestamps to YYYY-MM-DD strings and store the result in
       a pickle file(.pkl). The input DataFrame is left unmodified.
       df: pd.DataFrame with the raw columns T, v, vw, o, c, h, l, t, n
       ticker: String containing the Ticker Symbol of the Stock to be Trained
       output_path: where the pickle is written; defaults to the original
                    training.pkl location
       return: None (the result is only written to output_path)

    '''

    print("Performing Necessary Transformations")
    #Renaming Columns
    # Each price column gets its own name; mapping o/c/h/l all to '' produced
    # four columns that could not be told apart by label.
    column_dict = {'T':'Ticker','v':'Volume','vw':'VWAP','o':'Open','c':'Close','h':'High','l':'Low','t':'Timestamp','n':'TransCount'}
    renamed = df.rename(columns = column_dict)

    # Filtering Specific Tickers
    # .copy() detaches the slice so the column assignment below writes to an
    # independent frame rather than a view of the input.
    selected = renamed.loc[renamed['Ticker']==ticker].copy()

    # Converting UTC epoch milliseconds to YYYY-MM-DD strings (vectorized)
    selected['Timestamp'] = pd.to_datetime(selected['Timestamp'], unit='ms').dt.strftime("%Y-%m-%d")

    #Print rowcount in training dataset
    print(len(selected))

    #Saving to a Pickle File
    selected.to_pickle(output_path)
     

In [5]:
# Fetch the daily bars starting 2021-07-27 (end date 2022-09-08) and pickle the AAPL rows.
transform_load(df=extract(start_date="2021-07-27", end_date="2022-09-08"), ticker="AAPL")

Extracting
Total days in daterange 408
Villager Sleeps for 15 seconds
Mafia Wakes Up
Appending for date:  2021-07-27
Villager Sleeps for 15 seconds
Mafia Wakes Up
Appending for date:  2021-07-28
Villager Sleeps for 15 seconds
