In [1]:
# imports
from tqdm.notebook import tqdm

import os
data_dir = './data'
if not os.path.exists(data_dir): os.makedirs(data_dir)

import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [2]:
start = {'AMZN': '1/4/21', 'NFLX': '2/1/21', 'TSLA': '1/4/21'}

for ticker in tqdm(['AMZN', 'NFLX', 'TSLA'], leave=True):
    # read in trade (not bid or ask) data
    df = pd.read_csv(f'./data/{ticker}.csv', header=3, usecols=range(8))
    # drop NAs caused by bid and/or ask data
    df.dropna(subset='Dates', inplace=True)
    # fix first row with invalid date
    df.loc[0, 'Dates'] = start[ ticker ] + ' 9:30'
    # Dates col to datetime
    df['Dates'] = pd.to_datetime(df.Dates)
    df.set_index('Dates', inplace=True)

    # bounds => starting with x=9.30 am and y=12.00pm on first date on record
    x, ub = df.index.min(), df.index.max()
    y = x + pd.DateOffset(hours=2, minutes=30)
    day = pd.DateOffset(days=1)

    returns = dict()
    i = 0
    # iterating over every single day
    while x < ub and y < ub:
        try:
            open = df.loc[x, 'Open']    # open price at 9.30 am
            close = df.loc[y, 'Close']  # closing price at 12.00 pm
            returns[y] = close / open - 1
        # sometimes we do not have a return on record for some date
        except: i += 1 # i is a counter for failed parsing

        # incrementing x and y datetimes by 1 day to move forward into the future
        x, y = x + day, y + day

    # we will print out how many days we failed to parse just for a sanity check
    if i > 0: print(f'Failed to parse {i} returns for {ticker}.')

    # save returns per ticker as defined in HW2
    returns = pd.DataFrame.from_dict(returns, orient='index')
    returns.columns = [ticker]
    returns.to_csv(f'./data/{ticker}_processed.csv')

  0%|          | 0/3 [00:00<?, ?it/s]

Failed to parse 61 returns for AMZN.
Failed to parse 58 returns for NFLX.
Failed to parse 59 returns for TSLA.
