In [26]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf

In [27]:
#import NVDA ticker data
nvda_data = yf.download("NVDA", start="2014-01-01", auto_adjust=True)
nvda_data.head()

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2014-01-02,0.373864,0.376693,0.370564,0.375278,260092000
2014-01-03,0.369385,0.375278,0.368206,0.374571,259332000
2014-01-06,0.374335,0.377164,0.369621,0.373157,409492000
2014-01-07,0.380464,0.381879,0.375514,0.378107,333288000
2014-01-08,0.38565,0.387536,0.380464,0.381879,308192000


In [28]:
#flattening of columns to allows for analysis
nvda_data.columns = nvda_data.columns.get_level_values(0)

#Make date a column and not an index 
nvda_data = nvda_data.reset_index()

nvda_data.head()

Price,Date,Close,High,Low,Open,Volume
0,2014-01-02,0.373864,0.376693,0.370564,0.375278,260092000
1,2014-01-03,0.369385,0.375278,0.368206,0.374571,259332000
2,2014-01-06,0.374335,0.377164,0.369621,0.373157,409492000
3,2014-01-07,0.380464,0.381879,0.375514,0.378107,333288000
4,2014-01-08,0.38565,0.387536,0.380464,0.381879,308192000


In [29]:
#Load and Inspect QQQ data

QQQ_data = pd.read_csv("market_data/QQQ_split_adj.csv")

QQQ_data.head()

Unnamed: 0,date,open,high,low,close,volume,raw_close,change_percent,avg_vol_20d
0,1999-03-10,51.0625,51.15625,50.28125,51.0625,5232200,102.125,,
1,1999-03-11,51.4375,51.7344,50.3125,51.3125,9688600,102.625,0.49,
2,1999-03-12,51.125,51.15625,49.65625,50.0625,8743600,100.125,-2.44,
3,1999-03-15,50.4375,51.5625,49.90625,51.5,6369000,103.0,2.87,
4,1999-03-16,51.71875,52.15625,51.15625,51.9375,4905800,103.875,0.85,


In [34]:
#Data clean up and changing NVDA column names to match QQQ

nvda_data = nvda_data.rename(columns={
    "Date": "date",
    "Close": "close",
    "High": "high",
    "Low": "low",
    "Open": "open",
    "Volume": "volume"
    })

#Enforce dateTime object casting
nvda_data['date'] = pd.to_datetime(nvda_data['date'])
QQQ_data['date'] = pd.to_datetime(QQQ_data['date'])

#Filter QQQ data to start from 2014

QQQ_data = QQQ_data[QQQ_data['date'] >= '2014-01-01']

#Drop redundant columns

QQQ_data = QQQ_data.drop(["raw_close", "change_percent", "avg_vol_20d"], axis = 1, errors = 'ignore')

QQQ_data.head()










Unnamed: 0,date,open,high,low,close,volume
3728,2014-01-02,87.55,87.58,87.02,87.27,29190010
3729,2014-01-03,87.27,87.35,86.62,86.64,35727320
3730,2014-01-06,86.65,86.76,86.0,86.32,32092439
3731,2014-01-07,86.7,87.25,86.56,87.12,25913230
3732,2014-01-08,87.11,87.55,86.945,87.31,27209990


In [39]:
#Remove ghost header

nvda_data.columns.name = None
nvda_data.head()

Unnamed: 0,date,close,high,low,open,volume
0,2014-01-02,0.373864,0.376693,0.370564,0.375278,260092000
1,2014-01-03,0.369385,0.375278,0.368206,0.374571,259332000
2,2014-01-06,0.374335,0.377164,0.369621,0.373157,409492000
3,2014-01-07,0.380464,0.381879,0.375514,0.378107,333288000
4,2014-01-08,0.38565,0.387536,0.380464,0.381879,308192000
