In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf

In [5]:
# Download daily NVDA price history from Yahoo Finance, starting 2000-01-01.
# NOTE(review): failures are only printed. If yf.download raises, nvda_data is
# never assigned in this cell and later cells that use it will fail with a
# NameError — confirm that print-and-continue is really what is wanted here.

try:
    nvda_data = yf.download("NVDA", start="2000-01-01", auto_adjust=True, multi_level_index=False)

    if not nvda_data.empty:
        print("Data successfully downloaded.")
        print(nvda_data.head())
    else:
        # An empty frame is reported but still kept in nvda_data
        print("Warning: Downloaded dataframe is empty. Check ticker or connection.")
except Exception as err:
    print(f"\nError downloading: {err}")


[*********************100%***********************]  1 of 1 completed

Data successfully downloaded.
               Close      High       Low      Open     Volume
Date                                                         
2000-01-03  0.089415  0.090967  0.084282  0.090251  300912000
2000-01-04  0.087028  0.088102  0.082610  0.087863  300480000
2000-01-05  0.084163  0.085953  0.082969  0.084521  188352000
2000-01-06  0.078671  0.084163  0.075448  0.084163  120480000
2000-01-07  0.079985  0.080820  0.077120  0.078313   71184000





In [None]:
# Keep only the first level of the column labels so columns can be addressed by plain name.
# NOTE(review): the download call passes multi_level_index=False, so the columns
# look single-level already and this line is presumably a no-op — confirm.
nvda_data.columns = nvda_data.columns.get_level_values(level=0)

# Turn the date index into an ordinary column
nvda_data = nvda_data.reset_index(drop=False)

nvda_data.head()

In [None]:
#Load and Inspect QQQ data
# Reads the QQQ table from a CSV at a path relative to the working directory.
# NOTE(review): the file name suggests split-adjusted prices — confirm against the data source.

QQQ_data = pd.read_csv("market_data/QQQ_split_adj.csv")

QQQ_data.head()

In [None]:
# Prepare both tables for merging: suffix each price/volume column with its
# ticker, parse the date column as datetime, and trim/prune the QQQ table.

# NVDA: "Date" -> "date"; "Close" -> "close_NVDA", "High" -> "high_NVDA", ...
nvda_data = nvda_data.rename(
    columns={
        "Date": "date",
        **{field: f"{field.lower()}_NVDA" for field in ("Close", "High", "Low", "Open", "Volume")},
    }
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

# QQQ: "close" -> "close_QQQ", ...; the "date" column keeps its name.
# Rows dated before 2000-01-01 are discarded, then the helper columns that are
# not needed are removed (errors="ignore" tolerates any that are absent).
QQQ_data = (
    QQQ_data
    .rename(columns={field: f"{field}_QQQ" for field in ("close", "high", "low", "open", "volume")})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .loc[lambda frame: frame["date"] >= "2000-01-01"]
    .drop(["raw_close", "change_percent", "avg_vol_20d"], axis=1, errors="ignore")
)

QQQ_data.head()




In [None]:
#Remove ghost header
# Clears the name attached to the column index of nvda_data.
# (Presumably that name is what appears as an extra header label when the frame is displayed — TODO confirm.)

nvda_data.columns.name = None
nvda_data.head()

In [None]:
# Join NVDA and QQQ on their shared "date" column; the inner join keeps only
# dates that are present in both tables.

data_study = pd.merge(left=nvda_data, right=QQQ_data, how='inner', on='date')

data_study.head()

In [None]:
# Inspect the last rows of the merged table
data_study.tail()

In [None]:
# Replace the row index with a fresh 0..n-1 range, discarding the old index

data_study = data_study.reset_index(drop=True)
data_study.head()

In [None]:
#Check for missing values (NaN): prints the number of null entries in each column

print(data_study.isnull().sum())

In [None]:
#Check for duplicates: prints how many rows are flagged as duplicates by DataFrame.duplicated()

print(data_study.duplicated().sum())

In [None]:
#Check data types: shows the dtype of every column in the merged table

data_study.dtypes

In [None]:
#Size inspection: (number of rows, number of columns) of the merged table
data_study.shape

In [None]:
# Daily return for each ticker: percentage change between the current day's
# close and the previous row's close
data_study = data_study.assign(
    NVDA_pct=data_study['close_NVDA'].pct_change(),
    QQQ_pct=data_study['close_QQQ'].pct_change(),
)

data_study.head()

In [None]:
# Drop rows that have no daily return (the first row has no previous close, so
# its percentage change is NaN).
# Only the two return columns are checked: a bare dropna() would also discard
# any row that happens to have a NaN in an unrelated column, silently removing
# valid return observations from the correlation study.
data_study = data_study.dropna(subset=['NVDA_pct', 'QQQ_pct'])

data_study.head()

In [None]:
# Correlation between NVDA and QQQ daily returns, computed separately for each
# calendar quarter (rows are bucketed by the quarterly period of their date).
quarterly_corr = (
    data_study
    .groupby(data_study['date'].dt.to_period('Q'))
    .apply(lambda quarter_rows: quarter_rows['NVDA_pct'].corr(quarter_rows['QQQ_pct']))
)

quarterly_corr.head()

In [None]:
# Correlation of daily returns over the whole sample; drawn as the reference level
overall_corr_val = data_study['NVDA_pct'].corr(data_study['QQQ_pct'])

# Length of the sample in years, taken from the data itself. The download has no
# end date, so a hard-coded "24-Year" label goes stale as the sample grows.
span_years = (data_study['date'].max() - data_study['date'].min()).days / 365.25

# Explicit figure/axes so every artist is drawn on the same, known axes
fig, ax = plt.subplots(figsize=(20, 8))

# Quarterly correlation as a bar chart
quarterly_corr.plot(kind='bar', ax=ax, color='lightgray', alpha=0.7, label='Quarterly Regime')

# Full-sample correlation as a dashed horizontal line. This is the correlation
# over all daily returns, not an average of the quarterly bars, so the label
# says "Overall Correlation" rather than "Average".
ax.axhline(
    y=overall_corr_val,
    color='red',
    linestyle='--',
    linewidth=2,
    label=f'{span_years:.0f}-Year Overall Correlation ({overall_corr_val:.2f})',
)

# Titles, axis labels and tick formatting
ax.set_title('NVDA vs QQQ: Identifying Regime Deviations', fontsize=14)
ax.set_ylabel('Correlation Coefficient')
ax.set_xlabel('Quarter')
ax.legend()
ax.tick_params(axis='x', labelrotation=90, labelsize=8)
plt.show()