Question 1: What is the correlation between Google Search Trends (could also include the Bing Keyword Research Tool) and trading volume for stocks?
- Step 1: Select 3 stocks (AAPL, NVDA, TSLA) and download their financial info
- Step 2: Get Google Search Trends & Bing Keyword Research Tool data (as .csv)
- Step 3: Get financial information for the 3 chosen stocks (as .csv)
- Step 4: Find the correlation using the Pearson correlation test
- Step 5: Visualize using a line graph

In [14]:
import pandas as pd
import plotly.express as px 
from scipy.stats import pearsonr, chi2_contingency


In [3]:
class Date:
    """Minimal mutable calendar date that can be stepped back one day.

    The three components are stored as plain integers on ``month``,
    ``day`` and ``year``. The constructor performs no validation.
    """

    def __init__(self, month: int, day: int, year: int):
        """Store the given month, day and year on the instance."""
        self.month = month
        self.day = day
        self.year = year

    def __str__(self) -> str:
        """Return the date as a zero-padded ``YYYY-MM-DD`` string."""
        # Zero-padding keeps the text identical in shape to ISO date strings
        # such as '2023-01-05', so the result can be compared or joined
        # against them; unpadded '2023-1-5' would never match.
        return f'{self.year:04d}-{self.month:02d}-{self.day:02d}'

    def decrement_day(self) -> None:
        """Move this date back by one calendar day, in place.

        Within a month the day is simply reduced by one. On the first of a
        month the date rolls over to the last day of the *previous* month
        (and to the previous year when leaving January), with leap years
        taken into account for February.
        """
        import calendar

        if self.day > 1:
            self.day -= 1
            return

        # Step to the previous month first, wrapping January -> December.
        if self.month == 1:
            self.month = 12
            self.year -= 1
        else:
            self.month -= 1

        # The new day is the length of the month we just moved INTO, not the
        # one we left; monthrange() returns (first_weekday, days_in_month).
        self.day = calendar.monthrange(self.year, self.month)[1]

In [29]:
def change_dates(stock_fin):
    """Shift every value in the ``'Date'`` column back by one day, in place.

    Args:
        stock_fin: DataFrame with a ``'Date'`` column of ``YYYY-MM-DD``
            date strings. The column is overwritten; nothing is returned.

    The shifted dates are written back as zero-padded ``YYYY-MM-DD``
    strings so they keep the same textual format as the input.
    """
    # Let pandas do the calendar arithmetic: it handles month lengths, year
    # roll-over and leap years, and works on the whole column at once
    # instead of row by row.
    shifted = pd.to_datetime(stock_fin['Date']) - pd.Timedelta(days=1)
    stock_fin['Date'] = shifted.dt.strftime('%Y-%m-%d')

def stock_correlation(fin_csv: str, trends_csv: str):
    """Plot search interest and trading volume for one stock on one chart.

    Args:
        fin_csv: Path to the stock's financial CSV. Must contain ``'Date'``
            and ``'Volume'`` columns.
        trends_csv: Path to the search-trends CSV. Its first two lines are
            skipped; the remainder must contain ``'Week'`` and
            ``'Interest'`` columns.

    The two files are joined on matching dates and shown as an interactive
    line chart. Nothing is returned.

    NOTE(review): despite the name, no correlation coefficient is computed
    here; the function only merges and plots the two series.
    """
    # Volume is divided by 10 million (not 1 million) before plotting so it
    # is drawn on a scale closer to the 'Interest' series.
    volume_divisor = 10_000_000

    trends = pd.read_csv(trends_csv, skiprows=2)
    stock_price = pd.read_csv(fin_csv)

    # The stock dates are moved back one day before the join so that they
    # can line up with the 'Week' values of the trends data.
    change_dates(stock_price)

    merged_data = trends.merge(stock_price, left_on='Week', right_on='Date')
    merged_data['Volume'] = merged_data['Volume'] / volume_divisor

    # show() renders the figure and returns None, so there is nothing
    # worth binding to a variable.
    px.line(merged_data, x='Week', y=['Interest', 'Volume']).show()



In [27]:
# Plot NVDA trading volume against its search-trend interest.
stock_correlation(fin_csv='NVDA.csv', trends_csv='nvda_trends.csv')