<a href="https://colab.research.google.com/github/jturnbach/DS320-Project/blob/main/DS320_Project_Calc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

NYT Covid Data By County = https://github.com/nytimes/covid-19-data

In [159]:
import pandas as pd
import yfinance as yf
from io import StringIO, BytesIO
import requests
import zipfile
import matplotlib.pyplot as plt

In [160]:
github_repo_url = 'https://github.com/jturnbach/DS320-Project'

#create path dictionary
covid_data_by_county_paths = {
    2020: 'covid_cases_by_county_2020.csv.zip',
    2021: 'covid_cases_by_county_2021.csv.zip',
    2022: 'covid_cases_by_county_2022.csv.zip',
    2023: 'covid_cases_by_county_2023.csv.zip'
}

#covid dict format: (year): (csv_file)
covid_by_county = {}
covid_by_state = pd.DataFrame({})
covid = {}

#load files from path dict to covid dict
for year in covid_data_by_county_paths.keys():
  url = f'{github_repo_url}/raw/main/{covid_data_by_county_paths[year]}'
  response = requests.get(url)
  if response.status_code == 200:
    with zipfile.ZipFile(BytesIO(response.content)) as z:
      with z.open(covid_data_by_county_paths[year][:-4]) as file:
        covid_by_county[year] = pd.read_csv(file)

url = f'{github_repo_url}/raw/main/us-states.csv'
response = requests.get(url)
print(response.status_code)
if response.status_code == 200:
  covid_by_state = pd.read_csv(BytesIO(response.content))
  print(covid_by_state)
print(type(covid_by_state))
covid = {year: group for year, group in covid_by_state.groupby(pd.to_datetime(covid_by_state['date']).dt.year)}


200
             date          state  fips    cases  deaths
0      2020-01-21     Washington    53        1       0
1      2020-01-22     Washington    53        1       0
2      2020-01-23     Washington    53        1       0
3      2020-01-24       Illinois    17        1       0
4      2020-01-24     Washington    53        1       0
...           ...            ...   ...      ...     ...
61937  2023-03-23       Virginia    51  2298300   23782
61938  2023-03-23     Washington    53  1940704   15905
61939  2023-03-23  West Virginia    54   645710    8132
61940  2023-03-23      Wisconsin    55  2014524   16485
61941  2023-03-23        Wyoming    56   185800    2014

[61942 rows x 5 columns]
<class 'pandas.core.frame.DataFrame'>


In [161]:
# Load COVID-19 dataset for 2020
#covid[2020] = pd.read_csv(#Need the paths here)

# Load COVID-19 dataset for 2021
#covid[2021] = pd.read_csv(#Need the paths here)

# Concatenate both datasets
covid_data = pd.concat([pd.DataFrame(covid[2020]), pd.DataFrame(covid[2021])])
covid_data['new_cases'] = covid_data.groupby('state')['cases'].diff().fillna(covid_data['cases'])

# Fetch DASH stock data using yfinance
dash = yf.Ticker('DASH')
stock_dash = dash.history(period='1y')  # Fetch data from 2020 - 2021

# Fixing the dates
covid_data['date'] = pd.to_datetime(covid_data['date']).dt.date
stock_dash.reset_index(inplace=True)  # Resetting index to have 'Date' as a column
stock_dash['Date'] = pd.to_datetime(stock_dash['Date']).dt.date

# Merge datasets
merged_data = pd.merge(covid_data, stock_dash, left_on='date', right_on='Date', how='inner')

# Calculate Pearson correlation coefficient
pearson_corr = merged_data['new_cases'].corr(merged_data['Open'])
print(f"Pearson Correlation Coefficient: {pearson_corr}")

# Calculate Covariance
covariance = merged_data['new_cases'].cov(merged_data['Open'])
print(f"Covariance: {covariance}")

# Rolling Window Correlation (example window size = 10)
rolling_corr = merged_data['new_cases'].rolling(window=10).corr(merged_data['Open'])
print("Rolling Window Correlation:")
print(rolling_corr)

# Moving Averages (example window size = 7)
covid_data['cases_MA'] = covid_data['new_cases'].rolling(window=7).mean()
stock_dash['Open_MA'] = stock_dash['Open'].rolling(window=7).mean()

# Daily Returns for stock prices
stock_dash['Returns'] = stock_dash['Open'].pct_change()

# Volatility (example window size = 30)
stock_dash['Volatility'] = stock_dash['Open'].rolling(window=30).std()

# Print some calculated outputs
print("\nMoving Averages for COVID-19 Cases:")
print(covid_data['cases_MA'].head(15))  # Print the first 15 values of the moving average for COVID-19 cases
print("\nDaily Returns for DASH Stock Prices:")
print(stock_dash['Returns'].head(15))  # Print the first 15 daily returns for DASH stock prices
print("\nVolatility for DASH Stock Prices:")
print(stock_dash['Volatility'].head(15))  # Print the first 15 values of volatility for DASH stock prices

Pearson Correlation Coefficient: nan
Covariance: nan
Rolling Window Correlation:
Series([], dtype: float64)

Moving Averages for COVID-19 Cases:
0          NaN
1          NaN
2          NaN
3          NaN
4          NaN
5          NaN
6     0.428571
7     0.285714
8     0.428571
9     0.571429
10    0.428571
11    0.428571
12    0.285714
13    0.285714
14    0.285714
Name: cases_MA, dtype: float64

Daily Returns for DASH Stock Prices:
0          NaN
1    -0.035369
2    -0.017199
3     0.041538
4    -0.016617
5     0.043748
6     0.101097
7    -0.052442
8    -0.008621
9    -0.042609
10   -0.041054
11   -0.033340
12    0.019008
13    0.000000
14   -0.019808
Name: Returns, dtype: float64

Volatility for DASH Stock Prices:
0    NaN
1    NaN
2    NaN
3    NaN
4    NaN
5    NaN
6    NaN
7    NaN
8    NaN
9    NaN
10   NaN
11   NaN
12   NaN
13   NaN
14   NaN
Name: Volatility, dtype: float64
