In [22]:
import yfinance as yf
import pandas as pd

In [23]:
# ticker symbols to analyse: the S&P 500 ETF first, followed by the
# Select Sector SPDR funds (order is preserved for the lists built from it)
broad_market_symbols = ['SPY']
sector_symbols = [
    'XLF', 'XLU', 'XLK', 'XLE', 'XLI',
    'XLB', 'XLP', 'XLV', 'XLY', 'XLRE', 'XLC',
]
tickerlist = broad_market_symbols + sector_symbols

In [26]:
# wrap every symbol in a yfinance Ticker object, keeping tickerlist's order
tickers = list(map(yf.Ticker, tickerlist))

In [27]:
# collect the 'longName' of every ticker; a ticker whose info lacks that
# key contributes None so namelist stays aligned with tickers
namelist = []
for tick in tickers:
    try:
        long_name = tick.info['longName']
    except KeyError:
        long_name = None
        print(f"there's no name listed for {tick}")
    namelist.append(long_name)

In [28]:
# display the collected full names (an entry is None where no 'longName' was found)
namelist

['SPDR S&P 500 ETF Trust',
 'Financial Select Sector SPDR Fund',
 'Utilities Select Sector SPDR Fund',
 'Technology Select Sector SPDR Fund',
 'Energy Select Sector SPDR Fund',
 'Industrial Select Sector SPDR Fund',
 'Materials Select Sector SPDR Fund',
 'Consumer Staples Select Sector SPDR Fund',
 'Health Care Select Sector SPDR Fund',
 'Consumer Discretionary Select Sector SPDR Fund',
 'The Real Estate Select Sector SPDR Fund',
 'Communication Services Select Sector SPDR Fund']

In [29]:
# download the price history of every ticker, one DataFrame per ticker,
# in the same order as tickers
# NOTE(review): '12mo' is passed as the history period; yfinance's documented
# one-year period string is '1y' — confirm '12mo' stays accepted on upgrade.
histlist = []
for fund in tickers:
    histlist.append(fund.history(period='12mo'))

In [32]:
# reset index for each dataframe and set date format
# The reset is guarded so this cell is safe to re-run: an unguarded
# reset_index(inplace=True) on an already-reset frame inserts a spurious
# 'index' column (then 'level_0', then raises on the next run).
for df in histlist:
    if 'Date' not in df.columns:
        # first run only: move the index into a regular 'Date' column
        df.reset_index(inplace=True)
    # converting an already-datetime column is a no-op, so this line is re-run safe
    df['Date'] = pd.to_datetime(df['Date'])

In [33]:
# derive two per-row columns on every history frame (frames are modified in place)
for history in histlist:
    # Amplitude: High minus Low
    history['Amplitude'] = history['High'].sub(history['Low'])
    # Daily Change: Open minus Close, so a positive value means Close was below Open
    history['Daily Change'] = history['Open'].sub(history['Close'])

In [34]:
# inspect the history frame of the first ticker in tickerlist, including the added columns
histlist[0]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Amplitude,Daily Change
0,2020-09-01,345.070324,347.533634,344.114558,347.425262,54908700,0.0,0,3.419076,-2.354939
1,2020-09-02,349.464909,353.485018,348.243086,352.450439,69540000,0.0,0,5.241931,-2.985531
2,2020-09-03,350.647283,351.149808,337.562180,340.321106,148011100,0.0,0,13.587628,10.326177
3,2020-09-04,341.050201,342.725234,329.955443,337.542450,139156300,0.0,0,12.769791,3.507751
4,2020-09-08,331.768428,337.611422,327.994650,328.319794,114465300,0.0,0,9.616772,3.448634
...,...,...,...,...,...,...,...,...,...,...
247,2021-08-25,448.170013,449.459991,447.769989,448.910004,40529700,0.0,0,1.690002,-0.739990
248,2021-08-26,448.609985,448.859985,446.160004,446.260010,57829600,0.0,0,2.699982,2.349976
249,2021-08-27,447.119995,450.649994,447.059998,450.250000,77201900,0.0,0,3.589996,-3.130005
250,2021-08-30,450.970001,453.070007,450.709991,452.230011,48357400,0.0,0,2.360016,-1.260010


In [37]:
# load the COVID-19 cases/deaths CSV (path is relative to the working directory)
covid_csv_path = 'United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv'
covid = pd.read_csv(covid_csv_path)
covid

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
0,03/11/2021,KS,297229,241035,56194,0,0,4851,,,0,0,03/12/2021 03:20:13 PM,Agree,
1,06/11/2021,TX,2965966,,,1463,355,51158,,,17,0,06/13/2021 12:00:00 AM,Not agree,Not agree
2,02/04/2020,AR,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Not agree,Not agree
3,05/20/2021,FL,2261584,,,2317,764,36981,,,41,7,05/20/2021 12:00:00 AM,Not agree,Not agree
4,05/09/2021,PW,0,,,0,0,0,,,0,0,05/10/2021 02:15:36 PM,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35035,03/08/2020,OH,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Agree,Agree
35036,04/27/2021,TN,841473,695941,145532,1285,427,12187,9803,2384,11,3,04/27/2021 12:00:00 AM,Agree,Agree
35037,08/21/2021,NM,224528,,,732,0,4475,,,3,0,08/23/2021 12:48:27 PM,,Not agree
35038,03/10/2021,SD,113962,,,209,37,1904,1546,358,3,0,03/11/2021 03:36:21 PM,,Agree


In [44]:
# drop rows with missing data
# Both steps were previously commented out, so `county` only existed as
# leftover kernel state and this cell raised NameError on a fresh kernel.
# Reassigning (rather than inplace=True) keeps the cell re-runnable:
# dropping rows from an already-complete frame changes nothing.
covid = covid.dropna(axis=0)
# number of remaining rows per value of the 'state' column
# (the name `county` is kept as-is in case other cells refer to it)
county = covid['state'].value_counts()
print(county)
covid

AL     584
CA     571
MI     545
NYC    544
NC     543
TN     543
MS     542
PR     541
KY     540
WY     500
OH     500
ID     500
CO     497
CT     497
VA     497
ME     496
WI     496
MT     496
OR     496
AZ     496
IL     496
RMI    495
DE     495
MP     495
FSM    495
GA     495
UT     494
MA     403
OK     359
NJ     244
Name: state, dtype: int64


Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
7,08/01/2021,GA,1187107,937515,249592,3829,1144,21690,18725,2965,7,0,08/02/2021 02:03:57 PM,Agree,Agree
8,10/01/2020,MP,73,73,0,3,0,2,2,0,0,0,10/02/2020 01:40:08 PM,Agree,Agree
9,07/26/2021,OK,475578,373929,101649,1028,264,7488,6379,1109,8,2,07/27/2021 02:48:23 PM,Agree,Agree
10,12/04/2020,MP,106,106,0,0,0,2,2,0,0,0,12/05/2020 02:29:17 PM,Agree,Agree
12,04/15/2021,MP,162,162,0,1,0,2,2,0,0,0,04/16/2021 02:12:25 PM,Agree,Agree
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35022,06/11/2020,TN,31011,30813,198,737,6,487,464,23,1,0,06/11/2020 12:00:00 AM,Agree,Agree
35025,02/10/2021,TN,755168,635913,119255,2496,598,11002,8860,2142,40,7,02/10/2021 12:00:00 AM,Agree,Agree
35030,05/09/2020,OH,23697,22560,1137,681,90,1331,1214,117,25,-4,05/10/2020 09:26:08 PM,Agree,Agree
35036,04/27/2021,TN,841473,695941,145532,1285,427,12187,9803,2384,11,3,04/27/2021 12:00:00 AM,Agree,Agree


In [None]:
# keep only the date, state and cumulative case/death columns
columns_of_interest = ['submission_date', 'state', 'tot_cases', 'tot_death']
cov_df = covid[columns_of_interest]