In [None]:
#dependencies
import pandas as pd
import numpy as np

In [None]:
#reading in the covid data file
all_states_history_path = "Resources/all-states-history.csv"
all_covid_data_df = pd.read_csv(all_states_history_path)
all_covid_data_df.head(10)

In [None]:
#displaying columns
all_covid_data_df.columns

In [None]:
#creating a dict for making a shorter covid df with only relevant cols
covid_cols_dict = {
    'date': [x for x in all_covid_data_df['date']],
    'state': [x for x in all_covid_data_df['state']],
    'positive_cases': [x for x in all_covid_data_df['positive']],
    'total_tests': [x for x in all_covid_data_df['totalTestResults']]
}

In [None]:
#creating a shorter covid df
covid_cols_df = pd.DataFrame(covid_cols_dict)
covid_cols_df.head()

In [None]:
#creating a new col in covid cols df for %age of positive cases
covid_cols_df['percentage_positive'] = (covid_cols_df['positive_cases']/covid_cols_df['total_tests']) * 100
covid_cols_df.head()

In [None]:
#resetting index as the dates
covid_cols_df = covid_cols_df.set_index('date')

In [None]:
#showing the head of the covid cols df
covid_cols_df.head()

In [None]:
#reading in the state abbreviations
path = 'Resources/statesAbbrev.csv'
statesAbbrev_df = pd.read_csv(path)
statesAbbrev_df.head(11)

In [10]:
#list for state codes
state_codes = [x for x in statesAbbrev_df['Code']]
print(state_codes)

['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']


In [11]:
#list for state names
state_names = [x for x in statesAbbrev_df['State']]

In [12]:
#finding the codes for each of the states concerned here 
state_codes[0], state_codes[1], state_codes[2], state_codes[3], state_codes[4], state_codes[5], state_codes[6], state_codes[7], state_codes[8], state_codes[9], state_codes[10]

('AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA')

In [13]:
#reading in the stay at home order df
path = 'Resources/full_SAH_orders.csv'
sah_data_df = pd.read_csv(path)
sah_data_df.head()

Unnamed: 0,state_name,start_date,end_date,gathering_bans,previous_orders_start_date,previous_orders_end_date
0,Hawaii,4/25/2020,5/31/2020,,,
1,Washington,4/2/2020,5/4/2020,,,
2,Montana,4/24/2020,4/26/2020,More than 10,,
3,Maine,4/2/2020,4/30/2020,,,
4,North Dakota,,,,,


In [14]:
#creating a df dict to hold the state dfs, reading in the dfs
states_df_dict = {}
for x in range(0, 11):
    path = f'Resources/US-{state_codes[x]}_cat_data.csv'
    states_df_dict[f'{state_codes[x]}_data_df'] = pd.read_csv(path)

In [15]:
#testing to see how many states lie in the df dict
len(states_df_dict)

11

In [16]:
#inspecting the keys (ie df names)
states_df_dict.keys()

dict_keys(['AL_data_df', 'AK_data_df', 'AZ_data_df', 'AR_data_df', 'CA_data_df', 'CO_data_df', 'CT_data_df', 'DE_data_df', 'DC_data_df', 'FL_data_df', 'GA_data_df'])

In [17]:
#test printing a df
print(state_codes[0])
states_df_dict[f'{state_codes[0]}_data_df'].head(10)

AL


Unnamed: 0,time,arts_entertainment,autos,beauty_fitness,books_lit,action_adventure,campaigns_elections,celebs,discrimination,entertainment_media,...,mobiles,online_vids,scifi_fantasy,sport_news,tv_shows,voice_vid_chat,weather,covid_cases,stay_at_home,mass_gathering_ban
0,2019-01-06,80,71,76,73,60,0,67,16,90,...,67,92,93,59,81,45,48,,False,False
1,2019-01-13,80,73,78,82,72,0,67,14,89,...,68,84,53,58,84,44,66,,False,False
2,2019-01-20,75,73,73,82,67,0,57,11,94,...,64,91,76,51,78,50,72,,False,False
3,2019-01-27,74,72,73,79,68,0,60,18,93,...,61,79,49,53,80,46,100,,False,False
4,2019-02-03,76,74,77,84,58,0,64,17,78,...,56,82,47,82,82,48,54,,False,False
5,2019-02-10,77,72,73,79,67,0,64,20,84,...,61,87,57,46,84,43,59,,False,False
6,2019-02-17,76,75,75,84,64,0,80,21,86,...,62,87,49,39,79,43,82,,False,False
7,2019-02-24,78,79,74,89,68,0,61,21,64,...,65,77,58,29,82,53,69,,False,False
8,2019-03-03,74,73,71,79,70,0,57,29,77,...,68,76,51,37,86,62,88,,False,False
9,2019-03-10,73,75,73,71,59,0,62,19,75,...,58,78,61,43,76,62,81,,False,False


In [18]:
#creating a list of all the dates the states df have cat data for
dates_list = [x for x in states_df_dict[f'{state_codes[0]}_data_df']['time']]
dates_list

['2019-01-06',
 '2019-01-13',
 '2019-01-20',
 '2019-01-27',
 '2019-02-03',
 '2019-02-10',
 '2019-02-17',
 '2019-02-24',
 '2019-03-03',
 '2019-03-10',
 '2019-03-17',
 '2019-03-24',
 '2019-03-31',
 '2019-04-07',
 '2019-04-14',
 '2019-04-21',
 '2019-04-28',
 '2019-05-05',
 '2019-05-12',
 '2019-05-19',
 '2019-05-26',
 '2019-06-02',
 '2019-06-09',
 '2019-06-16',
 '2019-06-23',
 '2019-06-30',
 '2019-07-07',
 '2019-07-14',
 '2019-07-21',
 '2019-07-28',
 '2019-08-04',
 '2019-08-11',
 '2019-08-18',
 '2019-08-25',
 '2019-09-01',
 '2019-09-08',
 '2019-09-15',
 '2019-09-22',
 '2019-09-29',
 '2019-10-06',
 '2019-10-13',
 '2019-10-20',
 '2019-10-27',
 '2019-11-03',
 '2019-11-10',
 '2019-11-17',
 '2019-11-24',
 '2019-12-01',
 '2019-12-08',
 '2019-12-15',
 '2019-12-22',
 '2019-12-29',
 '2020-01-05',
 '2020-01-12',
 '2020-01-19',
 '2020-01-26',
 '2020-02-02',
 '2020-02-09',
 '2020-02-16',
 '2020-02-23',
 '2020-03-01',
 '2020-03-08',
 '2020-03-15',
 '2020-03-22',
 '2020-03-29',
 '2020-04-05',
 '2020-04-

In [19]:
#testing the char indices for only the year-month
dates_list[0][0:7]

'2019-01'

In [20]:
#creating a list of unique year-month combos
all_month_year = [x[0:7] for x in dates_list]
month_year = list(set(all_month_year))
month_year

['2019-07',
 '2019-01',
 '2019-08',
 '2020-05',
 '2020-10',
 '2019-05',
 '2019-04',
 '2020-06',
 '2020-02',
 '2020-01',
 '2020-11',
 '2020-03',
 '2019-06',
 '2019-09',
 '2019-12',
 '2019-10',
 '2020-07',
 '2019-02',
 '2020-08',
 '2020-12',
 '2019-11',
 '2019-03',
 '2020-04',
 '2020-09']

In [21]:
#function for appending dates per month, and their indices, to a month_date_dict
months_dates_dict = {}
def date_dict_append(m):
    month_date_list = []
    for x in range(0, len(dates_list)):
        if dates_list[x][0:7] == month_year[m]:
            month_date_list.append((dates_list[x], x))
    global months_dates_dict
    months_dates_dict[f'{month_year[m]}'] = month_date_list
    print(month_date_list)

In [22]:
#calling the function for each month in the month-year dict
for m in range(0, len(month_year)):
    date_dict_append(m)

[('2019-07-07', 26), ('2019-07-14', 27), ('2019-07-21', 28), ('2019-07-28', 29)]
[('2019-01-06', 0), ('2019-01-13', 1), ('2019-01-20', 2), ('2019-01-27', 3)]
[('2019-08-04', 30), ('2019-08-11', 31), ('2019-08-18', 32), ('2019-08-25', 33)]
[('2020-05-03', 69), ('2020-05-10', 70), ('2020-05-17', 71), ('2020-05-24', 72), ('2020-05-31', 73)]
[('2020-10-04', 91), ('2020-10-11', 92), ('2020-10-18', 93), ('2020-10-25', 94)]
[('2019-05-05', 17), ('2019-05-12', 18), ('2019-05-19', 19), ('2019-05-26', 20)]
[('2019-04-07', 13), ('2019-04-14', 14), ('2019-04-21', 15), ('2019-04-28', 16)]
[('2020-06-07', 74), ('2020-06-14', 75), ('2020-06-21', 76), ('2020-06-28', 77)]
[('2020-02-02', 56), ('2020-02-09', 57), ('2020-02-16', 58), ('2020-02-23', 59)]
[('2020-01-05', 52), ('2020-01-12', 53), ('2020-01-19', 54), ('2020-01-26', 55)]
[('2020-11-01', 95), ('2020-11-08', 96), ('2020-11-15', 97), ('2020-11-22', 98), ('2020-11-29', 99)]
[('2020-03-01', 60), ('2020-03-08', 61), ('2020-03-15', 62), ('2020-03-22

In [23]:
#printing the dict
months_dates_dict

{'2019-07': [('2019-07-07', 26),
  ('2019-07-14', 27),
  ('2019-07-21', 28),
  ('2019-07-28', 29)],
 '2019-01': [('2019-01-06', 0),
  ('2019-01-13', 1),
  ('2019-01-20', 2),
  ('2019-01-27', 3)],
 '2019-08': [('2019-08-04', 30),
  ('2019-08-11', 31),
  ('2019-08-18', 32),
  ('2019-08-25', 33)],
 '2020-05': [('2020-05-03', 69),
  ('2020-05-10', 70),
  ('2020-05-17', 71),
  ('2020-05-24', 72),
  ('2020-05-31', 73)],
 '2020-10': [('2020-10-04', 91),
  ('2020-10-11', 92),
  ('2020-10-18', 93),
  ('2020-10-25', 94)],
 '2019-05': [('2019-05-05', 17),
  ('2019-05-12', 18),
  ('2019-05-19', 19),
  ('2019-05-26', 20)],
 '2019-04': [('2019-04-07', 13),
  ('2019-04-14', 14),
  ('2019-04-21', 15),
  ('2019-04-28', 16)],
 '2020-06': [('2020-06-07', 74),
  ('2020-06-14', 75),
  ('2020-06-21', 76),
  ('2020-06-28', 77)],
 '2020-02': [('2020-02-02', 56),
  ('2020-02-09', 57),
  ('2020-02-16', 58),
  ('2020-02-23', 59)],
 '2020-01': [('2020-01-05', 52),
  ('2020-01-12', 53),
  ('2020-01-19', 54),
  ('2

In [24]:
#defining function for appending the covid data to each state df
def covid_data_append(state_index):
    global state_date_list
    state_date_list = []
    for date in dates_list:
        try:
            state_date_list.append(covid_cols_df[covid_cols_df['state'] == state_codes[state_index]].loc[date, 'positive_cases'])
        except KeyError:
            print(date)
            state_date_list.append('n/a')
            continue  
    states_df_dict[f'{state_codes[state_index]}_data_df']['covid_cases'] = state_date_list
    print(states_df_dict[f'{state_codes[state_index]}_data_df'].tail(20))

In [25]:
#calling funct for states dfs index 0-11
for x in range(0, 11):
    covid_data_append(x)

2019-01-06
2019-01-13
2019-01-20
2019-01-27
2019-02-03
2019-02-10
2019-02-17
2019-02-24
2019-03-03
2019-03-10
2019-03-17
2019-03-24
2019-03-31
2019-04-07
2019-04-14
2019-04-21
2019-04-28
2019-05-05
2019-05-12
2019-05-19
2019-05-26
2019-06-02
2019-06-09
2019-06-16
2019-06-23
2019-06-30
2019-07-07
2019-07-14
2019-07-21
2019-07-28
2019-08-04
2019-08-11
2019-08-18
2019-08-25
2019-09-01
2019-09-08
2019-09-15
2019-09-22
2019-09-29
2019-10-06
2019-10-13
2019-10-20
2019-10-27
2019-11-03
2019-11-10
2019-11-17
2019-11-24
2019-12-01
2019-12-08
2019-12-15
2019-12-22
2019-12-29
2020-01-05
2020-01-12
2020-01-19
2020-01-26
2020-02-02
2020-02-09
2020-02-16
2020-02-23
2020-03-01
           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  73     80              72         58   
84   2020-08-16                  72     80              72         72   
85   2020-08-23                  75     76              69         81   
86   2020-08-30                  74  

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  97     79              89         87   
84   2020-08-16                  93     78              88         87   
85   2020-08-23                  96     78              86         92   
86   2020-08-30                  96     79              85         93   
87   2020-09-06                  93     74              83         80   
88   2020-09-13                  94     77              78         86   
89   2020-09-20                  86     74              77         88   
90   2020-09-27                  88     72              79         80   
91   2020-10-04                  86     70              76         66   
92   2020-10-11                  83     73              76         64   
93   2020-10-18                  82     70              75         68   
94   2020-10-25                  85     68              74         65   
95   2020-11-01                  78     60         

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  86     88              85         64   
84   2020-08-16                  87     87              85         75   
85   2020-08-23                  91     87              85         79   
86   2020-08-30                  86     80              87         80   
87   2020-09-06                  85     80              85         78   
88   2020-09-13                  87     82              84         84   
89   2020-09-20                  82     78              84         82   
90   2020-09-27                  81     74              78         73   
91   2020-10-04                  82     74              77         68   
92   2020-10-11                  73     72              76         61   
93   2020-10-18                  74     75              73         60   
94   2020-10-25                  75     70              71         60   
95   2020-11-01                  71     64         

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  77     92              81         53   
84   2020-08-16                  79     92              86         53   
85   2020-08-23                  81     88              79         55   
86   2020-08-30                  83     84              83         66   
87   2020-09-06                  82     88              78         77   
88   2020-09-13                  85     83              76         82   
89   2020-09-20                  81     79              75         81   
90   2020-09-27                  84     81              76         74   
91   2020-10-04                  83     78              72         69   
92   2020-10-11                  76     78              75         59   
93   2020-10-18                  76     81              72         57   
94   2020-10-25                  76     78              71         56   
95   2020-11-01                  70     67         

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  57     64              66         39   
84   2020-08-16                  58     65              70         39   
85   2020-08-23                  61     66              73         42   
86   2020-08-30                  61     68              64         44   
87   2020-09-06                  59     57              61         55   
88   2020-09-13                  58     59              64         46   
89   2020-09-20                  58     60              65         54   
90   2020-09-27                  58     58              59         46   
91   2020-10-04                  55     58              61         40   
92   2020-10-11                  53     56              55         43   
93   2020-10-18                  50     58              61         41   
94   2020-10-25                  54     57              61         40   
95   2020-11-01                  48     48         

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  90     88              90         69   
84   2020-08-16                  90     90              87         82   
85   2020-08-23                  91     85              88         91   
86   2020-08-30                  86     82              87         90   
87   2020-09-06                  87     84              86         85   
88   2020-09-13                  86     84              84         90   
89   2020-09-20                  83     81              86         88   
90   2020-09-27                  81     81              83         79   
91   2020-10-04                  86     79              83         77   
92   2020-10-11                  77     78              82         66   
93   2020-10-18                  81     77              79         69   
94   2020-10-25                  79     74              80         65   
95   2020-11-01                  71     63         

In [None]:
#defining a funct for appending whether stay at home orders or gathering bans are happening
def value_changer(index_no):
    print(sah_data_df[sah_data_df['state_name'] == state_names[index_no]]) 
    query = input('is there a stay at home order? Yes/No')
    if query == 'Yes':
        start_month = input('start month')
        end_month = input('end month')
        print(months_dates_dict[f'2020-{start_month}'])
        print(months_dates_dict[f'2020-{end_month}'])
        val1 = int(input('start index'))
        val2 = int(input('end index(first index after the end of the ban)'))
        response = input('gathering ban? Yes/No')
        if response == 'Yes':
            for x in range(val1, val2):
                states_df_dict[f'{state_codes[index_no]}_data_df'].iat[x, -2] = 'True'
                states_df_dict[f'{state_codes[index_no]}_data_df'].iat[x, -1] = 'True'
        elif response == 'No':
            for x in range(val1, val2):
                states_df_dict[f'{state_codes[index_no]}_data_df'].iat[x, -2] = 'True'
        print(states_df_dict[f'{state_codes[index_no]}_data_df'].iloc[(val1-2):(val2+2), :])
    elif query == 'No':
        print(f'{state_codes[index_no]}_data_df has no bans')
        print(states_df_dict[f'{state_codes[index_no]}_data_df'].tail(10))
    else:
        ask = input('Did you make a mistake?Yes/No')
        if ask == 'Yes':
            value_changer(index_no)
        else:
            print(f'{state_codes[index_no]}_data_df has no bans')
            print(states_df_dict[f'{state_codes[index_no]}_data_df'].tail(10))

In [None]:
#calling the funct for all of the states ind 0-11
for x in range(0, 11):
    value_changer(x)

In [None]:
#test re-reading back in
path = f'Resources/US-AK_cat_data.csv'
test_AK_df = pd.read_csv(path)
test_AK_df.tail(20)