In [1]:
#dependencies
import pandas as pd
import numpy as np
from datetime import datetime as dt

In [2]:
#reading in the covid data file
path = 'input_data/covid_data/all-states-history-updated.csv'
all_covid_data_df = pd.read_csv(path, low_memory=False)
all_covid_data_df.head()

Unnamed: 0,date,state,dataQualityGrade,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
0,2021-01-04,AK,A,218.0,218.0,3,,1033.0,1033.0,97.0,...,1297498.0,3661,,,,,,0,1297498.0,3661
1,2021-01-04,AL,A,4878.0,4259.0,5,619.0,35430.0,35430.0,3064.0,...,1903388.0,3318,,,88653.0,,1903388.0,3318,,0
2,2021-01-04,AR,A+,3800.0,3178.0,51,622.0,11514.0,11514.0,1296.0,...,2095621.0,5424,,21856.0,,254026.0,,0,2095621.0,5424
3,2021-01-04,AS,D,0.0,,0,,,,,...,2140.0,0,,,,,,0,2140.0,0
4,2021-01-04,AZ,A+,9064.0,8173.0,3,891.0,39897.0,39897.0,4647.0,...,5288658.0,31186,393204.0,,,,2937589.0,14822,5288658.0,31186


In [3]:
#displaying available columns
all_covid_data_df.columns

Index(['date', 'state', 'dataQualityGrade', 'death', 'deathConfirmed',
       'deathIncrease', 'deathProbable', 'hospitalized',
       'hospitalizedCumulative', 'hospitalizedCurrently',
       'hospitalizedIncrease', 'inIcuCumulative', 'inIcuCurrently', 'negative',
       'negativeIncrease', 'negativeTestsAntibody',
       'negativeTestsPeopleAntibody', 'negativeTestsViral',
       'onVentilatorCumulative', 'onVentilatorCurrently', 'positive',
       'positiveCasesViral', 'positiveIncrease', 'positiveScore',
       'positiveTestsAntibody', 'positiveTestsAntigen',
       'positiveTestsPeopleAntibody', 'positiveTestsPeopleAntigen',
       'positiveTestsViral', 'recovered', 'totalTestEncountersViral',
       'totalTestEncountersViralIncrease', 'totalTestResults',
       'totalTestResultsIncrease', 'totalTestsAntibody', 'totalTestsAntigen',
       'totalTestsPeopleAntibody', 'totalTestsPeopleAntigen',
       'totalTestsPeopleViral', 'totalTestsPeopleViralIncrease',
       'totalTestsViral'

In [4]:
#creating trimmed dict with relevant columns
covid_cols_dict = {
    'date': [x for x in all_covid_data_df['date']],
    'state': [x for x in all_covid_data_df['state']],
    'positive_cases': [x for x in all_covid_data_df['positive']],
    'total_tests': [x for x in all_covid_data_df['totalTestResults']]
}

In [5]:
#creating a df
covid_cols_df = pd.DataFrame(covid_cols_dict)
covid_cols_df.head()

Unnamed: 0,date,state,positive_cases,total_tests
0,2021-01-04,AK,46812.0,1297498.0
1,2021-01-04,AL,374095.0,1903388.0
2,2021-01-04,AR,234781.0,2095621.0
3,2021-01-04,AS,0.0,2140.0
4,2021-01-04,AZ,561542.0,5288658.0


In [6]:
#creating a new col in covid cols df for %age of positive cases
covid_cols_df['percentage_positive'] = (covid_cols_df['positive_cases']/covid_cols_df['total_tests']) * 100
covid_cols_df.head()

Unnamed: 0,date,state,positive_cases,total_tests,percentage_positive
0,2021-01-04,AK,46812.0,1297498.0,3.607867
1,2021-01-04,AL,374095.0,1903388.0,19.654164
2,2021-01-04,AR,234781.0,2095621.0,11.203409
3,2021-01-04,AS,0.0,2140.0,0.0
4,2021-01-04,AZ,561542.0,5288658.0,10.617854


In [7]:
#reset index as the dates
covid_cols_df = covid_cols_df.set_index('date')

In [8]:
#showing the head of the covid cols df as a check
covid_cols_df.head()

Unnamed: 0_level_0,state,positive_cases,total_tests,percentage_positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-04,AK,46812.0,1297498.0,3.607867
2021-01-04,AL,374095.0,1903388.0,19.654164
2021-01-04,AR,234781.0,2095621.0,11.203409
2021-01-04,AS,0.0,2140.0,0.0
2021-01-04,AZ,561542.0,5288658.0,10.617854


In [9]:
#output the condensed covid data
output_path = 'covid_condensed.csv'
covid_cols_df.to_csv(output_path, index=False)

In [10]:
#reading in the state abbreviations
path = 'input_data/google_trends_data/statesAbbrev.csv'
statesAbbrev_df = pd.read_csv(path)
statesAbbrev_df.tail(12)

Unnamed: 0,State,Abbrev,Code
39,Rhode Island,R.I.,RI
40,South Carolina,S.C.,SC
41,South Dakota,S.D.,SD
42,Tennessee,Tenn.,TN
43,Texas,Tex.,TX
44,Utah,Utah,UT
45,Vermont,Vt.,VT
46,Virginia,Va.,VA
47,Washington,Wash.,WA
48,West Virginia,W.Va.,WV


In [11]:
#list for state codes
state_codes = [x for x in statesAbbrev_df['Code']]
state_codes

['AL',
 'AK',
 'AZ',
 'AR',
 'CA',
 'CO',
 'CT',
 'DE',
 'DC',
 'FL',
 'GA',
 'HI',
 'ID',
 'IL',
 'IN',
 'IA',
 'KS',
 'KY',
 'LA',
 'ME',
 'MD',
 'MA',
 'MI',
 'MN',
 'MS',
 'MO',
 'MT',
 'NE',
 'NV',
 'NH',
 'NJ',
 'NM',
 'NY',
 'NC',
 'ND',
 'OH',
 'OK',
 'OR',
 'PA',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VT',
 'VA',
 'WA',
 'WV',
 'WI',
 'WY']

In [12]:
#list for state names
state_names = [x for x in statesAbbrev_df['State']]
state_names[11]

'Hawaii'

In [13]:
#finding the codes for each of the states concerned here
state_codes[11], state_codes[31], state_codes[32], state_codes[33], state_codes[34], state_codes[35], state_codes[36], state_codes[37], state_codes[38], state_codes[39]

('HI', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI')

In [14]:
#reading in the stay at home order df
path = 'input_data/covid_data/full_SAH_orders.csv'
sah_data_df = pd.read_csv(path)
sah_data_df.head()

Unnamed: 0,state_name,start_date,end_date,gathering_bans,previous_orders_start_date,previous_orders_end_date
0,Hawaii,4/25/20,5/31/20,,,
1,Washington,4/2/20,5/4/20,,,
2,Montana,4/24/20,4/26/20,More than 10,,
3,Maine,4/2/20,4/30/20,,,
4,North Dakota,,,,,


In [15]:
#creating a df dict to hold the state dfs, reading in the dfs
states_df_dict = {}
for x in range(30, 40):
    path = f'input_data/states_combined_data/US-{state_codes[x]}_cat_data.csv'
    states_df_dict[f'{state_codes[x]}_data_df'] = pd.read_csv(path)

In [16]:
#testing to see how many states lie in the df dict
len(states_df_dict)

10

In [17]:
#inspecting the keys (ie df names)
states_df_dict.keys()

dict_keys(['NJ_data_df', 'NM_data_df', 'NY_data_df', 'NC_data_df', 'ND_data_df', 'OH_data_df', 'OK_data_df', 'OR_data_df', 'PA_data_df', 'RI_data_df'])

In [18]:
#creating a list of all the dates the states df have cat data for
dates_list = [x for x in states_df_dict[f'{state_codes[32]}_data_df']['time']]
dates_list

['2019-01-06',
 '2019-01-13',
 '2019-01-20',
 '2019-01-27',
 '2019-02-03',
 '2019-02-10',
 '2019-02-17',
 '2019-02-24',
 '2019-03-03',
 '2019-03-10',
 '2019-03-17',
 '2019-03-24',
 '2019-03-31',
 '2019-04-07',
 '2019-04-14',
 '2019-04-21',
 '2019-04-28',
 '2019-05-05',
 '2019-05-12',
 '2019-05-19',
 '2019-05-26',
 '2019-06-02',
 '2019-06-09',
 '2019-06-16',
 '2019-06-23',
 '2019-06-30',
 '2019-07-07',
 '2019-07-14',
 '2019-07-21',
 '2019-07-28',
 '2019-08-04',
 '2019-08-11',
 '2019-08-18',
 '2019-08-25',
 '2019-09-01',
 '2019-09-08',
 '2019-09-15',
 '2019-09-22',
 '2019-09-29',
 '2019-10-06',
 '2019-10-13',
 '2019-10-20',
 '2019-10-27',
 '2019-11-03',
 '2019-11-10',
 '2019-11-17',
 '2019-11-24',
 '2019-12-01',
 '2019-12-08',
 '2019-12-15',
 '2019-12-22',
 '2019-12-29',
 '2020-01-05',
 '2020-01-12',
 '2020-01-19',
 '2020-01-26',
 '2020-02-02',
 '2020-02-09',
 '2020-02-16',
 '2020-02-23',
 '2020-03-01',
 '2020-03-08',
 '2020-03-15',
 '2020-03-22',
 '2020-03-29',
 '2020-04-05',
 '2020-04-

In [19]:
#creating a list of unique year-month combos
all_month_year = [x[0:7] for x in dates_list]
month_year = list(set(all_month_year))
month_year

['2019-03',
 '2019-07',
 '2020-09',
 '2020-01',
 '2019-06',
 '2019-05',
 '2019-04',
 '2020-06',
 '2019-10',
 '2019-09',
 '2020-03',
 '2019-12',
 '2019-11',
 '2020-12',
 '2019-08',
 '2020-04',
 '2020-02',
 '2020-11',
 '2020-10',
 '2020-08',
 '2020-05',
 '2020-07',
 '2019-01',
 '2019-02']

In [20]:
#function for appending dates per month, and their indices, to a month_date_dict
months_dates_dict = {}
def date_dict_append(m):
    month_date_list = []
    for x in range(0, len(dates_list)):
        if dates_list[x][0:7] == month_year[m]:
            month_date_list.append((dates_list[x], x))
    global months_dates_dict
    months_dates_dict[f'{month_year[m]}'] = month_date_list
    print(month_date_list)

In [21]:
#calling the function for each month in the month-year dict
for m in range(0, len(month_year)):
    date_dict_append(m)

[('2019-03-03', 8), ('2019-03-10', 9), ('2019-03-17', 10), ('2019-03-24', 11), ('2019-03-31', 12)]
[('2019-07-07', 26), ('2019-07-14', 27), ('2019-07-21', 28), ('2019-07-28', 29)]
[('2020-09-06', 87), ('2020-09-13', 88), ('2020-09-20', 89), ('2020-09-27', 90)]
[('2020-01-05', 52), ('2020-01-12', 53), ('2020-01-19', 54), ('2020-01-26', 55)]
[('2019-06-02', 21), ('2019-06-09', 22), ('2019-06-16', 23), ('2019-06-23', 24), ('2019-06-30', 25)]
[('2019-05-05', 17), ('2019-05-12', 18), ('2019-05-19', 19), ('2019-05-26', 20)]
[('2019-04-07', 13), ('2019-04-14', 14), ('2019-04-21', 15), ('2019-04-28', 16)]
[('2020-06-07', 74), ('2020-06-14', 75), ('2020-06-21', 76), ('2020-06-28', 77)]
[('2019-10-06', 39), ('2019-10-13', 40), ('2019-10-20', 41), ('2019-10-27', 42)]
[('2019-09-01', 34), ('2019-09-08', 35), ('2019-09-15', 36), ('2019-09-22', 37), ('2019-09-29', 38)]
[('2020-03-01', 60), ('2020-03-08', 61), ('2020-03-15', 62), ('2020-03-22', 63), ('2020-03-29', 64)]
[('2019-12-01', 47), ('2019-12-

In [22]:
#printing the dict
months_dates_dict

{'2019-03': [('2019-03-03', 8),
  ('2019-03-10', 9),
  ('2019-03-17', 10),
  ('2019-03-24', 11),
  ('2019-03-31', 12)],
 '2019-07': [('2019-07-07', 26),
  ('2019-07-14', 27),
  ('2019-07-21', 28),
  ('2019-07-28', 29)],
 '2020-09': [('2020-09-06', 87),
  ('2020-09-13', 88),
  ('2020-09-20', 89),
  ('2020-09-27', 90)],
 '2020-01': [('2020-01-05', 52),
  ('2020-01-12', 53),
  ('2020-01-19', 54),
  ('2020-01-26', 55)],
 '2019-06': [('2019-06-02', 21),
  ('2019-06-09', 22),
  ('2019-06-16', 23),
  ('2019-06-23', 24),
  ('2019-06-30', 25)],
 '2019-05': [('2019-05-05', 17),
  ('2019-05-12', 18),
  ('2019-05-19', 19),
  ('2019-05-26', 20)],
 '2019-04': [('2019-04-07', 13),
  ('2019-04-14', 14),
  ('2019-04-21', 15),
  ('2019-04-28', 16)],
 '2020-06': [('2020-06-07', 74),
  ('2020-06-14', 75),
  ('2020-06-21', 76),
  ('2020-06-28', 77)],
 '2019-10': [('2019-10-06', 39),
  ('2019-10-13', 40),
  ('2019-10-20', 41),
  ('2019-10-27', 42)],
 '2019-09': [('2019-09-01', 34),
  ('2019-09-08', 35),
  (

In [23]:
#defining function for appending the covid data to each state df
def covid_data_append(state_index):
    global state_date_list
    state_date_list = []
    for date in dates_list:
        try:
            state_date_list.append(covid_cols_df[covid_cols_df['state'] == state_codes[state_index]].loc[date, 'positive_cases'])
        except KeyError:
            print(date)
            state_date_list.append('n/a')
            continue  
    states_df_dict[f'{state_codes[state_index]}_data_df']['covid_cases'] = state_date_list
    print(states_df_dict[f'{state_codes[state_index]}_data_df'].tail(20))

In [24]:
#calling funct for states dfs index 40-50
for x in range(30, 40):
    covid_data_append(x)

2019-01-06
2019-01-13
2019-01-20
2019-01-27
2019-02-03
2019-02-10
2019-02-17
2019-02-24
2019-03-03
2019-03-10
2019-03-17
2019-03-24
2019-03-31
2019-04-07
2019-04-14
2019-04-21
2019-04-28
2019-05-05
2019-05-12
2019-05-19
2019-05-26
2019-06-02
2019-06-09
2019-06-16
2019-06-23
2019-06-30
2019-07-07
2019-07-14
2019-07-21
2019-07-28
2019-08-04
2019-08-11
2019-08-18
2019-08-25
2019-09-01
2019-09-08
2019-09-15
2019-09-22
2019-09-29
2019-10-06
2019-10-13
2019-10-20
2019-10-27
2019-11-03
2019-11-10
2019-11-17
2019-11-24
2019-12-01
2019-12-08
2019-12-15
2019-12-22
2019-12-29
2020-01-05
2020-01-12
2020-01-19
2020-01-26
2020-02-02
2020-02-09
           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  84     94              78         53   
84   2020-08-16                  81     92              76         53   
85   2020-08-23                  82     87              75         55   
86   2020-08-30                  81     85              78         59 

2019-03-03
2019-03-10
2019-03-17
2019-03-24
2019-03-31
2019-04-07
2019-04-14
2019-04-21
2019-04-28
2019-05-05
2019-05-12
2019-05-19
2019-05-26
2019-06-02
2019-06-09
2019-06-16
2019-06-23
2019-06-30
2019-07-07
2019-07-14
2019-07-21
2019-07-28
2019-08-04
2019-08-11
2019-08-18
2019-08-25
2019-09-01
2019-09-08
2019-09-15
2019-09-22
2019-09-29
2019-10-06
2019-10-13
2019-10-20
2019-10-27
2019-11-03
2019-11-10
2019-11-17
2019-11-24
2019-12-01
2019-12-08
2019-12-15
2019-12-22
2019-12-29
2020-01-05
2020-01-12
2020-01-19
2020-01-26
2020-02-02
2020-02-09
2020-02-16
2020-02-23
2020-03-01
           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  81     81              88         53   
84   2020-08-16                  77     76              85         56   
85   2020-08-23                  76     72              79         53   
86   2020-08-30                  78     74              82         57   
87   2020-09-06                  82     78          

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  78     78              84         51   
84   2020-08-16                  78     76              80         61   
85   2020-08-23                  78     71              71         64   
86   2020-08-30                  86     71              76         76   
87   2020-09-06                  84     71              75         76   
88   2020-09-13                  83     72              78         80   
89   2020-09-20                  86     69              74         76   
90   2020-09-27                  86     67              73         74   
91   2020-10-04                  83     66              79         65   
92   2020-10-11                  81     65              72         71   
93   2020-10-18                  86     71              69         57   
94   2020-10-25                  89     66              75         65   
95   2020-11-01                  79     54         

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  71     78              71         52   
84   2020-08-16                  72     78              73         61   
85   2020-08-23                  77     77              71         75   
86   2020-08-30                  78     72              70         76   
87   2020-09-06                  74     75              71         73   
88   2020-09-13                  73     69              70         84   
89   2020-09-20                  73     71              67         72   
90   2020-09-27                  71     71              67         68   
91   2020-10-04                  72     70              68         71   
92   2020-10-11                  68     67              66         54   
93   2020-10-18                  68     64              63         56   
94   2020-10-25                  66     60              60         50   
95   2020-11-01                  62     58         

           time  arts_entertainment  autos  beauty_fitness  books_lit  \
83   2020-08-09                  80     80              85         53   
84   2020-08-16                  80     79              82         53   
85   2020-08-23                  81     77              81         59   
86   2020-08-30                  82     72              79         74   
87   2020-09-06                  79     70              77         75   
88   2020-09-13                  80     67              76         81   
89   2020-09-20                  83     69              80         79   
90   2020-09-27                  80     68              75         71   
91   2020-10-04                  80     67              76         68   
92   2020-10-11                  75     67              73         58   
93   2020-10-18                  75     64              70         56   
94   2020-10-25                  78     63              73         61   
95   2020-11-01                  73     55         

_Guide for dealing with the value_changer function:_

1 - When you run this function, you'll get the question 'Is there a stay at home order?', along with an initial printout showing the rows of the df with stay at home information for the specific state you're running the function on. Look at the output under 'start_date' and 'end_date'. If there are dates under those fields, there is a stay at home order, so enter 'Yes' into the input box. If it says NaN under 'start_date' and 'end_date', enter 'No'. ***Capitalisation matters throughout this function - please capitalise the first letter. Meaning, 'Yes' or 'No', not 'yes' or 'no'.***
(If you enter No, you'll get a acknowlegement that the specific state has no stay at home orders, then the function will move on to the next state you entered into the function, and you'll need to start with step 1 for that new state)

2 - If you entered 'Yes' for the first question, you'll get a request to enter a start month (and then, an end month). First, check the original printout you got along with the first question. Check the fields 'previous_orders_start_date' and 'previous_orders_end_date'. If there are dates under those fields, that is where you will find your start month information. If there are NaNs under those fields, you'll take your start and end month values from the 'start_date' and 'end_date' fields. You will need to check whether there is a gap in time between the 'previous_orders_end_date' and the 'start_date' (for example, if the 'previous_orders_end_date' is 5/12/2020, and the 'start_date' is '5/13/2020', there is no gap; but if the 'previous_orders_end_date' is 4/10/2020 and the 'start_date' is 5/17/2020, there is a gap). If there is a gap, use the month in 'previous_orders_end_date' as your end month. If there is no gap, the end month is in the 'end_date' field.
To get the indices from the particular fields, look at the month in the date (dates here are mm/dd/yyyy). If the month is 1-9, you will need to input a zero before the number when filling in the 'start index' input (ex: date 4/7/2020 - enter 04 for the start index), but if the number is 10-12, just enter it as normal.

3 - You'll then get a printout of pairs of numbers, which are dates, and indices that correspond to them. These are dates for which we have google trends data. You'll also get a request for a start index (and then after, an end index). You'll want to compare the ouput you got with question 1, with this printout. For the start index, you want to find the date in the new printout that is the same date, or the next available date, as the date you used as your start month. These dates are in the format yyyy-mm-dd. For example, if my start date is 4/7/2020, and I am presented with the following number pairs (('2020-04-05', 65), ('2020-04-12', 66), ('2020-04-19', 67), ('2020-04-26', 68)), I want to find the date that is either the 7th of April, or the first date that comes after it. In this case, I would choose the pair ('2020-04-12', 66). I would then input the index from this pair, which is 66, into the input box. For the end date, you want to find the first date/index pair that is after the date you used for your end month. For example, if my end date is 12/10/2020, and I was presented with the pairs (('2020-12-06', 100), ('2020-12-13', 101), ('2020-12-20', 102)), I would want to look for the first date after the 10th of December, which here would be ('2020-12-13', 101). I would then input the index from this pair, 101, into the input box. 

4 - You'll then get another question, 'is there another non-overlapping stay at home order? If you discerned in step 2 that there is a gap between the previous orders and the start_date of the most recent order, enter 'Yes' into the input box. You'll then go through step 3 again, but you'll use the 'start_date' and 'end_date' fields as your start month and end month, instead of the 'previous_orders_start_date' and 'previous_orders_end_date' you already used in step 3. 
(If there is no gap, enter 'No', you'll get an acknowlegement that the state you're working on only has one ban, and you'll continue on to step 5.)

5 - You'll get the question, 'gathering ban?'. Refer back to the first printout you got, to see if there is any information under the field 'gathering ban'. If there is information such as 'more than 10', or 'any size', or any actual description, enter 'Yes' into the input box. If it says NaN under 'gathering ban', input 'No'. 
Whether you put Yes or No, the next thing you will see is a printout of the last ten rows of the df of the state you've been working on, as a way of seeing whether the changes you made above 'took' (i.e.: are there now Trues and Falses in the columns 'stay_at_home' and 'mass_gathering_ban'?

After this printout, you'll be at the next state you've called the function on, and will repeat steps 1-5 for that state.

In [36]:
#reset index to be State Name
sah_data_df.set_index('state_name', inplace=True)


KeyError: "None of ['state_name'] are in the columns"

In [46]:

def build_baninfo_toappend(index_no):
    #print(index_no)
    #print(sah_data_df[sah_data_df['state_name']== state_names[index_no]])
    #print(sah_data_df.loc[state_names[index_no]:])
    current_sah_df = sah_data_df.loc[state_names[index_no]]
    current_dict = states_df_dict[f'{state_codes[index_no]}_data_df']
    
    if pd.isnull(current_sah_df[0]):
        print(f'{state_codes[index_no]}_data_df has no bans')
        print(current_dict.tail(10))
            
        #inputs result for Mass Gathering Ban & SAH orders, when none
        for x in range(len(current_dict)):
            current_dict.iat[x, -1] = 'False'
            current_dict.iat[x, -2] = 'False'
    else:
        #start_month = pd.to_datetime(sah_data_df['start_date']).dt.strftime('%m')
        #end_month = pd.to_datetime(sah_data_df['end_date']).dt.strftime('%m')
        if current_sah_df[1] == 'Until further notice':
            start_date = current_sah_df[0]
            end_date = "2020-12-31"
        else:
            start_date = current_sah_df[0]
            end_date = current_sah_df[1]
        
        for x in range(len(current_dict)):
            print (x,current_dict.iloc[x]['time'],start_date, end_date, current_dict.iloc[x]['time'] >= start_date and current_dict.iloc[x]['time'] <= end_date)
            if current_dict.iloc[x]['time'] >= start_date and current_dict.iloc[x]['time'] <= end_date:
                current_dict.iat[x, -2] = 'True'
            else:
                current_dict.iat[x, -2] = 'False'
         
            #checks for mass gathering ban
            if pd.isnull(current_sah_df[2])!= True:
                if current_dict.iloc[x]['time'] >= start_date and current_dict.iloc[x]['time'] <= end_date:
                    #if falls within date range, set MGB to True
                    current_dict.iat[x, -1] = 'True'
                else:    
                    #if falls outside date range, set BGC to False
                    current_dict.iat[x, -1] = 'False'
            else:
                current_dict_iat[x,-2] = 'False'
                
    #print(current_dict)

In [47]:
state_names[32]
#print(states_df_dict[f'NY_data_df'].iloc[0]['time'])
#print(sah_data_df.loc['Hawaii'])  
#sah_data_df.set_index("state_name",inplace=True)
# pd.isnull(sah_data_df.loc[state_names[index_no]][0])
# sah_data_df.loc['Hawaii'][[2]]
for st in range(32,33):
    build_baninfo_toappend(st)


0 2019-01-06 4/7/20 4/29/20 False
1 2019-01-13 4/7/20 4/29/20 False
2 2019-01-20 4/7/20 4/29/20 False
3 2019-01-27 4/7/20 4/29/20 False
4 2019-02-03 4/7/20 4/29/20 False
5 2019-02-10 4/7/20 4/29/20 False
6 2019-02-17 4/7/20 4/29/20 False
7 2019-02-24 4/7/20 4/29/20 False
8 2019-03-03 4/7/20 4/29/20 False
9 2019-03-10 4/7/20 4/29/20 False
10 2019-03-17 4/7/20 4/29/20 False
11 2019-03-24 4/7/20 4/29/20 False
12 2019-03-31 4/7/20 4/29/20 False
13 2019-04-07 4/7/20 4/29/20 False
14 2019-04-14 4/7/20 4/29/20 False
15 2019-04-21 4/7/20 4/29/20 False
16 2019-04-28 4/7/20 4/29/20 False
17 2019-05-05 4/7/20 4/29/20 False
18 2019-05-12 4/7/20 4/29/20 False
19 2019-05-19 4/7/20 4/29/20 False
20 2019-05-26 4/7/20 4/29/20 False
21 2019-06-02 4/7/20 4/29/20 False
22 2019-06-09 4/7/20 4/29/20 False
23 2019-06-16 4/7/20 4/29/20 False
24 2019-06-23 4/7/20 4/29/20 False
25 2019-06-30 4/7/20 4/29/20 False
26 2019-07-07 4/7/20 4/29/20 False
27 2019-07-14 4/7/20 4/29/20 False
28 2019-07-21 4/7/20 4/29/20 F

In [48]:
#output the dfs
for x in range(32, 33):
    output_path = f'input_data/states_combined_data/US-{state_codes[x]}_cat_data.csv'
    states_df_dict[f'{state_codes[x]}_data_df'].to_csv(output_path, index=False)