# **ANALYSIS OF THE COVID-19 POLICY DECISIONS IN THE UNITED STATES**

*Policy decisions and their dates, and the CSV files used in this work, were taken from the "COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University" ("JHU CSSE COVID-19 Data" for short): https://github.com/CSSEGISandData/COVID-19 and
https://coronavirus.jhu.edu/data/state-timeline*

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
%matplotlib inline

In [None]:
# JHU CSSE cumulative US time series: deaths, then confirmed cases.
infile1 = "../input/covid-19-time-series-john-hopkins-data/time_series_covid19_deaths_US.csv"
covid_data_deaths_df = pd.read_csv(infile1)

infile2 = "../input/covid-19-time-series-john-hopkins-data/time_series_covid19_confirmed_US.csv"
covid_data_confirmed_cases_df = pd.read_csv(infile2)

# Two daily reports, used later to compare case fatality ratios between dates.
infile3 = "../input/covid-19-time-series-john-hopkins-data/07-09-2021.csv"
covid_data_daily_report_07_09_2021 = pd.read_csv(infile3)

infile4 = "../input/covid-19-time-series-john-hopkins-data/04-21-2020.csv"
covid_data_daily_report_04_21_2020 = pd.read_csv(infile4)

In [None]:
# No `dropna` here on purpose: `DataFrame.dropna` returns a new frame, so calling
# it without assigning the result (as this cell used to do) never changed the
# data. Assigning it with how='any' would remove every row that has a missing
# value in any column.
# NOTE(review): confirm whether any NaN filtering is actually wanted before adding it.

# Silence pandas' chained-assignment warning for the rest of the notebook.
pd.options.mode.chained_assignment = None

# Label of the last column of the confirmed-cases table (the most recent date).
last_date = covid_data_confirmed_cases_df.columns[-1]

# Preview of the 04/21/2020 daily report (displayed as the cell output).
covid_data_daily_report_04_21_2020.head(5)

In [None]:
# Identifier and geography columns that are removed from both time-series tables.
_unused_columns = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Country_Region', 'Lat', 'Long_', 'Admin2']

# 'Combined_Key' becomes 'City' in both tables so they can be looked up the same way.
covid_data_deaths_df = (
    covid_data_deaths_df
    .rename(columns={'Combined_Key': 'City'})
    .drop(columns=_unused_columns)
)
covid_data_confirmed_cases_df = (
    covid_data_confirmed_cases_df
    .rename(columns={'Combined_Key': 'City'})
    .drop(columns=_unused_columns)
)

In [None]:
# Preview the first five rows of the deaths table after the rename/drop step.
covid_data_deaths_df.head(5)

In [None]:
# Index the confirmed-cases table by 'City' so rows can be selected with
# labels such as 'Queens, New York, US'.
covid_data_confirmed_cases_df = covid_data_confirmed_cases_df.set_index('City')
covid_data_confirmed_cases_df.head(5)

In [None]:
# Index the deaths table by 'City' as well, matching the confirmed-cases table.
covid_data_deaths_df = covid_data_deaths_df.set_index('City')
covid_data_deaths_df.head(5)

In [None]:
# Deaths on the latest date, normalised to deaths per 100,000 residents.
covid_data_deaths_df['total_deaths_per_100k'] = (
    (covid_data_deaths_df[last_date] / covid_data_deaths_df['Population']) * 100000
)
# A zero population yields NaN (0/0) or +/-inf (n/0). `fillna` alone only
# catches the NaN case, so map the infinities to NaN first and report 0 for both.
covid_data_deaths_df['total_deaths_per_100k'] = (
    covid_data_deaths_df['total_deaths_per_100k']
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)
print('Queens deaths per 100k: ', covid_data_deaths_df.loc['Queens, New York, US', 'total_deaths_per_100k'])

In [None]:
# Move the last column (the per-100k total appended earlier) to the third
# position so that every date column sits after the three descriptive columns.
_all_columns = covid_data_deaths_df.columns.tolist()
_leading, _middle, _trailing = _all_columns[:2], _all_columns[2:-1], _all_columns[-1:]
columns = _leading + _trailing + _middle
covid_data_deaths_df = covid_data_deaths_df[columns]
covid_data_deaths_df.head(5)

In [None]:
def fix_columns():
    '''Return the entries of the global `columns` list that are not
    'Province_State', 'Population' or 'total_deaths_per_100k'.'''

    non_date_columns = ('Province_State', 'Population', 'total_deaths_per_100k')
    return [name for name in columns if name not in non_date_columns]


columns_to_plot = fix_columns()
# One integer x position per plotted column.
my_arr = np.arange(len(columns_to_plot))

In [None]:
# Column labels from the fourth column onwards, paired one-to-one with `my_arr`.
xticks_dates = [date_label for _, date_label in zip(my_arr, columns[3:])]

# Keep every 15th label for the x-axis ticks used by the plots.
xticks_dates_labels = np.array(xticks_dates)
xticks_dates_labels_new = list(xticks_dates_labels[::15])

**VISUALIZATION # 1**

In [None]:
def vis1():
    '''This function plots Covid-19 Positive Confirmed Cases per Day (US)
    and Covid-19 Total Confirmed Deaths per Day (US) for six counties.

    Reads the module-level `covid_data_confirmed_cases_df`, `covid_data_deaths_df`,
    `my_arr`, `columns` and `xticks_dates_labels_new`. Shows one figure with two
    stacked axes and returns nothing.
    '''

    # Row label in the data -> legend label, in plotting order.
    counties = {
        'Miami-Dade, Florida, US': 'Miami-Dade',
        'San Francisco, California, US': 'San Francisco',
        'Los Angeles, California, US': 'Los Angeles',
        'Denver, Colorado, US': 'Denver',
        'Queens, New York, US': 'Queens',
        'Dallas, Texas, US': 'Dallas',
    }

    # Look every row up before creating the figure so a missing county fails early.
    # The positional slices skip the leading columns that are not plotted
    # (1 in the confirmed-cases table, 3 in the deaths table).
    total_deaths = {
        label: covid_data_deaths_df.loc[key].iloc[3:] for key, label in counties.items()
    }
    confirmed_cases = {
        label: covid_data_confirmed_cases_df.loc[key].iloc[1:] for key, label in counties.items()
    }

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True, constrained_layout=True, dpi=70)

    for ax, series_by_label in ((ax1, confirmed_cases), (ax2, total_deaths)):
        for label, series in series_by_label.items():
            ax.plot(my_arr, series, label=label)
        ax.legend()
        ax.yaxis.set_major_formatter(FormatStrFormatter('%d'))
        ax.grid(True, ls='--')
        # Green band between x positions 300 and 465 (offsets into `my_arr`).
        ax.axvline(x=300, color='green')
        ax.axvline(x=465, color='green')
        ax.axvspan(300, 465, alpha=0.2, color='green')

    # Total Covid-19 confirmed cases per county
    # set_ylim takes (bottom, top) only; it has no "step" argument. The former
    # third positional value landed in the `emit` parameter.
    ax1.set_ylim(0, 1500000)
    ax1.set_title('Covid-19 Positive Confirmed Cases per Day (US)')
    ax1.set_ylabel('Covid-19 Positive Confirmed Cases')

    # Total Covid-19 deaths per county
    ax2.set_ylabel('Covid-19 Total Confirmed Deaths')
    ax2.set_title('Covid-19 Total Confirmed Deaths per Day (US)')
    # Purple band between x positions 60 and 120, on the deaths plot only.
    ax2.axvline(x=60, color='purple')
    ax2.axvline(x=120, color='purple')
    ax2.axvspan(60, 120, alpha=0.1, color='purple')

    # The x axis is shared, so one call labels every 15th column for both plots.
    plt.xticks(np.arange(0, len(columns[3:]), 15), xticks_dates_labels_new, rotation=90, fontsize=10)
    plt.show()

vis1()

*(The shaded timeframes emphasize two important hikes in confirmed deaths.)*
What happened between November 17/2020 and May 05/2021? Why was there such a hike in cases and deaths in Los Angeles, California, but not in San Francisco? One reason can be the size of the population. This is a commonality with New York, which also had stringent policy restrictions. New York and Los Angeles are the two most populated cities in the U.S.
The latest opening policy decision before the November death rate growth in Los Angeles was made on October 20/2020: the California Department of Public Health (CDPH) released new guidance and made updates to the Blueprint for a Safer Economy, easing some restrictions. But Covid-19 cases grew, as well as the death rate. Then some restrictions went back up:
NOV 19: CURFEW FROM 10 PM TO 5 AM: Governor Gavin Newsom and the California Department of Public Health (CDPH) announced a limited Stay at Home Order requiring generally that non-essential work, movement and gatherings stop between 10 PM and 5 AM in counties in the purple tier. DEC 3: ICU CAPACITY TRIGGERS STAY AT HOME: California Health Officials announced a Regional Stay at Home Order triggered by ICU capacity. Non-essential businesses were closed. JAN 25: STAY AT HOME REQUIREMENT IS LIFTED: The Regional Stay at Home Order ended January 25, 2021. Counties returned to their appropriate tier under the Blueprint for a Safer Economy. Other state orders are still in place.
What happened in New York on 03/22/2020: New York was hit by the virus first, or at least it was where the virus exploded and propagated first. This is supported by the fact that the New York plot is the first to have a spike. The Governor expanded the previous orders to include: limiting crowd capacity for social and recreational gatherings to 50 people; closing restaurants and bars for on-premise service and moving to take-out and delivery services only; and temporarily closing movie theaters, gyms, casinos and schools. Basically, all non-essential businesses. On May 29, an executive order was issued, lifting reductions and restrictions.
Whenever restrictions were eased in 2020, cases and fatalities in New York and California jumped, which triggered new restrictions again.
On the other hand, Florida has remained open the longest, since September 25/2020, and yet fatalities per 100k remain comparable to Los Angeles (please see below in the notebook). Why, with more restrictions and for a prolonged time, do Los Angeles and Miami have about the same numbers in terms of deaths per 100 thousand people? It would also be a good idea to explore the medical infrastructure vs. the population being hospitalized. That is a big difference between the two cities. Is the metric "Confirmed positive cases per 100 thousand people" the right metric to be analyzing during a global pandemic? Also, a further analysis could be well served by incorporating the age factor. What are the ages of the people getting sick, hospitalized and dying in the later months of 2021? The Johns Hopkins website has a nice visualisation of the Covid-19 policy impact for every state, which I later discovered when trying to download the datasets. But they do not compare states vs. states, or cities vs. cities, which is what I wanted to do. I focused on New York, Miami and Los Angeles, because the data shows they have been impacted by the virus greatly and in different ways.

In [None]:
# Keep only the date columns; `drop` already returns a new frame, so the
# original deaths table is left untouched.
covid_data_deaths_per_day = covid_data_deaths_df.drop(
    ['Province_State', 'Population', 'total_deaths_per_100k'], axis=1
)
# Daily deaths = change between consecutive date columns. The first column has
# no predecessor (NaN -> 0). A negative change means the cumulative count was
# revised downwards; clip it to 0 instead of taking the absolute value, which
# would show the correction as a spike of deaths.
deaths_per_day = covid_data_deaths_per_day.diff(axis=1).fillna(0).clip(lower=0)
deaths_per_day.head(5)

**VISUALIZATION # 2**

In [None]:
def vis2():
    '''Scatter-plot the per-day Covid-19 deaths (US) of six counties,
    taken from the module-level `deaths_per_day` table.'''

    # (row label in the data, legend label), in plotting order.
    counties = (
        ('Miami-Dade, Florida, US', 'Miami-Dade'),
        ('San Francisco, California, US', 'San Francisco'),
        ('Los Angeles, California, US', 'Los Angeles'),
        ('Denver, Colorado, US', 'Denver'),
        ('Queens, New York, US', 'Queens'),
        ('Dallas, Texas, US', 'Dallas'),
    )
    daily_series = [(deaths_per_day.loc[key], label) for key, label in counties]

    fig, ax1 = plt.subplots(figsize=(8, 6), sharex=True, constrained_layout=True, dpi=100)
    for series, label in daily_series:
        ax1.scatter(my_arr, series, label=label, s=12, alpha=0.8)
    ax1.legend()
    ax1.yaxis.set_major_formatter(FormatStrFormatter('%d'))
    ax1.grid(True, ls='--')
    ax1.set_title('Covid-19 Deaths Per Day (US)')
    ax1.set_ylabel('Covid-19 Deaths')

    # Label every 15th column on the x axis.
    tick_positions = np.arange(0, len(columns[3:]), 15)
    plt.xticks(tick_positions, xticks_dates_labels_new, rotation=90, fontsize=10)
    plt.show()
vis2()

There are two interesting hikes in deaths. The first one is in Queens, NY, at the beginning of the pandemic. The second one started on November 27/2020 in Los Angeles, CA. This preceded several restrictions due to ICU capacity being impacted by all the hospitalizations.

**VISUALIZATION # 3**

In [None]:
def vis3():
    '''Scatter, for every date column of `deaths_per_day`, the highest
    per-day death count found in any single row (city).'''

    # Column-wise maximum: one value per date.
    daily_maximum = deaths_per_day.max()

    fig, ax = plt.subplots(figsize=(8,6), dpi=100)
    ax.scatter(my_arr, daily_maximum, s=10, alpha=0.8)
    ax.grid(True, ls='--')
    ax.set_title('Highest death count per day from a single city (US)')
    ax.set_ylabel('Covid-19 Deaths')

    # Label every 15th column on the x axis.
    tick_positions = np.arange(0, len(columns[3:]), 15)
    plt.xticks(tick_positions, xticks_dates_labels_new, rotation=90, fontsize=10)
    plt.show()

vis3()

In [None]:
# Division by a zero population is expected below; silence NumPy's warnings about it.
np.seterr(divide='ignore', invalid='ignore')

covid_deaths_per100k_df = covid_data_deaths_df.copy()

# Every date column from the first one up to `last_date`. Using `last_date`
# instead of a hard-coded end date keeps this in step with the data files.
_date_columns = list(covid_deaths_per100k_df.loc[:, '1/22/20':last_date].columns)

# Row-wise: deaths / population * 100000. `.values` makes the division
# positional rather than aligned on the index.
_per_100k = (
    covid_deaths_per100k_df[_date_columns].div(covid_data_deaths_df['Population'].values, axis=0)
    * 100000
)
# A zero population gives NaN (0/0) or +/-inf (n/0); report 0 for both.
covid_deaths_per100k_df[_date_columns] = _per_100k.replace([np.inf, -np.inf], np.nan).fillna(0)
covid_deaths_per100k_df.head(5)

**VISUALIZATION # 4**

In [None]:
def vis4():
    '''Plots the Deaths per 100k (US) in 6 cities.

    Reads the module-level `covid_deaths_per100k_df`, `my_arr`, `columns` and
    `xticks_dates_labels_new`. Shows one figure and returns nothing.
    '''

    # Row label in the data -> legend label, in plotting order.
    counties = {
        'Miami-Dade, Florida, US': 'Miami',
        'San Francisco, California, US': 'San Francisco',
        'Los Angeles, California, US': 'Los Angeles',
        'Denver, Colorado, US': 'Denver',
        'Queens, New York, US': 'Queens',
        'Dallas, Texas, US': 'Dallas',
    }
    # Skip the three leading non-date columns of each row.
    per_100k = {
        label: covid_deaths_per100k_df.loc[key].iloc[3:] for key, label in counties.items()
    }

    fig, axes2 = plt.subplots(figsize=(8, 6), dpi=100)
    for label, series in per_100k.items():
        axes2.plot(my_arr, series, label=label)
    axes2.grid(True, ls='--')
    plt.legend()
    plt.ylabel('Deaths per 100k', fontsize=9)
    plt.title('Deaths per 100k (US)')
    # Single place where the x ticks are set: every 15th column, labelled with
    # its date. (An earlier `set_ticks` call was always overridden by this one.)
    plt.xticks(np.arange(0, len(columns[3:]), 15), xticks_dates_labels_new, rotation=90, fontsize=10)
    plt.show()

vis4()

In [None]:
# Total deaths per 100k for Queens, as stored in 'total_deaths_per_100k'
# (that column is computed from the `last_date` column).
covid_data_deaths_df.loc['Queens, New York, US', ['total_deaths_per_100k']]
# Date for this data point: July 9/2021

In [None]:
# Total deaths per 100k for Miami-Dade, as stored in 'total_deaths_per_100k'.
covid_data_deaths_df.loc['Miami-Dade, Florida, US', ['total_deaths_per_100k']]
# Date for this data point: July 9/2021

In [None]:
# Total deaths per 100k for Los Angeles, as stored in 'total_deaths_per_100k'.
covid_data_deaths_df.loc['Los Angeles, California, US', ['total_deaths_per_100k']]
# Date for this data point: July 9/2021

In [None]:
# Total deaths per 100k for Denver, as stored in 'total_deaths_per_100k'.
covid_data_deaths_df.loc['Denver, Colorado, US', ['total_deaths_per_100k']]
# Date for this data point: July 9/2021

In [None]:
# Total deaths per 100k for Dallas, as stored in 'total_deaths_per_100k'.
covid_data_deaths_df.loc['Dallas, Texas, US', ['total_deaths_per_100k']]
# Date for this data point: July 9/2021

In [None]:
# Total deaths per 100k for San Francisco, as stored in 'total_deaths_per_100k'.
covid_data_deaths_df.loc['San Francisco, California, US', ['total_deaths_per_100k']]
# Date for this data point: July 9/2021

**VISUALIZATION # 5**

In [None]:
def vis5():
    '''Plots Death count per 100 Thousand People VS Population (US).

    Reads the 'Population' and 'total_deaths_per_100k' columns of the
    module-level `covid_deaths_per100k_df`. Shows one scatter plot with
    deaths per 100k on the x axis and population on the y axis; returns nothing.
    '''
    population = covid_deaths_per100k_df['Population'].values
    deaths_per_100k = covid_deaths_per100k_df['total_deaths_per_100k'].values
    fig, axes = plt.subplots(figsize=(8, 6), dpi=100)
    axes.scatter(deaths_per_100k, population, s=30, alpha=0.6)
    axes.yaxis.set_major_formatter(FormatStrFormatter('%d'))
    # set_ylim takes (bottom, top) only; it has no "step" argument. The former
    # third positional value landed in the `emit` parameter.
    axes.set_ylim(0, 12000000)
    plt.xlabel('Death per 100 Thousand People')
    plt.ylabel('Population')
    plt.title('Death per 100 Thousand People VS Population (US)')
    plt.show()

vis5()

In [None]:
# Los Angeles total deaths per 100k, shown again for the comparison with Miami below.
covid_data_deaths_df.loc['Los Angeles, California, US', ['total_deaths_per_100k']]

In [None]:
# Value of the '11/17/20' column for Los Angeles in the deaths table.
covid_data_deaths_df.loc['Los Angeles, California, US', ['11/17/20']] 
# TODO: subtract the death counts at two different dates with this lookup to see what the impact was.

In [None]:
# Miami-Dade total deaths per 100k, for the comparison with Los Angeles.
covid_data_deaths_df.loc['Miami-Dade, Florida, US', ['total_deaths_per_100k']]

Miami has been without many restrictions since September 25/2020, when the Governor eliminated some business restrictions, including those on restaurant operations. Since then, there have been no closing restrictions and yet the total deaths per 100 thousand people is 238.209. For Los Angeles it is 244.464. The death toll is very similar, although the restrictions for businesses and the general population have been more stringent in California.

In [None]:
# Columns removed from both daily reports.
_daily_unused = ['UID', 'Last_Update', 'Lat', 'Long_', 'Recovered', 'Active', 'FIPS',
                 'Incident_Rate', 'People_Hospitalized', 'ISO3', 'Testing_Rate']
# Columns removed from the 07/09/2021 report only.
_daily_unused_2021_only = ['Total_Test_Results', 'Hospitalization_Rate']

covid_data_daily_report_07_09_2021 = covid_data_daily_report_07_09_2021.drop(
    columns=_daily_unused + _daily_unused_2021_only
)
# In the 04/21/2020 report the 'Mortality_Rate' column is renamed to
# 'Case_Fatality_Ratio', the name used for the 07/09/2021 report below.
covid_data_daily_report_04_21_2020 = (
    covid_data_daily_report_04_21_2020
    .drop(columns=_daily_unused)
    .rename(columns={'Mortality_Rate': 'Case_Fatality_Ratio'})
)
covid_data_daily_report_07_09_2021.head(5)
covid_data_daily_report_04_21_2020.head(5)

In [None]:
# Index both daily reports by 'Province_State' so rows can be selected by state name.
covid_data_daily_report_07_09_2021 = covid_data_daily_report_07_09_2021.set_index('Province_State')
covid_data_daily_report_04_21_2020 = covid_data_daily_report_04_21_2020.set_index('Province_State')

In [None]:
def states_mortality_ratio(daily_df, state_names=None):
    '''Returns the states mortality ratio.

    daily_df: DataFrame whose index holds state names and which has a
        'Case_Fatality_Ratio' column.
    state_names: optional iterable of index labels to look up, in order.
        Defaults to Florida, Alaska, California, Utah, Colorado, New York
        and Texas.

    Returns a tuple with one `daily_df.loc[state, ['Case_Fatality_Ratio']]`
    selection per requested state, in the requested order.
    Raises KeyError when a state or the column is missing from `daily_df`.
    '''

    if state_names is None:
        state_names = ('Florida', 'Alaska', 'California', 'Utah',
                       'Colorado', 'New York', 'Texas')
    states = tuple(daily_df.loc[name, ['Case_Fatality_Ratio']] for name in state_names)
    return states

In [None]:
# Case fatality ratios of the selected states for each of the two report dates.
daily_report_04_21_2020 = states_mortality_ratio(covid_data_daily_report_04_21_2020)
daily_report_07_09_2021 = states_mortality_ratio(covid_data_daily_report_07_09_2021)

**VISUALIZATION # 6**

In [None]:
def vis6(states_date1,states_date2, date1, date2):
    '''Plots a barh graph to compare case fatality ratio across states on different dates.

    states_date1, states_date2: iterables of objects exposing `.name` (bar label)
        and `.values` (bar length), e.g. the Series returned by
        `states_mortality_ratio`; drawn on the top and bottom axes respectively.
    date1, date2: values interpolated into the two subplot titles.

    Shows the figure and returns nothing.
    '''

    #CASE FATALITY RATIO = (DEATHS/CONFIRMED) * 100
    fig, axes3 = plt.subplots(2, 1, figsize=(10, 8), constrained_layout=True, sharex=True, dpi=100)
    axes3[1].set_xlabel('Case Fatality Ratio (%)')
    axes3[0].set_title(f'(US) Case Fatality Ratio per State  {date1}')
    axes3[1].set_title(f'(US) Case Fatality Ratio per State {date2}')
    # One horizontal bar per state: top axes for the first date, bottom for the second.
    for ax, states in zip(axes3, (states_date1, states_date2)):
        for state in states:
            ax.barh(state.name, state.values, align='center')
    plt.show()

In [None]:
# Compare the case fatality ratios of 04/21/2020 (top) with 07/09/2021 (bottom).
vis6(daily_report_04_21_2020 ,daily_report_07_09_2021, '04/21/2020', '07/09/2021')

The case fatality ratio on 07/09/2021 (CASE FATALITY RATIO = (DEATHS/CONFIRMED CASES) * 100) has been greatly reduced compared to 04/21/2020. According to this graph, there is an improvement in the chances of surviving the virus. One could think that, at this point in the pandemic, a great percentage of people over 60 years old have been vaccinated.