# What this file does

Examines the town-level correlation between the vaccination rate and the COVID-19 case rate per 100,000 population over the previous two weeks.

In [38]:
#tool setup 
!pip install plotly

Collecting plotly
  Downloading plotly-5.2.1-py2.py3-none-any.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 18.1 MB/s eta 0:00:01
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.2.1 tenacity-8.0.1


In [39]:
#imports
import pandas as pd
import plotly.express as px

In [2]:
# Datasets: list the working directory to confirm both source CSVs are present
# next to this notebook before they are loaded.
!ls

COVID-19_Vaccinations_by_Town.csv
COVID-19_case_rate_per_100_000_population_and_percent_test_positivity_in_the_last_14_days_by_town.csv
correlation.ipynb


# Vaccinations

In [8]:
# Load the per-town vaccination history from the CSV sitting next to the notebook.
df_vaxx = pd.read_csv(
    filepath_or_buffer='COVID-19_Vaccinations_by_Town.csv',
)

In [52]:
# Most recent vaccination update. Parse inside the expression so this cell gives
# a real date maximum even when it runs before the column is converted to
# datetime (top-to-bottom order); on raw, unparsed values max() would not compare
# by date. Parsing an already-converted column is a no-op.
pd.to_datetime(df_vaxx['Date Updated']).max()

Timestamp('2021-08-18 00:00:00')

In [9]:
# Convert 'Date Updated' to datetime in place so the later max() / equality
# filters on this column compare dates (presumably read as text from the CSV).
df_vaxx['Date Updated'] = pd.to_datetime(df_vaxx['Date Updated'])

In [14]:
# Keep only the rows belonging to the latest vaccination update.
df_vaxx_recent = df_vaxx.loc[
    df_vaxx['Date Updated'].eq(df_vaxx['Date Updated'].max())
]

In [29]:
# Show the columns available on the latest vaccination snapshot.
df_vaxx_recent.columns

Index(['Town', 'At least one census tract SVI≥0.75', 'Population',
       'Initiated Vaccination Count', 'Initiated Vaccination Percent',
       'Fully Vaccinated Count', 'Fully Vaccinated Percent', 'Date Updated'],
      dtype='object')

# Cases

In [15]:
# Load the per-town 14-day case-rate / test-positivity history.
df_cases = pd.read_csv(
    filepath_or_buffer='COVID-19_case_rate_per_100_000_population_and_percent_test_positivity_in_the_last_14_days_by_town.csv',
)

In [51]:
# Most recent case-rate update. Parse inside the expression so this cell gives a
# real date maximum even when it runs before the column is converted to datetime
# (top-to-bottom order); on raw, unparsed values max() would not compare by date.
# Parsing an already-converted column is a no-op.
pd.to_datetime(df_cases['Date Updated']).max()

Timestamp('2021-08-18 00:00:00')

In [17]:
# Convert 'Date Updated' to datetime in place so the later max() / equality
# filters on this column compare dates (presumably read as text from the CSV).
df_cases['Date Updated'] = pd.to_datetime(df_cases['Date Updated'])

In [18]:
# Keep only the rows belonging to the latest case-rate update.
df_cases_recent = df_cases.loc[
    df_cases['Date Updated'].eq(df_cases['Date Updated'].max())
]

In [30]:
# Show the columns available on the latest case-rate snapshot.
df_cases_recent.columns

Index(['Town number', 'Town', 'Town population', 'Cases in Week 1',
       'Cases in Week 2', 'Total cases over 2-week period',
       'COVID-19 cases per 100k population over 2-week period',
       'Rate category', 'Total tests', 'Percent test positivity',
       'Report period start date', 'Report period end date', 'Update date',
       'Date Updated'],
      dtype='object')

# Merge

In [23]:
# Join the latest vaccination and case-rate snapshots on the shared town name
# (default inner join: only towns present in both frames are kept).
df_corr_recent = df_vaxx_recent.merge(df_cases_recent, on='Town')

In [49]:
# Narrow the merged frame to town, 2-week case rate and fully-vaccinated percent.
df_corr_recent_final = df_corr_recent.loc[
    :,
    [
        'Town',
        'COVID-19 cases per 100k population over 2-week period',
        'Fully Vaccinated Percent',
    ],
]


In [43]:
# Pearson correlation between the numeric columns only. The frame also carries
# the string column 'Town'; recent pandas versions raise on non-numeric columns
# in corr() instead of silently dropping them, so select numerics explicitly.
df_corr_recent_final.select_dtypes('number').corr()

Unnamed: 0,COVID-19 cases per 100k population over 2-week period,Fully Vaccinated Percent
COVID-19 cases per 100k population over 2-week period,1.0,-0.229546
Fully Vaccinated Percent,-0.229546,1.0


In [44]:
# Narrow the merged frame to town, 2-week case rate and initiated-vaccination percent.
df_corr_recent_final = df_corr_recent.loc[
    :,
    [
        'Town',
        'COVID-19 cases per 100k population over 2-week period',
        'Initiated Vaccination Percent',
    ],
]


In [45]:
# Pearson correlation between the numeric columns only. The frame also carries
# the string column 'Town'; recent pandas versions raise on non-numeric columns
# in corr() instead of silently dropping them, so select numerics explicitly.
df_corr_recent_final.select_dtypes('number').corr()

Unnamed: 0,COVID-19 cases per 100k population over 2-week period,Initiated Vaccination Percent
COVID-19 cases per 100k population over 2-week period,1.0,-0.204421
Initiated Vaccination Percent,-0.204421,1.0


# Plotly

In [47]:
# Scatter of case rate against initiated-vaccination percent, one point per town.
# Plot straight from the merged frame: df_corr_recent_final is rebound several
# times in this notebook, so its columns depend on which cell ran last.
fig = px.scatter(
    df_corr_recent,
    x="Initiated Vaccination Percent",
    y="COVID-19 cases per 100k population over 2-week period",
    hover_data=['Town'],
    title="Case rate vs. initiated vaccination percent by town",
)
fig.show()

In [50]:
# Scatter of case rate against fully-vaccinated percent, one point per town.
# Plot from the merged frame rather than df_corr_recent_final: by this point in
# top-to-bottom order that name holds the "Initiated Vaccination Percent"
# selection, which has no "Fully Vaccinated Percent" column.
fig = px.scatter(
    df_corr_recent,
    x="Fully Vaccinated Percent",
    y="COVID-19 cases per 100k population over 2-week period",
    hover_data=['Town'],
    title="Case rate vs. fully vaccinated percent by town",
)
fig.show()

# Same analysis a month ago 

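Repeating the analysis on the 2021-07-28 update shows a stronger negative correlation than the latest data: about -0.43 (fully vaccinated) and -0.42 (initiated), versus about -0.23 and -0.20 for the 2021-08-18 update.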

In [86]:
# Check which update dates the positional offsets select in each dataset
# (fifth-from-last for cases, fourth-from-last for vaccinations).
cases_update_dates = df_cases['Date Updated'].unique()
vaxx_update_dates = df_vaxx['Date Updated'].unique()
cases_update_dates[-5], vaxx_update_dates[-4]

(numpy.datetime64('2021-07-28T00:00:00.000000000'),
 numpy.datetime64('2021-07-28T00:00:00.000000000'))

In [80]:
# Build the July comparison snapshot. Select it by explicit date instead of by
# position in unique(): offsets such as [-4] / [-5] shift as soon as either CSV
# gains a new update, silently changing which week is analysed.
# Assumes 'Date Updated' has already been converted to datetime in both frames.
july_snapshot = pd.Timestamp('2021-07-28')
df_vaxx_july = df_vaxx[df_vaxx['Date Updated'] == july_snapshot]
df_cases_july = df_cases[df_cases['Date Updated'] == july_snapshot]
# Inner join on town name, mirroring the merge used for the latest snapshot.
df_corr_july = df_vaxx_july.merge(df_cases_july, on='Town')

In [81]:
# Narrow the July merge to town, 2-week case rate and fully-vaccinated percent.
df_corr_july_final = df_corr_july.loc[
    :,
    [
        'Town',
        'COVID-19 cases per 100k population over 2-week period',
        'Fully Vaccinated Percent',
    ],
]


In [82]:
# Pearson correlation between the numeric columns only. The frame also carries
# the string column 'Town'; recent pandas versions raise on non-numeric columns
# in corr() instead of silently dropping them, so select numerics explicitly.
df_corr_july_final.select_dtypes('number').corr()

Unnamed: 0,COVID-19 cases per 100k population over 2-week period,Fully Vaccinated Percent
COVID-19 cases per 100k population over 2-week period,1.0,-0.426543
Fully Vaccinated Percent,-0.426543,1.0


In [83]:
# Narrow the July merge to town, 2-week case rate and initiated-vaccination percent.
df_corr_july_final = df_corr_july.loc[
    :,
    [
        'Town',
        'COVID-19 cases per 100k population over 2-week period',
        'Initiated Vaccination Percent',
    ],
]


In [84]:
# Pearson correlation between the numeric columns only. The frame also carries
# the string column 'Town'; recent pandas versions raise on non-numeric columns
# in corr() instead of silently dropping them, so select numerics explicitly.
df_corr_july_final.select_dtypes('number').corr()

Unnamed: 0,COVID-19 cases per 100k population over 2-week period,Initiated Vaccination Percent
COVID-19 cases per 100k population over 2-week period,1.0,-0.418065
Initiated Vaccination Percent,-0.418065,1.0
