# Project 1
### Epidemiological Study: US Vaccination Campaign (November 2020 - March 2021)
---
### Project Description/Outline
Determine the effectiveness of the US vaccination campaign in attending the population that is most affected by covid-19 disease. This is achieved by revising the total number of doses applied, vaccination coverage, population demographics such as gender, race, socioeconomic status, and education, versus epidemiologic variables: incidence, prevalence, hospitalization, UCI, death numbers.

In [4]:
## Dependencies
import pandas as pd



### Covid-19 Vaccination Data

In [6]:
## Import data from CSV
hesitancy_df = pd.read_csv("data/Vaccine_Hesitancy_Covid19.csv")
hesitancy_df.head()

Unnamed: 0,FIPS Code,County Name,State,Estimated hesitant,Estimated strongly hesitant,Social Vulnerability Index (SVI),SVI Category,Ability to handle a COVID-19 outbreak (CVAC),CVAC Category,Percent adults fully vaccinated against COVID-19,Percent Hispanic,Percent non-Hispanic American Indian/Alaska Native,Percent non-Hispanic Asian,Percent non-Hispanic Black,Percent non-Hispanic Native Hawaiian/Pacific Islander,Percent non-Hispanic White,Geographical Point,State Code,County Boundary,State Boundary
0,1123,"Tallapoosa County, Alabama",ALABAMA,0.23,0.12,0.89,Very High Vulnerability,0.64,High Vulnerability,0.161,0.0242,0.0022,0.0036,0.2697,0.0,0.6887,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-85.841259 33.104456, -85.8409...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
1,1121,"Talladega County, Alabama",ALABAMA,0.23,0.11,0.87,Very High Vulnerability,0.84,Very High Vulnerability,0.133,0.0229,0.0043,0.0061,0.3237,0.0003,0.6263,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-86.303069 33.46316, -86.30306...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
2,1131,"Wilcox County, Alabama",ALABAMA,0.23,0.11,0.93,Very High Vulnerability,0.94,Very High Vulnerability,0.228,0.0053,0.0009,0.0003,0.6938,0.0,0.2684,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-87.52534299999999 32.132773, ...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
3,1129,"Washington County, Alabama",ALABAMA,0.23,0.11,0.73,High Vulnerability,0.82,Very High Vulnerability,0.192,0.0146,0.0731,0.0025,0.2354,0.0,0.6495,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-88.45317899999999 31.505388, ...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
4,1133,"Winston County, Alabama",ALABAMA,0.22,0.11,0.7,High Vulnerability,0.8,High Vulnerability,0.085,0.0315,0.0034,0.0016,0.0073,0.0005,0.937,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-87.63656399999999 34.120908, ...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."


In [7]:
hesitancy_df.iloc[:,10:16].sum(axis=1).mean()*100

97.89445257797577

In [10]:
columns = [
    "FIPS Code",
    "Social Vulnerability Index (SVI)",
    "Percent adults fully vaccinated against COVID-19"
]

vaccination_df = hesitancy_df[columns]
vaccination_df.head()

Unnamed: 0,FIPS Code,Social Vulnerability Index (SVI),Percent adults fully vaccinated against COVID-19
0,1123,0.89,0.161
1,1121,0.87,0.133
2,1131,0.93,0.228
3,1129,0.73,0.192
4,1133,0.7,0.085


### US Census Reference (2019)

In [12]:
census_df = pd.read_csv("data/US_Census2019.csv")

In [13]:
census_df.iloc[1:5,15:23].head()

Unnamed: 0,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,NPOPCHG_2010,NPOPCHG_2011,NPOPCHG_2012,NPOPCHG_2013
1,55243,55390,55533,55869,176,454,-273,-227
2,207601,212521,217855,223234,847,3446,3587,4740
3,25806,25157,24872,24686,-128,14,-172,-232
4,22586,22550,22367,22394,-45,-125,-78,-146


In [15]:
census2 = census_df[["STATE", "COUNTY","STNAME","POPESTIMATE2019"]]
str(census_df['STATE']) + str(census_df['COUNTY'])

'0        1\n1        1\n2        1\n3        1\n4        1\n        ..\n3188    56\n3189    56\n3190    56\n3191    56\n3192    56\nName: STATE, Length: 3193, dtype: int640        0\n1        1\n2        3\n3        5\n4        7\n        ..\n3188    37\n3189    39\n3190    41\n3191    43\n3192    45\nName: COUNTY, Length: 3193, dtype: int64'

### Covid-19 Case Surveillance


In [None]:
cases = pd.read_csv("data/COVID-19_Case_Surveillance_Public_Use_Data_with_Geography.csv", chunksize=1000)

In [None]:
cases1 = pd.DataFrame(cases)

In [17]:
import pandas as pd
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cdc.gov", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cdc.gov,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("vbim-akqf", limit=2000000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [18]:
results_df

Unnamed: 0,cdc_case_earliest_dt,cdc_report_dt,pos_spec_dt,current_status,sex,age_group,race_ethnicity_combined,hosp_yn,icu_yn,death_yn,medcond_yn,onset_dt
0,2020-10-23T00:00:00.000,2021-01-28T00:00:00.000,2020-10-23T00:00:00.000,Laboratory-confirmed case,Female,0 - 9 Years,"Black, Non-Hispanic",Missing,Missing,No,Missing,
1,2020-10-23T00:00:00.000,2020-10-23T00:00:00.000,2020-10-23T00:00:00.000,Laboratory-confirmed case,Female,0 - 9 Years,"Black, Non-Hispanic",No,Unknown,No,No,
2,2020-10-23T00:00:00.000,2020-10-25T00:00:00.000,2020-10-23T00:00:00.000,Laboratory-confirmed case,Female,0 - 9 Years,"Black, Non-Hispanic",No,Missing,Missing,Missing,2020-10-23T00:00:00.000
3,2020-10-23T00:00:00.000,2020-10-25T00:00:00.000,2020-10-23T00:00:00.000,Laboratory-confirmed case,Female,0 - 9 Years,"Black, Non-Hispanic",Missing,Missing,Missing,Missing,
4,2020-10-23T00:00:00.000,2020-10-26T00:00:00.000,2020-10-23T00:00:00.000,Laboratory-confirmed case,Female,0 - 9 Years,"Black, Non-Hispanic",No,Missing,No,Missing,
...,...,...,...,...,...,...,...,...,...,...,...,...
1999995,2021-01-19T00:00:00.000,2021-01-19T00:00:00.000,,Laboratory-confirmed case,Male,60 - 69 Years,"Multiple/Other, Non-Hispanic",No,Missing,No,Missing,
1999996,2020-11-29T00:00:00.000,2020-12-09T00:00:00.000,,Laboratory-confirmed case,Male,60 - 69 Years,"Multiple/Other, Non-Hispanic",Missing,Missing,No,Missing,2020-11-29T00:00:00.000
1999997,2021-01-04T00:00:00.000,2021-01-09T00:00:00.000,,Laboratory-confirmed case,Male,60 - 69 Years,"Multiple/Other, Non-Hispanic",No,Missing,No,Missing,2021-01-04T00:00:00.000
1999998,2021-03-01T00:00:00.000,2021-03-13T00:00:00.000,,Probable Case,Male,60 - 69 Years,"Multiple/Other, Non-Hispanic",No,Missing,No,Missing,2021-03-01T00:00:00.000
