In [9]:
# Import Dependencies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [22]:
# Load the provisional COVID-19 deaths by sex and age dataset.
ageSexDF = pd.read_csv("resources/Provisional_COVID-19_Deaths_by_Sex_and_Age.csv")

# Keep an independent snapshot of the raw data. A plain assignment would only
# create a second name for the same object, so any in-place change made through
# ageSexDF would silently change the "original" as well.
ageSexDF_original = ageSexDF.copy()

ageSexDF.head()

Unnamed: 0,Data As Of,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths",Footnote
0,10/20/2021,01/01/2020,10/16/2021,By Total,,,United States,All Sexes,All Ages,723880.0,5928410.0,649389.0,369136.0,9372.0,1012160.0,
1,10/20/2021,01/01/2020,10/16/2021,By Total,,,United States,All Sexes,Under 1 year,132.0,33432.0,367.0,16.0,23.0,506.0,
2,10/20/2021,01/01/2020,10/16/2021,By Total,,,United States,All Sexes,0-17 years,542.0,59268.0,1043.0,138.0,189.0,1636.0,
3,10/20/2021,01/01/2020,10/16/2021,By Total,,,United States,All Sexes,1-4 years,63.0,6236.0,210.0,16.0,65.0,322.0,
4,10/20/2021,01/01/2020,10/16/2021,By Total,,,United States,All Sexes,5-14 years,172.0,9911.0,303.0,54.0,80.0,501.0,


In [23]:
# Cleanup tasks
# The cleaned frame is always rebuilt from ageSexDF_original (never from ageSexDF
# itself) so this cell can be re-run safely; filtering/dropping on ageSexDF in place
# raises a KeyError on the second run because the columns are already gone.
#
# 1. drop the "All Sexes" and "All Ages" rows - they are roll-ups of the individual sexes / age groups
# 2. keep only "By Month" rows - "By Total" and "By Year" are roll-ups of the monthly rows
# 3. drop the "United States" rows - they are roll-ups of the individual states
# 4. keep only age groups that do not overlap each other
#    (e.g. "1-4 years" and "5-14 years" both fall inside "0-17 years")
NON_OVERLAPPING_AGE_GROUPS = [
    '0-17 years',
    '18-29 years',
    '30-39 years',
    '40-49 years',
    '50-64 years',
    '65-74 years',
    '75-84 years',
    '85 years and over',
]

isSingleSex = ageSexDF_original['Sex'] != 'All Sexes'
# 'All Ages' is not in the list, so this one mask covers tasks 1 (age part) and 4.
isDistinctAgeGroup = ageSexDF_original['Age Group'].isin(NON_OVERLAPPING_AGE_GROUPS)
isMonthly = ageSexDF_original['Group'] == 'By Month'
isSingleState = ageSexDF_original['State'] != 'United States'

# 5. drop "Pneumonia, Influenza, or COVID-19 Deaths" (a combined figure derived from the
#    individual cause columns) and "Footnote" (not needed for the planned analyses)
ageSexDF = (
    ageSexDF_original
    .loc[isSingleSex & isDistinctAgeGroup & isMonthly & isSingleState]
    .drop(columns=['Pneumonia, Influenza, or COVID-19 Deaths', 'Footnote'])
)

ageSexDF.head()

Unnamed: 0,Data As Of,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths
9403,10/20/2021,01/01/2020,01/31/2020,By Month,2020.0,1.0,Alabama,Male,0-17 years,0.0,44.0,0.0,0.0,
9407,10/20/2021,01/01/2020,01/31/2020,By Month,2020.0,1.0,Alabama,Male,18-29 years,0.0,53.0,0.0,0.0,0.0
9409,10/20/2021,01/01/2020,01/31/2020,By Month,2020.0,1.0,Alabama,Male,30-39 years,0.0,67.0,,0.0,0.0
9411,10/20/2021,01/01/2020,01/31/2020,By Month,2020.0,1.0,Alabama,Male,40-49 years,0.0,129.0,,0.0,
9413,10/20/2021,01/01/2020,01/31/2020,By Month,2020.0,1.0,Alabama,Male,50-64 years,,521.0,28.0,0.0,


In [None]:
# What age group is most susceptible and what are the differences among the age groups? - prashanth to break down into tasks
# bar chart showing each group
# it might make sense to make bigger age ranges for this...
# - "young adult" vs "middle age", "children", and "seniors" - binning!

# Tasks:
# Prep the data - create a dataframe for just the age groups. Combine both male and female by age group OR
# create bins for the age ranges - use the 'Age Group' and 'COVID-19 Deaths' columns
# Inspect for any null values or missing information - Clean the data as necessary
# Plot -  a bar chart (Age range vs. Covid Death Count) and/or a Line chart
# Analyze and summarize the findings



In [None]:
# Is geographic location a factor in the number of deaths? - prashanth to break down into tasks
# Probably need to get the state populations for this. - an api call to fill out the data frame
# Could do a heat map for this one!

# Tasks:
# Research and find the api to get the population information by State 
# Write the code to get the state population
# Create a dataframe by State and Covid-19 deaths
# Add the State population to the state dataframe
# Create statistics by state 
# Plot a heat map using google maps  - use number of deaths as weight
#      Will need to call an API to get the lat and long for each state
# Analyze and summarize the findings 

In [None]:
# What effect does sex have on covid mortality? - prashanth to break down into tasks
# more male vs females?
# might need state population data males and females for this one

# Tasks:
# Create a dataframe for covid-19 death count by gender and state
# Call to an API to get the state population by gender
# Add the population information to the dataframe
# Create necessary stats - mainly percentage by gender and state
# Plot a pie chart to show the percentages of death by gender  - overall
# Draw a line chart to show the percentage by state (for all 50 states) - (can also be a bar chart)
# Analyze and summarize the findings -  Determine if there is a trend 


In [None]:
# Time - what period was the peak of covid mortality? - prashanth to break down into tasks
# How many waves were there?

# Tasks:
# Create a dataframe for covid-19 death count by months
# Draw a line chart (Year-Month vs. COVID-19 death count)
# Analyze and summarize the findings -  Determine the peaks in the death count by examining the line chart


In [29]:
# Break out by timeframe (total, year, or month).
# Slice ageSexDF_original, not ageSexDF: the cleanup cell keeps only the "By Month"
# rows in ageSexDF, so "By Total" and "By Year" slices taken from it would be empty
# on a fresh Restart & Run All.
ageSexByTotalDF = ageSexDF_original.loc[ageSexDF_original["Group"] == "By Total"]
ageSexByYearDF = ageSexDF_original.loc[ageSexDF_original["Group"] == "By Year"]
ageSexByMonthDF = ageSexDF_original.loc[ageSexDF_original["Group"] == "By Month"]

# Georgia by month, for every age group and sex (including the "All Ages" and
# "All Sexes" roll-up rows that the raw data carries).
ageSexGeorgiaByMonthDF = ageSexByMonthDF.loc[ageSexByMonthDF['State'] == 'Georgia']
ageSexGeorgiaByMonthDF.count()

Data As Of                                  1122
Start Date                                  1122
End Date                                    1122
Group                                       1122
Year                                        1122
Month                                       1122
State                                       1122
Sex                                         1122
Age Group                                   1122
COVID-19 Deaths                              873
Total Deaths                                1042
Pneumonia Deaths                             799
Pneumonia and COVID-19 Deaths                869
Influenza Deaths                             903
Pneumonia, Influenza, or COVID-19 Deaths     831
Footnote                                     650
dtype: int64

In [None]:
# Plot Georgia's COVID-19 deaths per month.
# The original call, plt.plot(ageSexGeorgiaByMonthDF[]), was a syntax error
# (empty subscript), so the cell could never run.

# ageSexGeorgiaByMonthDF may or may not contain the statewide roll-up rows
# ('All Sexes' + 'All Ages'). When they are present, use only those rows so the
# per-sex / per-age rows are not counted on top of them; otherwise fall through
# and add up whatever per-sex / per-age rows the frame holds.
georgiaDF = ageSexGeorgiaByMonthDF
isStatewideRollup = (georgiaDF['Sex'] == 'All Sexes') & (georgiaDF['Age Group'] == 'All Ages')
if isStatewideRollup.any():
    georgiaDF = georgiaDF.loc[isStatewideRollup]

# One total per month, keyed by the month's start date (stored as MM/DD/YYYY text).
# min_count=1 keeps a month as NaN (a gap in the line) when every contributing
# count is missing, instead of plotting it as a misleading zero.
georgiaMonthlyDeaths = (
    georgiaDF
    .assign(MonthStart=pd.to_datetime(georgiaDF['Start Date'], format='%m/%d/%Y'))
    .groupby('MonthStart')['COVID-19 Deaths']
    .sum(min_count=1)
)

fig, ax = plt.subplots()
ax.plot(georgiaMonthlyDeaths.index, georgiaMonthlyDeaths.values, marker='o')
ax.set_title('Georgia COVID-19 Deaths by Month')
ax.set_xlabel('Month')
ax.set_ylabel('COVID-19 Deaths')
fig.autofmt_xdate()  # slant the date tick labels so they do not overlap
plt.show()