## 1. Data Cleaning ##
#### COVID-19 Data ####

In [212]:
# Import modules
import pandas as pd
import numpy as np

# Load the COVID-19 table; the first CSV column is used as the row index
covid_df = pd.read_csv('data/global-covid.csv', index_col=0)

# Columns that are dropped right after loading
col_to_drop = ['NewCases', 'NewDeaths', 'TotalRecovered', 'NewRecovered', 'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop', 'TotalTests', 'Tests/1M pop', 'WHO Region']
covid_df = covid_df.drop(columns=col_to_drop)

# Remove the 'Diamond Princess' row (an index label, not a column).
# DataFrame.drop returns a new frame rather than modifying in place, so the
# result has to be assigned back for the row to actually disappear.
covid_df = covid_df.drop(index='Diamond Princess')

# Split the COVID table into one frame per continent by matching the
# 'Continent' column against each continent name
continent = covid_df['Continent']
na_covid_df = covid_df.loc[continent == 'North America']
sa_covid_df = covid_df.loc[continent == 'South America']
asia_covid_df = covid_df.loc[continent == 'Asia']
europe_covid_df = covid_df.loc[continent == 'Europe']
africa_covid_df = covid_df.loc[continent == 'Africa']
aus_covid_df = covid_df.loc[continent == 'Australia/Oceania']

# covid_df
# na_covid_df
# sa_covid_df
# asia_covid_df
# europe_covid_df
# africa_covid_df
# aus_covid_df

#### 2009 Global Population Data ####

In [213]:
# Load the 2009 population table and discard the extra 'Delete' column
population_df = pd.read_csv('data/2009-population-by-country.csv').drop(columns='Delete')

# Parse the figures as numbers (entries that cannot be parsed become NaN).
# The Population column is in millions, so scale it to absolute counts.
population_millions = pd.to_numeric(population_df['Population'], errors='coerce')
population_df['Population'] = population_millions * 1000000


#### H1N1 Data ####

In [214]:
# Load one H1N1 table per region; the U.S. and South America tables each
# carry one column that is not used, so it is dropped right after reading
usa_h1n1_df = pd.read_csv('data/usa-swine-flu.csv').drop(
    columns='State reported hospitalization')
europe_h1n1_df = pd.read_csv('data/europe-swine-flu.csv')
africa_h1n1_df = pd.read_csv('data/africa-swine-flu.csv')
asia_h1n1_df = pd.read_csv('data/asia-swine-flu.csv')
sa_h1n1_df = pd.read_csv('data/south-america-swine-flu.csv').drop(
    columns='Estimated no. of cases')

# Calculate totals for U.S.: sum the confirmed cases and deaths over all rows
usa_cases = usa_h1n1_df['State reported confirmed cases'].sum()
usa_deaths = usa_h1n1_df['Confirmed deaths'].sum()
usa_totals = {
    'State/Territory': 'Total',
    'State reported confirmed cases': usa_cases,
    'Confirmed deaths': usa_deaths,
}
# Add the totals as a final row. pd.concat is used because DataFrame.append
# was removed in pandas 2.0; any column missing from the dict is filled with NaN.
usa_h1n1_df = pd.concat([usa_h1n1_df, pd.DataFrame([usa_totals])], ignore_index=True)

# Calculate totals for Europe: sum the confirmed cases and deaths over all rows
europe_cases = europe_h1n1_df['Cumulative no. of lab confirmed cases'].sum()
europe_deaths = europe_h1n1_df['Cumulative no. confirmed deaths'].sum()
europe_totals = {
    'Country': 'Total',
    'Cumulative no. of lab confirmed cases': europe_cases,
    'Cumulative no. confirmed deaths': europe_deaths,
}
# Add the totals as a final row. pd.concat is used because DataFrame.append
# was removed in pandas 2.0; any column missing from the dict is filled with NaN.
europe_h1n1_df = pd.concat([europe_h1n1_df, pd.DataFrame([europe_totals])], ignore_index=True)

# Calculate totals for Africa: sum the confirmed cases and deaths over all rows
africa_cases = africa_h1n1_df['Cumulative no. of lab confirmed cases'].sum()
africa_deaths = africa_h1n1_df['Cumulative no. confirmed deaths'].sum()
africa_totals = {
    'Country': 'Total',
    'Cumulative no. of lab confirmed cases': africa_cases,
    'Cumulative no. confirmed deaths': africa_deaths,
}
# Add the totals as a final row. pd.concat is used because DataFrame.append
# was removed in pandas 2.0; any column missing from the dict is filled with NaN.
africa_h1n1_df = pd.concat([africa_h1n1_df, pd.DataFrame([africa_totals])], ignore_index=True)

# Calculate totals for Asia: sum the confirmed cases and deaths over all rows
asia_cases = asia_h1n1_df['Cumulative no. of lab confirmed cases'].sum()
asia_deaths = asia_h1n1_df['Cumulative no. confirmed deaths'].sum()
asia_totals = {
    'Country': 'Total',
    'Cumulative no. of lab confirmed cases': asia_cases,
    'Cumulative no. confirmed deaths': asia_deaths,
}
# Add Asia's own totals as a final row (the Europe totals dict must not be
# used here). pd.concat is used because DataFrame.append was removed in
# pandas 2.0; any column missing from the dict is filled with NaN.
asia_h1n1_df = pd.concat([asia_h1n1_df, pd.DataFrame([asia_totals])], ignore_index=True)

# Calculate totals for South America: sum the confirmed cases and deaths over all rows
sa_cases = sa_h1n1_df['Cumulative no. of lab confirmed cases'].sum()
sa_deaths = sa_h1n1_df['Cumulative no. confirmed deaths'].sum()
sa_totals = {
    'Country': 'Total',
    'Cumulative no. of lab confirmed cases': sa_cases,
    'Cumulative no. confirmed deaths': sa_deaths,
}
# Add the totals as a final row. pd.concat is used because DataFrame.append
# was removed in pandas 2.0; any column missing from the dict is filled with NaN.
sa_h1n1_df = pd.concat([sa_h1n1_df, pd.DataFrame([sa_totals])], ignore_index=True)
# Display the resulting frame as the cell output
sa_h1n1_df

# Add population column


Unnamed: 0,Country,Cumulative no. of lab confirmed cases,Cumulative no. confirmed deaths
0,Brazil,28886,1205
1,Chile,12252,132
2,Peru,9268,221
3,Argentina,9119,580
4,Colombia,4310,272
5,Uruguay,3056,33
6,Bolivia,2269,56
7,Ecuador,2002,67
8,Venezuela,1782,91
9,Paraguay,682,42


#### SARS Data ####