## Hypothesis: Warmer regions have fewer COVID-19 cases


### Comparing cases country-wise

In [5]:
import pandas as pd
import numpy as np

In [18]:
# Daily new cases per million, read from a file next to the notebook
# (presumably a local copy of the OWID export -- confirm).
new_cases = "new_cases_per_million.csv"

# Cumulative cases per million, fetched from the OWID GitHub repository.
total_cases = (
    "https://raw.githubusercontent.com/owid/covid-19-data/master/"
    "public/data/ecdc/total_cases_per_million.csv"
)

In [19]:
def get_newcases(
    csv_file,
    columns=('date', 'Australia', 'United States', 'United States Virgin Islands'),
):
    """Load selected columns of a per-country cases CSV into a DataFrame.

    The first row of the file is used as the header, and only the columns
    named in ``columns`` are parsed. The result keeps the column order of the
    file, not the order in which the names are listed here.

    :param csv_file: path, URL or file-like object accepted by ``pandas.read_csv``
    :param columns: names of the columns to load; defaults to the date plus
        Australia, United States and United States Virgin Islands
    :return: a ``pandas.DataFrame`` containing the requested columns
    :raises ValueError: if a requested column is not present in the file
    """
    # Restricting usecols avoids parsing the country columns we never look at.
    return pd.read_csv(csv_file, usecols=list(columns))


In [20]:
# Load the new-cases-per-million columns selected by get_newcases().
covid_cases = get_newcases(new_cases)
covid_cases  # bare expression so the notebook renders the frame; TODO: measure load time

Unnamed: 0,date,Australia,United States,United States Virgin Islands
0,2019-12-31,0.000,0.000,
1,2020-01-01,0.000,0.000,
2,2020-01-02,0.000,0.000,
3,2020-01-03,0.000,0.000,
4,2020-01-04,0.000,0.000,
...,...,...,...,...
117,2020-04-26,0.627,146.612,9.576
118,2020-04-27,0.392,81.138,0.000
119,2020-04-28,0.471,68.099,38.306
120,2020-04-29,0.510,72.906,0.000


In [21]:
# Capitalise the date column's label; the frame is modified in place.
covid_cases.rename(columns={'date': 'Date'}, inplace=True)
covid_cases

Unnamed: 0,Date,Australia,United States,United States Virgin Islands
0,2019-12-31,0.000,0.000,
1,2020-01-01,0.000,0.000,
2,2020-01-02,0.000,0.000,
3,2020-01-03,0.000,0.000,
4,2020-01-04,0.000,0.000,
...,...,...,...,...
117,2020-04-26,0.627,146.612,9.576
118,2020-04-27,0.392,81.138,0.000
119,2020-04-28,0.471,68.099,38.306
120,2020-04-29,0.510,72.906,0.000


In [22]:
def get_totalcases_cold(
    csv_file,
    columns=('date', 'United States', 'United Kingdom', 'Italy', 'Spain', 'China'),
):
    """Load the total-cases columns for the "colder" country group.

    The first row of the file is used as the header, and only the columns
    named in ``columns`` are parsed. The result keeps the column order of the
    file, not the order in which the names are listed here.

    :param csv_file: path, URL or file-like object accepted by ``pandas.read_csv``
    :param columns: names of the columns to load; defaults to the date plus
        United States, United Kingdom, Italy, Spain and China
    :return: a ``pandas.DataFrame`` containing the requested columns
    :raises ValueError: if a requested column is not present in the file
    """
    # Restricting usecols avoids parsing the country columns we never look at.
    return pd.read_csv(csv_file, usecols=list(columns))

In [23]:
# Total cases per million for the "colder" group, read from the remote CSV.
colder_country_cases = get_totalcases_cold(total_cases)
colder_country_cases  # bare expression so the notebook renders the frame

Unnamed: 0,date,China,Italy,Spain,United Kingdom,United States
0,2019-12-31,0.019,0.000,0.000,0.000,0.000
1,2020-01-01,0.019,0.000,0.000,0.000,0.000
2,2020-01-02,0.019,0.000,0.000,0.000,0.000
3,2020-01-03,0.031,0.000,0.000,0.000,0.000
4,2020-01-04,0.031,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...
120,2020-04-29,58.319,3332.764,4553.908,2373.759,3059.139
121,2020-04-30,58.322,3367.265,4564.988,2433.801,3141.694
122,2020-05-01,58.330,3398.227,4603.080,2522.655,3232.077
123,2020-05-02,58.332,3430.727,,2614.000,3334.659


In [26]:
# Count the missing values in each column before deciding how to fill them.
colder_country_cases.isna().sum()

date              0
China             0
Italy             0
Spain             1
United Kingdom    0
United States     0
dtype: int64

In [40]:
# Spain has a single missing value, on the most recent date. Fill it with the
# number 0, not the string '0': a string fill turns the float column into a
# mixed object column, which breaks arithmetic and plotting on it.
# NOTE(review): these look like cumulative totals, so a trailing gap filled
# with 0 makes Spain drop to zero on the last day -- consider forward-filling
# instead; confirm the intent.
colder_country_cases = (
    colder_country_cases
    .fillna({'Spain': 0})
    .rename(columns={'date': 'Date'})
)
# The dates are ISO-8601 strings, which sort lexicographically, so a plain
# string comparison is enough to drop the 2019-12-31 row. The explicit copy
# keeps later column assignments from targeting a slice of the original frame.
colder_country_cases = colder_country_cases[colder_country_cases['Date'] > '2019-12-31'].copy()
colder_country_cases

Unnamed: 0,Date,China,Italy,Spain,United Kingdom,United States
1,2020-01-01,0.019,0.000,0,0.000,0.000
2,2020-01-02,0.019,0.000,0,0.000,0.000
3,2020-01-03,0.031,0.000,0,0.000,0.000
4,2020-01-04,0.031,0.000,0,0.000,0.000
5,2020-01-05,0.041,0.000,0,0.000,0.000
...,...,...,...,...,...,...
120,2020-04-29,58.319,3332.764,4553.91,2373.759,3059.139
121,2020-04-30,58.322,3367.265,4564.99,2433.801,3141.694
122,2020-05-01,58.330,3398.227,4603.08,2522.655,3232.077
123,2020-05-02,58.332,3430.727,0,2614.000,3334.659


In [41]:
def get_totalcases_hot(
    csv_file,
    columns=('date', 'Australia', 'Brazil', 'Chile', 'South Africa', 'New Zealand'),
):
    """Load the total-cases columns for the "warmer" country group.

    The first row of the file is used as the header, and only the columns
    named in ``columns`` are parsed. The result keeps the column order of the
    file, not the order in which the names are listed here.

    :param csv_file: path, URL or file-like object accepted by ``pandas.read_csv``
    :param columns: names of the columns to load; defaults to the date plus
        Australia, Brazil, Chile, South Africa and New Zealand
    :return: a ``pandas.DataFrame`` containing the requested columns
    :raises ValueError: if a requested column is not present in the file
    """
    # Restricting usecols avoids parsing the country columns we never look at.
    return pd.read_csv(csv_file, usecols=list(columns))

In [42]:
# Total cases per million for the "warmer" group, read from the remote CSV.
warmer_country_cases = get_totalcases_hot(total_cases)
warmer_country_cases  # bare expression so the notebook renders the frame

Unnamed: 0,date,Australia,Brazil,Chile,New Zealand,South Africa
0,2019-12-31,0.000,0.000,,0.000,
1,2020-01-01,0.000,0.000,,0.000,
2,2020-01-02,0.000,0.000,,0.000,
3,2020-01-03,0.000,0.000,,0.000,
4,2020-01-04,0.000,0.000,,0.000,
...,...,...,...,...,...,...
120,2020-04-29,264.237,338.193,751.457,233.502,84.237
121,2020-04-30,264.550,367.718,778.659,234.124,90.206
122,2020-05-01,265.178,401.676,838.189,234.746,95.214
123,2020-05-02,265.374,430.887,889.716,235.161,100.339


In [43]:
# Count the missing values in each column before deciding how to fill them.
warmer_country_cases.isna().sum()

date             0
Australia        0
Brazil           0
Chile           65
New Zealand      9
South Africa    68
dtype: int64

In [44]:
# Chile, New Zealand and South Africa contain missing values. Fill them with
# the number 0, not the string '0': a string fill turns each float column into
# a mixed object column, which breaks arithmetic and plotting on it.
# NOTE(review): these look like cumulative totals; zero is a sensible fill for
# dates before a country's first report, but any gap in the middle of a series
# would make the total dip to 0 -- confirm where the gaps are.
warmer_country_cases = (
    warmer_country_cases
    .fillna({'Chile': 0, 'New Zealand': 0, 'South Africa': 0})
    .rename(columns={'date': 'Date'})
)
# The dates are ISO-8601 strings, which sort lexicographically, so a plain
# string comparison is enough to drop the 2019-12-31 row. The explicit copy
# keeps later column assignments from targeting a slice of the original frame.
warmer_country_cases = warmer_country_cases[warmer_country_cases['Date'] > '2019-12-31'].copy()
warmer_country_cases

Unnamed: 0,Date,Australia,Brazil,Chile,New Zealand,South Africa
1,2020-01-01,0.000,0.000,0,0,0
2,2020-01-02,0.000,0.000,0,0,0
3,2020-01-03,0.000,0.000,0,0,0
4,2020-01-04,0.000,0.000,0,0,0
5,2020-01-05,0.000,0.000,0,0,0
...,...,...,...,...,...,...
120,2020-04-29,264.237,338.193,751.457,233.502,84.237
121,2020-04-30,264.550,367.718,778.659,234.124,90.206
122,2020-05-01,265.178,401.676,838.189,234.746,95.214
123,2020-05-02,265.374,430.887,889.716,235.161,100.339
