In [148]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
%matplotlib inline
plt.style.use('classic')

In [149]:
def covid_nan_filler(data):
    """Return a copy of a covid-19 dataset with missing Province/State values set to ''.

    Only the 'Province/State' column is filled; missing values in any other
    column are left as they are. The input frame is not modified.
    """
    province_fill = {'Province/State': ''}
    return data.fillna(value=province_fill)

In [150]:
def covid_melter(data, covid_type):
    """Reshape a wide covid-19 dataset into long format.

    The four identifier columns are kept as-is. Every other column is
    unpivoted: its header goes into 'Date_Raw' and its value into a column
    named after covid_type (e.g. 'Confirmed', 'Deaths').
    """
    id_columns = ['Country/Region', 'Province/State', 'Lat', 'Long']
    return pd.melt(data, id_vars=id_columns, var_name='Date_Raw', value_name=covid_type)

In [172]:
def covid_date_formatter(data):
    """Return a copy of a covid-19 dataset with a 'Date' column parsed from 'Date_Raw'.

    'Date_Raw' itself is kept; the input frame is not modified.
    """
    parsed_dates = pd.to_datetime(data['Date_Raw'])
    return data.assign(Date=parsed_dates)

In [173]:
def covid_filter(data, covid_type):
    """Keep only the date, location and covid_type columns of a covid-19 dataset.

    covid_type is the name of the value column to keep (e.g. 'Confirmed',
    'Deaths'). Columns come back in the order listed below; a listed column
    that is absent from data is simply omitted.
    """
    wanted = ['Date', 'Country/Region', 'Province/State', 'Lat', 'Long', covid_type]
    return data.filter(items=wanted)

In [174]:
def covid_data_output(url, covid_type):
    """Read a covid-19 CSV and run it through the cleaning steps, returning the result.

    url is handed straight to pd.read_csv; covid_type (e.g. 'Confirmed',
    'Deaths') names the value column created by covid_melter and kept by
    covid_filter.

    NOTE(review): col_rename is not defined in the visible part of this
    notebook. It presumably renames the columns to the names covid_merger
    joins on -- confirm it is defined before this cell runs.
    """
    return (
        pd.read_csv(url)
        .pipe(covid_nan_filler)            # blank out missing Province/State
        .pipe(covid_melter, covid_type)    # wide -> long
        .pipe(covid_date_formatter)        # add parsed Date column
        .pipe(covid_filter, covid_type)    # keep only the needed columns
        .pipe(col_rename)
    )

In [199]:
def covid_merger(confirmed, deaths):
    """Left-join the deaths dataset onto the confirmed dataset.

    Rows are matched on report date, country, province/state, latitude and
    longitude. Every row of confirmed is kept; rows with no match in deaths
    get missing values in the columns that come from deaths.
    """
    join_keys = ['Report Date', 'Country Name', 'Province State Name', 'Lat', 'Long']
    return pd.merge(confirmed, deaths, how='left', on=join_keys)

In [200]:
# Raw GitHub URLs of the CSSEGISandData/COVID-19 global time-series CSVs.
# Note: despite their names, these variables hold URLs, not counts.
total_cases = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
total_deaths = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'

In [201]:
# Read and clean each CSV; the second argument names the value column.
confirmed = covid_data_output(total_cases, 'Confirmed')
deaths = covid_data_output(total_deaths, 'Deaths')

In [202]:
# Left-join deaths onto confirmed so every confirmed row is kept.
covid_df = covid_merger(confirmed, deaths)

In [203]:
# Display the merged frame (the rendered output below is a truncated preview).
covid_df

Unnamed: 0,Report Date,Country Name,Province State Name,Lat,Long,Confirmed,Deaths
0,2020-01-22,Afghanistan,,33.000000,65.000000,0,0
1,2020-01-22,Albania,,41.153300,20.168300,0,0
2,2020-01-22,Algeria,,28.033900,1.659600,0,0
3,2020-01-22,Andorra,,42.506300,1.521800,0,0
4,2020-01-22,Angola,,-11.202700,17.873900,0,0
...,...,...,...,...,...,...,...
39895,2020-06-19,Sao Tome and Principe,,0.186360,6.613081,693,12
39896,2020-06-19,Yemen,,15.552727,48.516388,919,251
39897,2020-06-19,Comoros,,-11.645500,43.333300,210,5
39898,2020-06-19,Tajikistan,,38.861034,71.276093,5338,51
