In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Preparación de datos

In [2]:
def data_to_df(path):
    """Read a wide comma-separated table and reshape it to long format.

    The identifier columns 'Province/State', 'Country/Region', 'Lat' and
    'Long' are kept as-is. Every other column is unpivoted into two columns:
    'Date' (the original column label) and 'Cases' (the value in that column).

    Parameters
    ----------
    path : anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        One row per (identifier row, unpivoted column) pair.
    """
    id_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']
    wide = pd.read_csv(path, sep=',')
    return pd.melt(wide, id_vars=id_columns, var_name='Date', value_name='Cases')


def consolidate_data(c_path, d_path, r_path):
    """Load the confirmed, deaths and recovered tables and stack them row-wise.

    Each table is read with ``data_to_df``, tagged with a 'Status' label, and
    the three results are concatenated into a single long-format frame.

    Parameters
    ----------
    c_path : path to the confirmed cases table.
    d_path : path to the deaths cases table.
    r_path : path to the recovered cases table.

    Returns
    -------
    pandas.DataFrame
        The stacked rows with three extra columns: 'Status' ('Confirmed',
        'Deaths' or 'Recovered'), 'Date' parsed to datetime (values that
        cannot be parsed become NaT) and 'Year-month' (monthly period of
        'Date'). Each source table keeps its own row index, so index labels
        repeat across statuses.
    """
    sources = (
        (c_path, 'Confirmed'),
        (d_path, 'Deaths'),
        (r_path, 'Recovered'),
    )

    labelled = []
    for path, status in sources:
        frame = data_to_df(path)
        frame['Status'] = status
        labelled.append(frame)

    # DataFrame.append was removed in pandas 2.0; pd.concat with its default
    # arguments stacks the frames the same way (original index labels kept).
    data = pd.concat(labelled)

    data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
    # 'Date' is already datetime here, so derive the period from it directly
    # instead of parsing the column a second time.
    data['Year-month'] = data['Date'].dt.to_period('M')

    return data

In [3]:
# Input CSV tables, one per case status.
confirmed_path = 'data_sources/time_series_covid19_confirmed_global.csv'
deaths_path = 'data_sources/time_series_covid19_deaths_global.csv'
recovered_path = 'data_sources/time_series_covid19_recovered_global.csv'

# Stack the three tables into one long-format frame; the bare name on the
# last line makes the notebook display it.
data = consolidate_data(
    c_path=confirmed_path,
    d_path=deaths_path,
    r_path=recovered_path,
)
data

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Cases,Status,Year-month
0,,Afghanistan,33.939110,67.709953,2020-01-22,0,Confirmed,2020-01
1,,Albania,41.153300,20.168300,2020-01-22,0,Confirmed,2020-01
2,,Algeria,28.033900,1.659600,2020-01-22,0,Confirmed,2020-01
3,,Andorra,42.506300,1.521800,2020-01-22,0,Confirmed,2020-01
4,,Angola,-11.202700,17.873900,2020-01-22,0,Confirmed,2020-01
...,...,...,...,...,...,...,...,...
178340,,Vietnam,14.058324,108.277199,2021-11-24,0,Recovered,2021-11
178341,,West Bank and Gaza,31.952200,35.233200,2021-11-24,0,Recovered,2021-11
178342,,Yemen,15.552727,48.516388,2021-11-24,0,Recovered,2021-11
178343,,Zambia,-13.133897,27.849332,2021-11-24,0,Recovered,2021-11


In [4]:
# Daily confirmed cases per country: keep only the 'Confirmed' rows, then add
# up the rows that share the same date and country.
df = data[data.Status == 'Confirmed']
# Aggregate 'Cases' only. A bare .sum() would also try to add up every other
# column (Province/State, Lat, Long, Status, Year-month), none of which is
# additive; on pandas >= 2.0 non-numeric columns are no longer dropped
# silently from the sum. The result has exactly the columns
# ['Date', 'Country/Region', 'Cases'].
df_daily = df.groupby(['Date', 'Country/Region'], as_index=False)['Cases'].sum()
df_daily.head()

Unnamed: 0,Date,Country/Region,Cases
0,2020-01-22,Afghanistan,0
1,2020-01-22,Albania,0
2,2020-01-22,Algeria,0
3,2020-01-22,Andorra,0
4,2020-01-22,Angola,0


In [6]:
# Monthly total of 'Cases' per country and status.
# NOTE(review): this adds up every 'Cases' value that falls in the month; if
# the source tables hold running totals rather than daily increments, the
# monthly sum is not a case count — confirm against the data source.
data_summary = (
    data.loc[:, ['Year-month', 'Country/Region', 'Status', 'Cases']]
    .groupby(['Year-month', 'Country/Region', 'Status'], as_index=False)
    .sum()
)
data_summary.head()

Unnamed: 0,Year-month,Country/Region,Status,Cases
0,2020-01,Afghanistan,Confirmed,0
1,2020-01,Afghanistan,Deaths,0
2,2020-01,Afghanistan,Recovered,0
3,2020-01,Albania,Confirmed,0
4,2020-01,Albania,Deaths,0
