# pandemic

This is a simple Jupyter Notebook for Covid-19 Pandemic Analysis.

Load packages etc...

In [1]:
import pandas as pd
import numpy as np

Download data from `CSSEGISandData`:

In [2]:
from urllib import request
confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
confirmed_file = './data/raw/confirmed.csv'
request.urlretrieve(confirmed_url, confirmed_file)
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
deaths_file = './data/raw/deaths.csv'
request.urlretrieve(deaths_url, deaths_file)

('./data/raw/deaths.csv', <http.client.HTTPMessage at 0x16044a6d400>)

Load data into dataframes:

In [3]:
confirmed = pd.read_csv(confirmed_file, index_col=False)
deaths = pd.read_csv(deaths_file, index_col=False)

Show some info about data:

In [4]:
confirmed.info()
confirmed.describe()

deaths.info()
deaths.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Columns: 725 entries, Province/State to 1/11/22
dtypes: float64(2), int64(721), object(2)
memory usage: 1.5+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Columns: 725 entries, Province/State to 1/11/22
dtypes: float64(2), int64(721), object(2)
memory usage: 1.5+ MB


Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,1/2/22,1/3/22,1/4/22,1/5/22,1/6/22,1/7/22,1/8/22,1/9/22,1/10/22,1/11/22
count,278.0,278.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,...,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0
mean,20.156042,21.788955,0.060714,0.064286,0.092857,0.15,0.2,0.292857,0.467857,0.475,...,19444.017857,19465.078571,19492.853571,19520.617857,19546.539286,19572.5,19590.464286,19604.596429,19627.546429,19658.525
std,25.283318,76.200169,1.015944,1.017487,1.436326,2.391517,3.109011,4.5429,7.470302,7.470809,...,76275.508809,76362.649463,76483.215265,76590.488943,76691.519418,76817.889761,76870.765077,76920.171951,77015.771337,77148.692281
min,-51.7963,-178.1165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,4.643279,-37.713675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,22.75,22.75,22.75,22.75,22.75,22.75,22.75,22.75,23.75,25.5
50%,21.51717,20.921188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,708.5,709.5,709.5,710.5,710.5,713.0,721.0,726.0,727.5,728.5
75%,40.39335,84.992575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7355.0,7355.75,7365.25,7367.5,7367.75,7369.0,7369.75,7372.25,7375.25,7377.0
max,71.7069,178.065,17.0,17.0,24.0,40.0,52.0,76.0,125.0,125.0,...,826289.0,827977.0,830134.0,832120.0,833990.0,836605.0,837266.0,837665.0,839500.0,842141.0


Show some top data about `Poland`:

In [5]:
confirmed[confirmed['Country/Region'] == 'Poland'].head(10)
deaths[deaths['Country/Region'] == 'Poland'].head(10)


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/2/22,1/3/22,1/4/22,1/5/22,1/6/22,1/7/22,1/8/22,1/9/22,1/10/22,1/11/22
213,,Poland,51.9194,19.1451,0,0,0,0,0,0,...,97592,97601,98034,98666,99311,99428,99720,99742,99761,100254


Clean up data:

In [6]:
confirmed = confirmed[confirmed['Country/Region'] == 'Poland']
confirmed = confirmed.drop(columns=['Province/State','Lat','Long', 'Country/Region'])
deaths = deaths[deaths['Country/Region'] == 'Poland']
deaths = deaths.drop(columns=['Province/State','Lat','Long', 'Country/Region'])

Calculate differences:

In [7]:
confirmed_daily = confirmed.diff(periods=1, axis=1)
confirmed_daily = confirmed_daily.replace(np.nan, 0)
deaths_daily = deaths.diff(periods=1, axis=1)
deaths_daily = deaths_daily.replace(np.nan, 0)


Unpivot data:

In [8]:
confirmed_total = confirmed.melt(
        var_name="date",
        value_name="total_cases")

confirmed_daily = confirmed_daily.melt(
        var_name="date",
        value_name="confirmed_by_day")

deaths_total = deaths.melt(
        var_name="date",
        value_name="total_deaths")

deaths_daily = deaths_daily.melt(
        var_name="date",
        value_name="deaths_by_day")

Merge data:

In [9]:
confirmed_temp = pd.merge(confirmed_total, confirmed_daily, how='left', on='date')
deaths_temp = pd.merge(deaths_total, deaths_daily, how='left', on='date')
poland_cases = pd.merge(confirmed_temp, deaths_temp, how='left', on='date')
poland_cases.replace(np.nan, 0)
poland_cases.confirmed_by_day = poland_cases.confirmed_by_day.astype(int)
poland_cases.deaths_by_day = poland_cases.deaths_by_day.astype(int)

#poland_cases.tail(50)

poland_cases_filtered = poland_cases[['date','confirmed_by_day','deaths_by_day']]
poland_cases_filtered.tail(100)


Unnamed: 0,date,confirmed_by_day,deaths_by_day
621,10/4/21,683,0
622,10/5/21,1327,46
623,10/6/21,2086,33
624,10/7/21,2007,29
625,10/8/21,1894,31
...,...,...,...
716,1/7/22,11901,117
717,1/8/22,10897,292
718,1/9/22,11107,22
719,1/10/22,7787,19
