# pandemic

This is a simple Jupyter Notebook for Covid-19 Pandemic Analysis.

Load the required packages:

In [1]:
import pandas as pd
import numpy as np

Download the global confirmed-cases and deaths time series from the `CSSEGISandData/COVID-19` GitHub repository:

In [2]:
from pathlib import Path
from urllib import request

# Source CSV files and the local paths they are saved to.
confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
confirmed_file = './data/raw/confirmed.csv'
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
deaths_file = './data/raw/deaths.csv'

# urlretrieve does not create missing directories, so make sure the target
# folders exist before downloading (otherwise a fresh checkout fails).
Path(confirmed_file).parent.mkdir(parents=True, exist_ok=True)
Path(deaths_file).parent.mkdir(parents=True, exist_ok=True)

request.urlretrieve(confirmed_url, confirmed_file)
request.urlretrieve(deaths_url, deaths_file)

('./data/raw/deaths.csv', <http.client.HTTPMessage at 0x1f3fe1a8220>)

Load data into dataframes:

In [3]:
# Read both downloaded CSV files with identical options.
# index_col=False stops pandas from using the first column as the index.
confirmed, deaths = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in (confirmed_file, deaths_file)
)

Show structural information and summary statistics for both datasets:

In [4]:
# DataFrame.info() prints its report directly, but describe() only returns a
# frame. A notebook cell renders just its last bare expression, so the
# summary tables are passed to display() (provided by the IPython kernel)
# to make sure both of them are shown.
confirmed.info()
display(confirmed.describe())

deaths.info()
display(deaths.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Columns: 708 entries, Province/State to 12/25/21
dtypes: float64(2), int64(704), object(2)
memory usage: 1.5+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Columns: 708 entries, Province/State to 12/25/21
dtypes: float64(2), int64(704), object(2)
memory usage: 1.5+ MB


Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,12/16/21,12/17/21,12/18/21,12/19/21,12/20/21,12/21/21,12/22/21,12/23/21,12/24/21,12/25/21
count,278.0,278.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,...,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0,280.0
mean,20.156042,21.788955,0.060714,0.064286,0.092857,0.15,0.2,0.292857,0.467857,0.475,...,19064.532143,19088.942857,19110.057143,19125.103571,19149.489286,19176.7,19205.460714,19238.342857,19259.964286,19274.371429
std,25.283318,76.200169,1.015944,1.017487,1.436326,2.391517,3.109011,4.5429,7.470302,7.470809,...,74915.293517,75008.379394,75063.271362,75100.814998,75188.609515,75291.187559,75407.535239,75571.226069,75647.081044,75671.597846
min,-51.7963,-178.1165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,4.643279,-37.713675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.75
50%,21.51717,20.921188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,701.5,701.5,701.5,701.5,701.5,701.5,701.5,702.0,702.0,702.0
75%,40.39335,84.992575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7127.25,7134.75,7156.0,7173.0,7191.0,7210.5,7235.25,7253.0,7272.5,7284.5
max,71.7069,178.065,17.0,17.0,24.0,40.0,52.0,76.0,125.0,125.0,...,804335.0,806072.0,806583.0,806749.0,808201.0,810045.0,812069.0,815423.0,816436.0,816463.0


Show the first rows for `Poland` in both datasets:

In [5]:
# A notebook cell renders only its last bare expression, so each filtered
# preview is passed to display() (provided by the IPython kernel); otherwise
# the confirmed-cases preview would be computed and silently discarded.
display(confirmed[confirmed['Country/Region'] == 'Poland'].head(10))
display(deaths[deaths['Country/Region'] == 'Poland'].head(10))


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,12/16/21,12/17/21,12/18/21,12/19/21,12/20/21,12/21/21,12/22/21,12/23/21,12/24/21,12/25/21
213,,Poland,51.9194,19.1451,0,0,0,0,0,0,...,90306,90872,91415,91485,91514,92052,92829,93445,94041,94311


Clean up the data by keeping only the rows for Poland and dropping the non-date columns:

In [6]:
# Restrict each frame to the rows for Poland, then remove the descriptive
# columns so that only the per-date columns remain.
confirmed = (
    confirmed
    .loc[confirmed['Country/Region'].eq('Poland')]
    .drop(['Province/State', 'Lat', 'Long', 'Country/Region'], axis=1)
)
deaths = (
    deaths
    .loc[deaths['Country/Region'].eq('Poland')]
    .drop(['Province/State', 'Lat', 'Long', 'Country/Region'], axis=1)
)

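Calculate the differences between consecutive date columns (the first column has no predecessor, so its difference is set to 0):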

In [7]:
# Column-wise difference with the previous column (periods defaults to 1).
# The first column has nothing to subtract from and comes back as NaN,
# which is replaced with 0.
confirmed_daily = confirmed.diff(axis=1).replace(np.nan, 0)
deaths_daily = deaths.diff(axis=1).replace(np.nan, 0)


Unpivot (melt) the date columns into rows, giving one `date` column and one value column per frame:

In [8]:
# Turn every wide frame (one column per date) into a long frame with a
# "date" column and a single, descriptively named value column.
confirmed_total = pd.melt(confirmed, var_name="date", value_name="total_cases")
confirmed_daily = pd.melt(confirmed_daily, var_name="date", value_name="confirmed_by_day")
deaths_total = pd.melt(deaths, var_name="date", value_name="total_deaths")
deaths_daily = pd.melt(deaths_daily, var_name="date", value_name="deaths_by_day")

Merge the total and daily values for cases and deaths into a single table keyed by `date`:

In [16]:
# Join totals with daily values for each series, then join both series,
# always matching rows on the shared "date" column.
confirmed_temp = pd.merge(confirmed_total, confirmed_daily, how='left', on='date')
deaths_temp = pd.merge(deaths_total, deaths_daily, how='left', on='date')
poland_cases = pd.merge(confirmed_temp, deaths_temp, how='left', on='date')

# replace() returns a new frame rather than modifying in place, so the result
# has to be assigned; otherwise any NaN left by the left joins would survive
# and make the integer conversion below raise.
poland_cases = poland_cases.replace(np.nan, 0)
poland_cases = poland_cases.astype({'confirmed_by_day': int, 'deaths_by_day': int})

# Keep only the daily columns and show the most recent 100 rows.
poland_cases_filtered = poland_cases[['date','confirmed_by_day','deaths_by_day']]
poland_cases_filtered.tail(100)
