## Johns Hopkins COVID-19 Data Analysis

In [34]:
import pandas as pd

### Reading data from the Johns Hopkins CSSE GitHub repository

In [35]:
# Raw-GitHub location of the CSSE global time-series CSVs, one file per case
# status. The shared directory is spelled once so the three URLs cannot drift.
# NOTE(review): the URLs point at the `master` branch, so the data fetched
# depends on when the notebook is run.
_CSSE_TIME_SERIES_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
)
Confirmed_filepath = _CSSE_TIME_SERIES_URL + 'time_series_covid19_confirmed_global.csv'
Death_filepath = _CSSE_TIME_SERIES_URL + 'time_series_covid19_deaths_global.csv'
Recovered_filepath = _CSSE_TIME_SERIES_URL + 'time_series_covid19_recovered_global.csv'

In [36]:
 def import_data(filepath):
    data = filepath
    data = pd.read_csv(data)
    data.rename(columns={'Province/State':'Province','Country/Region':'Country'},inplace = True)
    return data      

In [37]:
def transform_data(data):
    """Unpivot a wide case table into long format.

    The columns ``Province``, ``Country``, ``Lat``, ``Long`` and ``Status``
    are kept as identifiers, so ``data`` must already contain all five.
    Every remaining column is melted: its header is stored in ``'Date'``
    and its cell values in ``'Count'``.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide table holding the five identifier columns plus the columns to
        unpivot.

    Returns
    -------
    pandas.DataFrame
        Long table with the identifier columns followed by ``'Date'`` and
        ``'Count'``.
    """
    identifier_columns = ['Province', 'Country', 'Lat', 'Long', 'Status']
    long_frame = data.melt(
        id_vars=identifier_columns,
        var_name='Date',
        value_name='Count',
    )
    return long_frame

#### Importing the Confirmed, Death and Recovered global cases into data frames and reshaping them

In [38]:
# Load the confirmed-case table, tag every row with its status, then reshape
# it so each date column becomes rows of (Date, Count).
ConfirmedCases = transform_data(
    import_data(Confirmed_filepath).assign(Status='Confirmed')
)

In [39]:
# Preview the first rows of the reshaped confirmed-case frame.
ConfirmedCases.head()

Unnamed: 0,Province,Country,Lat,Long,Status,Date,Count
0,,Afghanistan,33.93911,67.709953,Confirmed,1/22/20,0
1,,Albania,41.1533,20.1683,Confirmed,1/22/20,0
2,,Algeria,28.0339,1.6596,Confirmed,1/22/20,0
3,,Andorra,42.5063,1.5218,Confirmed,1/22/20,0
4,,Angola,-11.2027,17.8739,Confirmed,1/22/20,0


In [40]:
# Load the death table, tag every row with its status, then reshape it so
# each date column becomes rows of (Date, Count).
DeathCases = transform_data(
    import_data(Death_filepath).assign(Status='Death')
)

In [41]:
# Preview the first rows of the reshaped death frame.
DeathCases.head()

Unnamed: 0,Province,Country,Lat,Long,Status,Date,Count
0,,Afghanistan,33.93911,67.709953,Death,1/22/20,0
1,,Albania,41.1533,20.1683,Death,1/22/20,0
2,,Algeria,28.0339,1.6596,Death,1/22/20,0
3,,Andorra,42.5063,1.5218,Death,1/22/20,0
4,,Angola,-11.2027,17.8739,Death,1/22/20,0


In [42]:
# Load the recovered table, tag every row with its status, then reshape it so
# each date column becomes rows of (Date, Count).
RecoveredCases = transform_data(
    import_data(Recovered_filepath).assign(Status='Recovered')
)

In [43]:
# Preview the first rows of the reshaped recovered frame.
RecoveredCases.head()

Unnamed: 0,Province,Country,Lat,Long,Status,Date,Count
0,,Afghanistan,33.93911,67.709953,Recovered,1/22/20,0
1,,Albania,41.1533,20.1683,Recovered,1/22/20,0
2,,Algeria,28.0339,1.6596,Recovered,1/22/20,0
3,,Andorra,42.5063,1.5218,Recovered,1/22/20,0
4,,Angola,-11.2027,17.8739,Recovered,1/22/20,0


### Merging data

In [44]:
# Stack the three long-format frames into one table. ignore_index=True gives
# the result a fresh 0..n-1 index instead of repeating each input frame's own
# row labels, so row labels stay unique.
Totaldata = pd.concat(
    [ConfirmedCases, DeathCases, RecoveredCases],
    ignore_index=True,
)

In [45]:
# Check the column dtypes of the combined frame. 'Date' is still an object
# (string) column at this point; it is converted to datetimes in the next cell.
Totaldata.dtypes

Province     object
Country      object
Lat         float64
Long        float64
Status       object
Date         object
Count         int64
dtype: object

In [46]:
# Parse the 'Date' strings (month/day/two-digit year, e.g. '1/22/20') with an
# explicit format and make them the index.
# The guard keeps this cell safe to re-run: once 'Date' has become the index
# it is no longer a column, so an unguarded second run would raise KeyError.
if 'Date' in Totaldata.columns:
    Totaldata = (
        Totaldata
        .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%m/%d/%y'))
        .set_index('Date')
    )

In [47]:
# Summarise the combined frame: index range, per-column non-null counts,
# dtypes and memory usage.
Totaldata.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 745920 entries, 2020-01-22 to 2022-06-27
Data columns (total 6 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   Province  222888 non-null  object 
 1   Country   745920 non-null  object 
 2   Lat       741480 non-null  float64
 3   Long      741480 non-null  float64
 4   Status    745920 non-null  object 
 5   Count     745920 non-null  int64  
dtypes: float64(2), int64(1), object(3)
memory usage: 39.8+ MB


In [48]:
# Preview the combined frame, now indexed by date.
Totaldata.head()

Unnamed: 0_level_0,Province,Country,Lat,Long,Status,Count
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-22,,Afghanistan,33.93911,67.709953,Confirmed,0
2020-01-22,,Albania,41.1533,20.1683,Confirmed,0
2020-01-22,,Algeria,28.0339,1.6596,Confirmed,0
2020-01-22,,Andorra,42.5063,1.5218,Confirmed,0
2020-01-22,,Angola,-11.2027,17.8739,Confirmed,0


In [49]:
# Export the combined dataset for further visualization. The index is
# written out as well, so it appears as the first column of the CSV.
Totaldata.to_csv(path_or_buf="Totaldata.csv", index=True)