In [15]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

In [2]:
data = pd.read_csv('covid_19_clean_complete.csv')

In [3]:
data.shape

(49068, 10)

In [4]:
data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa


In [9]:
data['Date'] = pd.to_datetime(data['Date'])
data.dtypes

Province/State            object
Country/Region            object
Lat                      float64
Long                     float64
Date              datetime64[ns]
Confirmed                  int64
Deaths                     int64
Recovered                  int64
Active                     int64
WHO Region                object
dtype: object

In [44]:
data['Year'] = pd.DatetimeIndex(data['Date']).year

In [11]:
data['Country/Region'].value_counts()

China             6204
Canada            2256
France            2068
United Kingdom    2068
Australia         1504
                  ... 
Holy See           188
Honduras           188
Hungary            188
Iceland            188
Lesotho            188
Name: Country/Region, Length: 187, dtype: int64

In [12]:
data['WHO Region'].value_counts()

Europe                   15040
Western Pacific          10340
Africa                    9024
Americas                  8648
Eastern Mediterranean     4136
South-East Asia           1880
Name: WHO Region, dtype: int64

### Exploring Cases Each WHO Region

In [60]:
region_wise_confirmed = data.groupby('WHO Region')[['Confirmed', 'Deaths', 'Recovered']].sum()
region_wise_confirmed

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
WHO Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,21791827,439978,11193730
Americas,402261194,19359292,157069444
Eastern Mediterranean,74082892,1924029,48050703
Europe,248879793,19271040,123202075
South-East Asia,55118365,1458134,30030327
Western Pacific,26374411,932430,18861950


In [63]:
barchart = px.bar(
    data_frame = region_wise_confirmed, x = region_wise_confirmed.index , y = 'Confirmed',               
    opacity = 0.9, orientation = "v", barmode = 'relative',          
    hover_data = ['Deaths', 'Recovered'],
    color_continuous_scale=px.colors.diverging.Picnic,      
    range_color=[1,10000], 
    labels={"y":"Total Confirmed Cases"}, title='WHO Region-wise Total Cases',
    template='gridon'

)


pio.show(barchart)

Americas and Europe had the most cases in 2020.

### Top 15 Countries with Most Active Cases

In [66]:
most_active = data.groupby('Country/Region')['Active'].sum().sort_values(ascending = False).iloc[:15]
most_active

Country/Region
US                156981121
Brazil             31094060
United Kingdom     22624595
Russia             19668578
India              15987913
France             10980287
Spain               9277432
Canada              8656985
Peru                7748957
Italy               7363518
Pakistan            5633262
South Africa        5150341
Bangladesh          4924394
Netherlands         4528235
Sweden              4524247
Name: Active, dtype: int64

In [72]:
pie_chart = px.pie(
                data_frame = most_active, values=most_active.values, names = most_active.index,
                labels = {"state":"the State"},       
                title = 'Top 15 Countries with the Most Active Cases',     
                template = 'presentation',            
                width = 800, height = 600, hole = 0.2,                           
                )

pio.show(pie_chart)

### Distribution Country and WHO Region Wise

In [83]:
pd.set_option('display.max_rows', None)

df1 = data.groupby(['WHO Region', "Country/Region"])['Confirmed', 'Deaths', 'Recovered'].sum().sort_values(ascending = False,by = ['Confirmed', 'Deaths', 'Recovered'])
df1


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Deaths,Recovered
WHO Region,Country/Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Americas,US,224345948,11011411,56353416
Americas,Brazil,89524967,3938034,54492873
Europe,Russia,45408411,619385,25120448
South-East Asia,India,40883464,1111831,23783720
Europe,Spain,27404045,3033030,15093583
Europe,United Kingdom,26748587,3997775,126217
Europe,Italy,26745145,3707717,15673910
Europe,France,21210926,3048524,7182115
Europe,Germany,21059152,871322,17107839
Eastern Mediterranean,Iran,19339267,1024136,15200895
