# Part-1: COVID-19 World data analysis

In [12]:
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.parser import parse
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

In [13]:
# Initialise plotly's offline notebook mode before any figure is rendered.
init_notebook_mode(connected=True)  

# Importing Data into a DataFrame

In [193]:
# Read only the columns used by the world-level analysis from the OWID export.
owid_columns = [
    'iso_code', 'continent', 'location', 'date',
    'total_cases', 'total_deaths', 'population',
]
data = pd.read_csv('owid-covid-data.csv', usecols=owid_columns)

In [194]:
# Silence pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None

In [198]:
# Discard every row that has a missing value in any of the loaded columns.
data = data.dropna()

# Sum cases and deaths over all remaining locations for each date.
# The columns are selected with a list: selecting with a bare tuple of keys
# ("a", "b") is deprecated and triggers the warning this cell used to emit.
world_data = (
    data.groupby("date")[["total_cases", "total_deaths"]]
    .sum()
    .reset_index()
)
world_data


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,date,total_cases,total_deaths
0,2020-01-22,548.0,17.0
1,2020-01-23,643.0,18.0
2,2020-01-24,920.0,26.0
3,2020-01-25,1406.0,42.0
4,2020-01-26,2075.0,56.0
...,...,...,...
309,2020-11-26,60970421.0,1432032.0
310,2020-11-27,61642266.0,1442649.0
311,2020-11-28,62240864.0,1452395.0
312,2020-11-29,62727394.0,1459302.0


In [114]:
# Donut chart comparing worldwide confirmed cases with deaths.
# total_cases is a cumulative confirmed-case count, so it is labelled as such
# (it is not a count of currently active cases).
labels = ["Confirmed cases", "Deaths"]

# world_data comes from a groupby on "date", so its rows are in ascending date
# order and the LAST row is the most recent snapshot (row 0 is the first day).
values = world_data[["total_cases", "total_deaths"]].iloc[-1].tolist()

# The two values/labels are passed directly; no data frame is needed because
# they are not columns of world_data.
fig = px.pie(
    values=values,
    names=labels,
    color_discrete_sequence=['rgb(77,146,33)', 'rgb(77,77,77)'],
    hole=0.7,
)
fig.show()

In [115]:
# Line chart of worldwide cumulative confirmed cases.
# The x axis is the row index of world_data (one row per date), i.e. a day counter.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=world_data.index,
        y=world_data['total_cases'],
        mode='lines',
        name='Confirmed cases',
    )
)

fig.update_layout(
    title='Evolution of Confirmed cases over time in the world',  # fixed typo: "word" -> "world"
    template='plotly_white',
    yaxis_title="Confirmed cases",
    xaxis_title="Days",
)

fig.show()

In [116]:
# Dotted black line of worldwide cumulative deaths against the row index
# of world_data (one row per date).
deaths_trace = go.Scatter(
    x=world_data.index,
    y=world_data['total_deaths'],
    name='Deaths',
    marker_color='black',
    mode='lines',
    line={'dash': 'dot'},
)

fig = go.Figure()
fig.add_trace(deaths_trace)
fig.update_layout(
    title='Evolution of Deaths over time in the world',
    template='plotly_white',
    yaxis_title="Deaths",
    xaxis_title="Days",
)
fig.show()

In [142]:
# Daily increments: difference between each row's cumulative total and the
# previous row's (the first row has no predecessor, so diff() leaves it NaN).
world_data["cases_per_day"] = world_data["total_cases"].diff()
world_data["deaths_per_day"] = world_data["total_deaths"].diff()

In [141]:
# Bar chart of new cases per day: dates on the x axis, daily case counts on y.
fig = go.Figure(
    go.Bar(
        x=world_data['date'],
        y=world_data['cases_per_day'],
    )
)
fig.update_layout(
    title='Cases In Each Day',
    template='plotly_white',
    # Axis titles were previously swapped relative to the plotted data.
    xaxis_title="Days",
    yaxis_title="Cases",
)
fig.show()

In [143]:
# Bar chart of new deaths per day: dates on the x axis, daily death counts on y.
fig = go.Figure(
    go.Bar(
        x=world_data['date'],
        y=world_data['deaths_per_day'],
        marker_color='rgb(13,48,100)',
    )
)
fig.update_layout(
    title='Deaths In Each Day',
    template='plotly_white',
    # Axis titles were previously swapped relative to the plotted data.
    xaxis_title="Days",
    yaxis_title="Deaths",
)
fig.show()

In [154]:
# One row per country, ordered from most to fewest confirmed cases.
# tail(1) keeps the last row of each location group in the order the rows
# appear in `data`.
# NOTE(review): this is the latest snapshot only if rows are in ascending date
# order within each location — confirm against the CSV.
# Columns are selected with a list; selecting with a bare tuple of keys is
# deprecated and caused the warning this cell used to emit.
country_columns = ["location", "total_cases", "total_deaths"]
data_per_country = (
    data.groupby("location")[country_columns]
    .tail(1)
    .sort_values("total_cases", ascending=False)
    .reset_index(drop=True)
)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [155]:
# Horizontal bars: one bar per country, bar length = confirmed cases.
cases_bar = go.Bar(
    x=data_per_country['total_cases'],
    y=data_per_country['location'],
    orientation='h',
)
fig = go.Figure(cases_bar)
fig.update_layout(
    title='Confirmed Cases In Each Country',
    template='plotly_white',
    xaxis_title="Confirmed Cases",
    yaxis_title="Countries",
)
fig.show()

In [157]:
# Horizontal black bars: one bar per country, bar length = deaths.
deaths_bar = go.Bar(
    x=data_per_country['total_deaths'],
    y=data_per_country['location'],
    orientation='h',
    marker_color='black',
)
fig = go.Figure(deaths_bar)
fig.update_layout(
    title='Deaths In Each Country',
    template='plotly_white',
    xaxis_title="Deaths",
    yaxis_title="Countries",
)
fig.show()

In [160]:
# World map coloured by confirmed cases; countries are matched by name.
country_names = data_per_country['location']
fig = px.choropleth(
    data_per_country,
    locations=country_names,
    color=data_per_country['total_cases'],
    locationmode='country names',
    hover_name=country_names,
    projection="natural earth",
    color_continuous_scale=px.colors.sequential.Tealgrn,
    template='plotly_dark',
)
fig.update_layout(title='Confirmed Cases In Each Country')
fig.show()


In [162]:
# World map coloured by deaths; countries are matched by name.
# No projection is passed here, so plotly's default is used.
country_names = data_per_country['location']
fig = px.choropleth(
    data_per_country,
    locations=country_names,
    color=data_per_country['total_deaths'],
    locationmode='country names',
    hover_name=country_names,
    color_continuous_scale=px.colors.sequential.Tealgrn,
    template='plotly_dark',
)
fig.update_layout(title='Deaths In Each Country')
fig.show()

In [163]:
# Bar chart of the first ten rows of data_per_country (already sorted by
# total_cases, descending), with the case count printed on each bar.
top_locations = data_per_country['location'].head(10)
top_cases = data_per_country['total_cases'].head(10)

top_bar = go.Bar(
    x=top_locations,
    y=top_cases,
    text=top_cases,
    textposition='auto',
    marker_color='black',
)
fig = go.Figure(data=[top_bar])
fig.update_layout(
    title='Most 10 infected Countries',
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
    template='plotly_white',
)
fig.show()

In [172]:
# Pie chart of each country's share of confirmed cases.
# The slice sizes come from total_cases, so the title says "Confirmed cases"
# (it previously read "Recovered cases", which this data does not contain).
fig = px.pie(
    data_per_country,
    values=data_per_country['total_cases'],
    names=data_per_country['location'],
    title='Confirmed cases by country',
)
# Put the percentage and country label inside each slice.
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(template='plotly_white')
fig.show()

In [199]:
# South Korea
# Take an explicit copy of the filtered rows so the column assignment below
# writes to an independent frame rather than to a slice of `data` (which is
# what pandas' SettingWithCopyWarning flags).
sk = data.loc[data.location == 'South Korea'].copy()

# Parse the date strings, then render them as "day month-abbreviation"
# (e.g. "20 Feb") for the plot axis. The year is intentionally not included.
sk['date'] = pd.to_datetime(sk['date']).dt.strftime("%d %b")

In [182]:
# Spain
# Take an explicit copy of the filtered rows so the column assignment below
# writes to an independent frame rather than to a slice of `data` (which is
# what pandas' SettingWithCopyWarning flags).
sp = data.loc[data.location == 'Spain'].copy()

# Parse the date strings, then render them as "day month-abbreviation"
# (e.g. "03 Mar") for the plot axis. The year is intentionally not included.
sp['date'] = pd.to_datetime(sp['date']).dt.strftime("%d %b")

In [183]:
# United States
# Filter the rows and renumber them from 0.
us_rows = data['location'] == 'United States'
us = data.loc[us_rows].reset_index(drop=True)

# Parse the date strings and re-render them as "day month-abbreviation".
us['date'] = pd.to_datetime(us['date']).dt.strftime("%d %b")

In [200]:
# One line per country on a shared chart of cumulative confirmed cases.
country_frames = [
    ('South Korea', sk),
    ('Spain', sp),
    ('United States', us),
]
fig_data = [
    go.Scatter(x=frame['date'], y=frame['total_cases'], mode='lines', name=country)
    for country, frame in country_frames
]
layout = go.Layout(title='COVID cases trend in South Korea, Spain and United States')
figure = go.Figure(data=fig_data, layout=layout)
iplot(figure)

In [185]:
# Cumulative confirmed cases for South Korea on its own axis scale.
fig1 = px.line(
    sk,
    x="date",
    y="total_cases",
    title="South Korea COVID cases trend",
)
iplot(fig1)

In [186]:
# Cumulative confirmed cases for Spain on its own axis scale.
fig2 = px.line(
    sp,
    x="date",
    y="total_cases",
    title="Spain COVID cases trend",
)
iplot(fig2)

In [187]:
# Cumulative confirmed cases for the United States on its own axis scale.
fig3 = px.line(
    us,
    x="date",
    y="total_cases",
    title="United States COVID cases trend",
)
iplot(fig3)

In [188]:
import os

# Contact-tracing data set. The default is the absolute path used on the
# original author's machine; set the COVID_CT_DATA_PATH environment variable
# to load the CSV from another location without editing this cell.
ct_data_path = os.environ.get(
    "COVID_CT_DATA_PATH",
    r'C:\Users\Sai Sheshank Vaidya\Covid-19DataAnalysis\Data\Part-2\covid-contact-tracing.csv',
)
ct_data = pd.read_csv(ct_data_path)
ct_data

Unnamed: 0,Entity,Code,Date,contact_tracing
0,Afghanistan,AFG,2020-01-01,0
1,Afghanistan,AFG,2020-01-02,0
2,Afghanistan,AFG,2020-01-03,0
3,Afghanistan,AFG,2020-01-04,0
4,Afghanistan,AFG,2020-01-05,0
...,...,...,...,...
57863,Zimbabwe,ZWE,2020-11-12,1
57864,Zimbabwe,ZWE,2020-11-13,1
57865,Zimbabwe,ZWE,2020-11-14,1
57866,Zimbabwe,ZWE,2020-11-15,1


In [189]:
# Animated world map of the contact_tracing value, one frame per Date,
# with countries located by the "Code" column.
# NOTE(review): contact_tracing looks numeric in the preview above; if so,
# the discrete colour sequence may have no visible effect — confirm.
ct_map = px.choropleth(
    ct_data,
    locations="Code",
    color="contact_tracing",
    color_discrete_sequence=px.colors.qualitative.Plotly,
    projection="natural earth",
    hover_name="Entity",
    animation_frame="Date",
)
ct_map

In [196]:
# Display the per-date world totals again for reference.
world_data

Unnamed: 0,date,total_cases,total_deaths
0,2020-01-22,548.0,17.0
1,2020-01-23,643.0,18.0
2,2020-01-24,920.0,26.0
3,2020-01-25,1406.0,42.0
4,2020-01-26,2075.0,56.0
...,...,...,...
309,2020-11-26,60970421.0,1432032.0
310,2020-11-27,61642266.0,1442649.0
311,2020-11-28,62240864.0,1452395.0
312,2020-11-29,62727394.0,1459302.0


In [201]:
# Display the per-location frame (after dropna) for reference.
data

Unnamed: 0,iso_code,continent,location,date,total_cases,total_deaths,population
59,AFG,Asia,Afghanistan,2020-03-22,40.0,1.0,38928341.0
60,AFG,Asia,Afghanistan,2020-03-23,40.0,1.0,38928341.0
61,AFG,Asia,Afghanistan,2020-03-24,74.0,1.0,38928341.0
62,AFG,Asia,Afghanistan,2020-03-25,84.0,2.0,38928341.0
63,AFG,Asia,Afghanistan,2020-03-26,94.0,4.0,38928341.0
...,...,...,...,...,...,...,...
59289,ZWE,Africa,Zimbabwe,2020-11-26,9623.0,274.0,14862927.0
59290,ZWE,Africa,Zimbabwe,2020-11-27,9714.0,275.0,14862927.0
59291,ZWE,Africa,Zimbabwe,2020-11-28,9822.0,275.0,14862927.0
59292,ZWE,Africa,Zimbabwe,2020-11-29,9822.0,275.0,14862927.0


In [203]:
# Re-read the OWID export, this time keeping the per-million columns, and
# discard rows with a missing value in any of them.
per_million_columns = [
    'iso_code', 'continent', 'location', 'date',
    'total_cases_per_million', 'total_deaths_per_million',
]
per_mil_data = pd.read_csv('owid-covid-data.csv', usecols=per_million_columns).dropna()
per_mil_data

Unnamed: 0,iso_code,continent,location,date,total_cases_per_million,total_deaths_per_million
59,AFG,Asia,Afghanistan,2020-03-22,1.028,0.026
60,AFG,Asia,Afghanistan,2020-03-23,1.028,0.026
61,AFG,Asia,Afghanistan,2020-03-24,1.901,0.026
62,AFG,Asia,Afghanistan,2020-03-25,2.158,0.051
63,AFG,Asia,Afghanistan,2020-03-26,2.415,0.103
...,...,...,...,...,...,...
59289,ZWE,Africa,Zimbabwe,2020-11-26,647.450,18.435
59290,ZWE,Africa,Zimbabwe,2020-11-27,653.572,18.502
59291,ZWE,Africa,Zimbabwe,2020-11-28,660.839,18.502
59292,ZWE,Africa,Zimbabwe,2020-11-29,660.839,18.502


In [209]:
# South Korea
# Take an explicit copy of the filtered rows so the column assignment below
# writes to an independent frame rather than to a slice of `per_mil_data`
# (which is what pandas' SettingWithCopyWarning flags).
sk_pm = per_mil_data.loc[per_mil_data.location == 'South Korea'].copy()

# Parse the date strings, then render them as "day month-abbreviation"
# (e.g. "20 Feb") for the plot axis. The year is intentionally not included.
sk_pm['date'] = pd.to_datetime(sk_pm['date']).dt.strftime("%d %b")
sk_pm

Unnamed: 0,iso_code,continent,location,date,total_cases_per_million,total_deaths_per_million
28904,KOR,Asia,South Korea,20 Feb,2.029,0.020
28905,KOR,Asia,South Korea,21 Feb,3.979,0.039
28906,KOR,Asia,South Korea,22 Feb,8.446,0.039
28907,KOR,Asia,South Korea,23 Feb,11.742,0.117
28908,KOR,Asia,South Korea,24 Feb,16.248,0.156
...,...,...,...,...,...,...
29184,KOR,Asia,South Korea,26 Nov,641.457,10.065
29185,KOR,Asia,South Korea,27 Nov,650.976,10.182
29186,KOR,Asia,South Korea,28 Nov,659.734,10.201
29187,KOR,Asia,South Korea,29 Nov,667.087,10.260


In [210]:
# Spain
# Take an explicit copy of the filtered rows so the column assignment below
# writes to an independent frame rather than to a slice of `per_mil_data`
# (which is what pandas' SettingWithCopyWarning flags).
sp_pm = per_mil_data.loc[per_mil_data.location == 'Spain'].copy()

# Parse the date strings, then render them as "day month-abbreviation"
# (e.g. "03 Mar") for the plot axis. The year is intentionally not included.
sp_pm['date'] = pd.to_datetime(sp_pm['date']).dt.strftime("%d %b")

sp_pm

Unnamed: 0,iso_code,continent,location,date,total_cases_per_million,total_deaths_per_million
16946,ESP,Europe,Spain,03 Mar,3.529,0.021
16947,ESP,Europe,Spain,04 Mar,4.748,0.043
16948,ESP,Europe,Spain,05 Mar,5.540,0.064
16949,ESP,Europe,Spain,06 Mar,8.555,0.107
16950,ESP,Europe,Spain,07 Mar,10.694,0.214
...,...,...,...,...,...,...
17214,ESP,Europe,Spain,26 Nov,34592.290,949.079
17215,ESP,Europe,Spain,27 Nov,34824.416,955.367
17216,ESP,Europe,Spain,28 Nov,34824.416,955.367
17217,ESP,Europe,Spain,29 Nov,34824.416,955.367


In [211]:
# United States
# Filter the rows and renumber them from 0.
us_pm_rows = per_mil_data['location'] == 'United States'
us_pm = per_mil_data.loc[us_pm_rows].reset_index(drop=True)

# Parse the date strings and re-render them as "day month-abbreviation".
us_pm['date'] = pd.to_datetime(us_pm['date']).dt.strftime("%d %b")

us_pm

Unnamed: 0,iso_code,continent,location,date,total_cases_per_million,total_deaths_per_million
0,USA,North America,United States,29 Feb,0.076,0.003
1,USA,North America,United States,01 Mar,0.097,0.003
2,USA,North America,United States,02 Mar,0.166,0.018
3,USA,North America,United States,03 Mar,0.224,0.021
4,USA,North America,United States,04 Mar,0.323,0.033
...,...,...,...,...,...,...
271,USA,North America,United States,26 Nov,38921.936,795.927
272,USA,North America,United States,27 Nov,39542.950,800.169
273,USA,North America,United States,28 Nov,40013.024,803.761
274,USA,North America,United States,29 Nov,40432.668,806.256


In [213]:
# One line per country on a shared chart of cumulative cases per million.
per_million_frames = [
    ('South Korea', sk_pm),
    ('Spain', sp_pm),
    ('United States', us_pm),
]
fig_data = [
    go.Scatter(
        x=frame['date'],
        y=frame['total_cases_per_million'],
        mode='lines',
        name=country,
    )
    for country, frame in per_million_frames
]
layout = go.Layout(title='COVID cases per million trend in South Korea, Spain and United States')
figure = go.Figure(data=fig_data, layout=layout)
iplot(figure)