In [9]:
import numpy as np 
import pandas as pd 
import datetime as datetime
import matplotlib.dates as dates
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Introduction 
The objective of our work is to show the stats of daily cases and deaths of covid-19, in the top 6 most populated contries in the world, and to also show together with this data the number of vaccinated people in those countries.
The top 6 most populated countries in the world are:

1. China (1,4 billion people)
2. India (1,38 billion people)
3. USA (331 million people)
4. Indonesia (273 million people)
5. Pakistan (220 million people)
6. Brazil (212 million people)

Source : https://www.worldometers.info/world-population/population-by-country/

In [10]:
df = pd.read_csv('datasets/country.csv')
vaccine = pd.read_csv('datasets/manufacturer.csv')
# covid_daily = pd.read_csv(r'../input/covid19-global-dataset/worldometer_coronavirus_daily_data.csv')

In [11]:
df.loc[(df['iso_code']=='IND')]

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
5276,India,IND,2021-01-15,0.0,0.0,,,,0.00,0.00,,,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5277,India,IND,2021-01-16,191181.0,191181.0,,191181.0,191181.0,0.01,0.01,,139.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5278,India,IND,2021-01-17,224301.0,224301.0,,33120.0,112150.0,0.02,0.02,,81.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5279,India,IND,2021-01-18,454049.0,454049.0,,229748.0,151350.0,0.03,0.03,,110.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5280,India,IND,2021-01-19,674835.0,674835.0,,220786.0,168709.0,0.05,0.05,,122.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5366,India,IND,2021-04-15,117223509.0,102349255.0,14874254.0,2730271.0,3269892.0,8.49,7.42,1.08,2369.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5367,India,IND,2021-04-16,119937641.0,104408484.0,15529157.0,2714132.0,3123212.0,8.69,7.57,1.13,2263.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5368,India,IND,2021-04-17,122622590.0,106431076.0,16191514.0,2684949.0,3003920.0,8.89,7.71,1.17,2177.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...
5369,India,IND,2021-04-18,123852566.0,107371770.0,16480796.0,1229976.0,2760572.0,8.97,7.78,1.19,2000.0,"Covaxin, Oxford/AstraZeneca",Government of India,http://mohfw.gov.in/pdf/CumulativeCOVIDVaccina...


In [12]:
def get_country_data(df,iso):
    return df.loc[(df['iso_code']==iso)]
def get_vac_data(df,location):
    return df.loc[(df['location']==location)]

In [13]:
#listing the 6 countries with the highest population
iso_6 = ['CHN','IND','USA','IDN','PAK','BRA']
dfs = []
for i in iso_6:
    dfs.append(get_country_data(df,i))
vac_USA = get_vac_data(vaccine,'United States')

# What vaccine is every country using

The only country we are interested that has the data of what vaccines are beeing used is the United States so we are going to show the evolution of every kind of vaccine. 

In [14]:
fig = px.line(vac_USA,x = 'date', y = 'total_vaccinations', color = 'vaccine' )
fig.write_html("file.html")
fig.show()

# Number of daily vaccinations by country

In [15]:

fig = go.Figure()
for i in range(6):
    fig.add_trace(go.Scatter(x=dfs[i]['date'], y=dfs[i]['daily_vaccinations'],
                    mode='lines',
                    name=iso_6[i]))
fig.update_layout(title='Number of daily vaccinated',
                   xaxis_title='Dates',
                   yaxis_title='No of people')
fig.show()

The numbers show that the countries which apply most of the vaccines in the top 6 most populated countries in the world are China, Usa and India, not exactly in that order. It also can be seen that the vaccination in Brazil and India is falling while the vaccination in China and USA are stable, This can be happening for different reasons, one of it could be the technological, while USA and China have different supliers and control of production Brazil and India still are dependable of importing goods nedded for the production of the vaccine. The countries wich have the least daily  number of applied vaccines are Indonesia and Pakistan.

# Number of vaccinated people 

In [16]:
fig = go.Figure()
for i in range(6):
    fig.add_trace(go.Scatter(x=dfs[i]['date'], y=dfs[i]['people_vaccinated'],
                    mode='lines',
                    name=iso_6[i]))
fig.update_layout(title='Number of vaccinated people',
                   xaxis_title='Dates',
                   yaxis_title='No of people')
fig.show()

China did not showed the data of people vaccinated, in this case we can look that India and USA are the countries, in our analysis, wich have the most number of people vaccinated but by the number of daily dosis applied we can deduce that China is also a big player in the number of vaccinated people.

In [17]:

fig = go.Figure()
for i in range(6):
    fig.add_trace(go.Scatter(x=dfs[i]['date'], y=dfs[i]['people_fully_vaccinated'],
                    mode='lines',
                    name=iso_6[i]))
fig.update_layout(title='Number of fully vaccinated people',
                   xaxis_title='Dates',
                   yaxis_title='No of people')
fig.show()

In [18]:
fig = go.Figure()
for i in range(6):
    fig.add_trace(go.Scatter(x=dfs[i]['date'], y=dfs[i]['total_vaccinations'],
                    mode='lines',
                    name=iso_6[i]))
fig.update_layout(title='Total vaccination',
                   xaxis_title='Dates',
                   yaxis_title='No of vaccines')
fig.show()

In [19]:
fig = go.Figure()
for i in range(6):
    fig.add_trace(go.Scatter(x=dfs[i]['date'], y=dfs[i]['total_vaccinations_per_hundred'],
                    mode='lines',
                    name=iso_6[i]))
fig.update_layout(title='Total vaccination per hundred',
                   xaxis_title='Dates',
                   yaxis_title='No of vaccines')
fig.show()

# USA cases, deaths and vaccinated people 

In [19]:
def daily_cases(df,country):
    return df.loc[(df['country']==country)]

In [20]:
#watching covid cases in USA,Brazil,China,India,Pakistan
covid_daily_USA = daily_cases(covid_daily,'USA')
covid_daily_IND = daily_cases(covid_daily,'India')
covid_daily_CHN = daily_cases(covid_daily,'China')
covid_daily_PAK = daily_cases(covid_daily,'Pakistan')
covid_daily_BRA = daily_cases(covid_daily,'Brazil')
covid_daily_IDN = daily_cases(covid_daily,'Indonesia')

NameError: name 'covid_daily' is not defined

In [13]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_USA['daily_new_cases'], x=covid_daily_USA['date'], name="No of cases daily (USA)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[2]['date'],y = dfs[2]['people_vaccinated'], name = 'People vaccinated' ),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="USA cases and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [14]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_USA['daily_new_deaths'], x=covid_daily_USA['date'], name="No of deaths daily (USA)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[2]['date'],y = dfs[2]['people_vaccinated'], name = 'People vaccinated' ),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="USA cases and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of deaths of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

# Brazil cases, deaths and vaccinated people 

In [15]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_BRA['daily_new_cases'], x=covid_daily_BRA['date'], name="No of cases daily (BRA)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[-1]['date'],y = dfs[-1]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="Brazil cases and vaccination"
    
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [16]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_BRA['daily_new_deaths'], x=covid_daily_BRA['date'], name="No of deaths daily (BRA)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[-1]['date'],y = dfs[-1]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="Brazil deaths and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of deaths of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

# India cases, deaths and vaccinated people 

In [17]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_IND['daily_new_cases'], x=covid_daily_IND['date'], name="No of cases daily (IND)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[1]['date'],y = dfs[1]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="India cases and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [18]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_IND['daily_new_deaths'], x=covid_daily_IND['date'], name="No of deaths daily (IND)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[1]['date'],y = dfs[1]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="India deaths and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

# Pakistan cases, deaths and vaccinated people 

In [19]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_PAK['daily_new_cases'], x=covid_daily_PAK['date'], name="No of cases daily (PAK)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[-2]['date'],y = dfs[-2]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="Pakistan cases and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [20]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_PAK['daily_new_deaths'], x=covid_daily_PAK['date'], name="No of deaths daily (PAK)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[-2]['date'],y = dfs[-2]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="Pakistan deaths and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of deaths of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

# Indonesia cases, deaths and vaccinated people 

In [21]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_IDN['daily_new_cases'], x=covid_daily_IDN['date'], name="No of cases daily (IDN)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[3]['date'],y = dfs[3]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="Indonesia cases and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [22]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_IDN['daily_new_deaths'], x=covid_daily_IDN['date'], name="No of deaths daily (PAK)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[3]['date'],y = dfs[3]['people_vaccinated'], name = 'People vaccinated'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="Indonesia deaths and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

# China cases, deaths and vaccinated people 

As China does not show the number of vaccinated people we will use the number of vaccines applied.In the other countries we used the number of vaccinated people with at leaast one doses of the vaccine aplied.

In [23]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_CHN['daily_new_cases'], x=covid_daily_CHN['date'], name="No of cases daily (CHN)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[0]['date'],y = dfs[0]['total_vaccinations'], name = 'applied vaccines'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="China cases and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of cases of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [24]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(y=covid_daily_CHN['daily_new_deaths'], x=covid_daily_CHN['date'], name="No of deaths daily (CHN)"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= dfs[0]['date'],y = dfs[0]['total_vaccinations'], name = 'applied vaccines'),
    secondary_y=True,
)

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="China deaths and vaccination"
)

fig.update_xaxes(title_text="date")

fig.update_yaxes(title_text="No of deaths of covid", secondary_y=False)
fig.update_yaxes(title_text="No of people with at least one dose", secondary_y=True)

fig.show()

In [8]:
df["Total_vaccinations(count)"] = df.groupby("country").total_vaccinations.tail(1)

In [13]:
df["Total_vaccinations(count)"].fillna(0, inplace=True)

In [15]:
dfToPlot = df.groupby("country")["Total_vaccinations(count)"].mean().sort_values(ascending=False)

In [18]:
dfToPlot.index

Index(['United States', 'China', 'India', 'Ethiopia', 'United Kingdom',
       'Brazil', 'England', 'Turkey', 'Germany', 'Indonesia',
       ...
       'Falkland Islands', 'Niger', 'Syria', 'Montserrat', 'El Salvador',
       'Pakistan', 'Egypt', 'United Arab Emirates', 'Belarus', 'Kuwait'],
      dtype='object', name='country', length=190)

In [23]:
go.Figure({
    "type" : 'choropleth',
    "locations" : dfToPlot.index,
    "locationmode" : 'country names',
    # "colorscale" : ['Viridis'],
    "z" : dfToPlot.values
})

In [26]:
df['a'] = df.groupby("country").people_fully_vaccinated_per_hundred.tail(1)

In [28]:
df.groupby("country")["a"].mean().sort_values(ascending=False)

country
Gibraltar           91.45
Israel              57.67
Seychelles          51.38
Falkland Islands    50.96
Cayman Islands      43.72
                    ...  
Uganda                NaN
Uzbekistan            NaN
Venezuela             NaN
Vietnam               NaN
Zambia                NaN
Name: a, Length: 190, dtype: float64