# SDG 3
**TEAM DATA DIVERS**\
*Written by Sidoine DAKO*

In [None]:
# Import the libraries used throughout the notebook
import numpy as np
import pandas as pd
from scipy.stats import pearsonr  # Pearson correlation coefficient and its p-value
from datetime import datetime
## Geographical representation (plotly choropleth maps)
import chart_studio.plotly as py
import plotly.offline as po
import plotly.graph_objs as pg
#import plotly.graph_objects as go
import matplotlib.pyplot as plt
import kaleido # backend plotly uses to save figures as static images (fig.write_image)
import plotly.express as px
# Enable inline rendering of plotly figures; connected=True loads plotly.js from a CDN
po.init_notebook_mode(connected = True)

In [None]:
# Load the WHO COVID-19 vaccination table directly from the web
# Source: https://covid19.who.int/who-data/vaccination-data.csv
vac_data = pd.read_csv(
    filepath_or_buffer='https://covid19.who.int/who-data/vaccination-data.csv'
)
# Preview the first five rows
vac_data.head(n=5)

In [None]:
# Print a concise summary of the table: column names, non-null counts and dtypes
vac_data.info()

In [None]:
# Inspect the missing data before removing it.
# Iterating a DataFrame yields its column labels, so the built-in `any(vac_data.isna())`
# only tests whether some label is truthy and never looks at the values.
# Reduce the boolean mask itself instead: first per column, then across columns.
print(vac_data.isna().any().any())  # True if at least one cell is missing
# Number of missing values in each column. The explicit DataFrame method avoids
# `np.sum(DataFrame)`, whose axis=None behaviour is deprecated in recent pandas.
vac_data.isna().sum()

In [None]:
# Build two cleaned versions of the raw table:
#   vac_data_rm  - only rows that are entirely empty are removed
#   vac_data_rm2 - complete cases only (a row is dropped as soon as one value is missing)
vac_data_rm = vac_data.dropna(axis=0, how="all")
vac_data_rm2 = vac_data.dropna(axis=0, how="any")
# Sanity check: every per-column count of missing values should now be 0.
# `.isna().sum()` replaces `np.sum(DataFrame)`, which relies on the deprecated
# axis=None reduction and is slated to return a single scalar in future pandas.
vac_data_rm2.isna().sum()

Compute the correlation

In [None]:
# Pearson correlation between the number of fully vaccinated persons and the number
# of vaccine types used, computed on rows without any missing value
complete_rows = vac_data_rm.dropna(axis=0)
pearsonr(
    complete_rows["PERSONS_FULLY_VACCINATED"].to_numpy(),
    complete_rows["NUMBER_VACCINES_TYPES_USED"].to_numpy(),
)

According to the correlation test, there is only a weak relationship between the number of fully vaccinated persons and the number of vaccine types used.

In [None]:
# Pearson correlation between the number of persons with at least one dose and the
# number of vaccine types used, computed on rows without any missing value
complete_rows = vac_data_rm.dropna(axis=0)
pearsonr(
    complete_rows["PERSONS_VACCINATED_1PLUS_DOSE"].to_numpy(),
    complete_rows["NUMBER_VACCINES_TYPES_USED"].to_numpy(),
)

According to the correlation analysis, the relationship between the number of persons who received one or more doses and the number of vaccine types used per country is weak.

## Spatial distribution using covid-19 data on vaccine

In [None]:
# Choropleth trace: each country is coloured by its number of fully vaccinated persons
full_vac = {
    'type': 'choropleth',
    'colorscale': 'Portland',
    'locationmode': "country names",  # match countries by their plain names
    'locations': vac_data_rm['COUNTRY'],
    'z': vac_data_rm['PERSONS_FULLY_VACCINATED'],
    'text': vac_data_rm['COUNTRY'],  # hover label
    'colorbar': {'title': 'Persons fully vaccinated'},
}

# Figure title and map settings (Robinson projection, lakes drawn in blue)
layout = {
    'title': 'Global persons fully vaccinated against COVID-19 - robinson projection',
    'geo': {
        'projection': {'type': 'robinson'},
        'showlakes': True,
        'lakecolor': 'rgb(0,191,255)',
    },
}

fig = pg.Figure(data=[full_vac], layout=layout)
po.iplot(fig)  # interactive rendering inside the notebook
fig.write_image('fully_vac.png', scale=5)  # static export of the same figure

The countries in grey are the ones for which we don't have data.

In [None]:
# Choropleth trace: each country is coloured by its number of persons with 1+ dose
dose1_vac = {
    'type': 'choropleth',
    'colorscale': 'Portland',
    'locationmode': "country names",  # match countries by their plain names
    'locations': vac_data_rm['COUNTRY'],
    'z': vac_data_rm['PERSONS_VACCINATED_1PLUS_DOSE'],
    'text': vac_data_rm['COUNTRY'],  # hover label
    'colorbar': {'title': 'Persons 1+ dose vaccinated'},
}

# Figure title and map settings (Robinson projection, lakes drawn in blue)
layout = {
    'title': 'Global persons one dose vaccinated against COVID-19 - robinson projection',
    'geo': {
        'projection': {'type': 'robinson'},
        'showlakes': True,
        'lakecolor': 'rgb(0,191,255)',
    },
}

fig = pg.Figure(data=[dose1_vac], layout=layout)
po.iplot(fig)  # interactive rendering inside the notebook
fig.write_image('One+dose.png', scale=5)  # static export of the same figure

In [None]:
# Choropleth trace: each country is coloured by the number of vaccine types it uses
num_vac = {
    'type': 'choropleth',
    'colorscale': 'Portland',
    'locationmode': "country names",  # match countries by their plain names
    'locations': vac_data_rm['COUNTRY'],
    'z': vac_data_rm['NUMBER_VACCINES_TYPES_USED'],
    'text': vac_data_rm['COUNTRY'],  # hover label
    'colorbar': {'title': 'Number of vaccines types used'},
}

# Figure title and map settings (Robinson projection, lakes drawn in blue)
layout = {
    'title': 'Number of vaccines used against COVID-19 - robinson earth projection',
    'geo': {
        'projection': {'type': 'robinson'},
        'showlakes': True,
        'lakecolor': 'rgb(0,191,255)',
    },
}

fig = pg.Figure(data=[num_vac], layout=layout)
po.iplot(fig)  # interactive rendering inside the notebook
fig.write_image('num_vac.png', scale=5)  # static export of the same figure

In [None]:
# Load the Our World in Data COVID-19 table; it provides the population per location
# that is later merged with the vaccination dataset
# Source: https://covid.ourworldindata.org/data/owid-covid-data.csv
owid_data = pd.read_csv(
    filepath_or_buffer="https://covid.ourworldindata.org/data/owid-covid-data.csv"
)

In [None]:
# Parse the 'date' column into pandas datetimes, then preview the first five rows
owid_data = owid_data.assign(date=lambda frame: pd.to_datetime(frame['date']))
owid_data.head(n=5)

In [None]:
# Collapse the OWID time series to one row per location, holding the maximum of every
# numeric column (so "population" becomes the largest value reported for that location).
# GroupBy.max takes `numeric_only` as its first parameter: the former positional
# "population" was only a truthy value for that flag, not a column selector.
# Spelling the flag out gives the same result and makes the intent explicit.
owid_data_m = owid_data.groupby("location").max(numeric_only=True)

In [None]:
# Join the complete-case WHO rows with the per-location OWID maxima.
# how="right" keeps every OWID location, including those without a matching WHO country;
# validate="one_to_one" makes pandas raise if a join key is duplicated on either side.
mergdata = (
    vac_data_rm2
    .merge(
        owid_data_m,
        how="right",
        left_on="COUNTRY",
        right_on="location",
        validate="one_to_one",
    )
    .dropna(axis=0, how="all")  # discard rows that carry no information at all
)
mergdata.head(n=5)

In [None]:
# Vaccination rate: persons with at least one dose as a percentage of the population
mergdata = mergdata.assign(
    vac_rate=lambda frame: frame["PERSONS_VACCINATED_1PLUS_DOSE"] / frame["population"] * 100
)
print(mergdata["vac_rate"].max())
# Order the rows from the highest to the lowest rate (missing rates end up last)
mergdata = mergdata.sort_values(by="vac_rate", ascending=False)
# First column of the top row, i.e. the entry with the highest vaccination rate
mergdata.iloc[0, 0]

In [None]:
# Pearson correlation between the vaccination rate and the number of vaccine types used,
# restricted to the rows where both values are available
temp = mergdata.loc[:, ["vac_rate", "NUMBER_VACCINES_TYPES_USED"]].dropna(axis=0, how="any")
pearsonr(
    temp["vac_rate"].to_numpy(),
    temp["NUMBER_VACCINES_TYPES_USED"].to_numpy(),
)

In [None]:
# Plot the vaccination rate
# Choropleth trace: each country is coloured by its vaccination rate (in percent)
vac_rate = {
    'type': 'choropleth',
    'colorscale': 'Portland',
    'locationmode': "country names",  # match countries by their plain names
    'locations': mergdata['COUNTRY'],
    'z': mergdata['vac_rate'],
    'text': mergdata['COUNTRY'],  # hover label
    'colorbar': {'title': 'Vaccination rate'},
}

# Figure title and map settings (Robinson projection, lakes drawn in blue)
layout = {
    'title': 'Vaccination rate against COVID-19 - robinson projection',
    'geo': {
        'projection': {'type': 'robinson'},
        'showlakes': True,
        'lakecolor': 'rgb(0,191,255)',
    },
}

fig = pg.Figure(data=[vac_rate], layout=layout)
po.iplot(fig)  # interactive rendering inside the notebook
fig.write_image('vac_rate.png', scale=5)  # static export of the same figure

**Question:** Name of vaccines per country

In [None]:
# Turn each country's VACCINES_USED string into a list of tokens.
# NOTE(review): the split is done on "-"; if the source column is actually a
# comma-separated list whose entries themselves contain hyphens, these tokens are not
# vaccine names - confirm the delimiter against the raw data.
sp = vac_data_rm["VACCINES_USED"].str.split(pat="-")

# One row per (country, token) pair
vac_coun = (
    vac_data_rm["COUNTRY"]
    .to_frame()
    .merge(sp, left_index=True, right_index=True)
    .explode("VACCINES_USED")
)

# Country x token contingency table; the margins row ("All", last row) holds the number
# of countries per token, and the trailing margins column is left out
used_wrld_vac = pd.crosstab(
    index=vac_coun["COUNTRY"],
    columns=vac_coun["VACCINES_USED"],
    margins=True,
).iloc[-1, :-1]
# Optional: convert the counts to percentages
#used_wrld_vac=used_wrld_vac/np.sum(used_wrld_vac)*100

fig = px.bar(used_wrld_vac)
fig.show()
fig.write_image('prop_vac.png', scale=5)

# Number of distinct tokens found
used_wrld_vac.shape[0]