# Data visualization for the percentage of population fully vaccinated against COVID-19

__Author: Atharva Vishnu Raverkar__

__Matrikelnummer : 419275__

__Notebook Description:__

- This notebook will show a graph of percentage of population fully vaccinated against COVID-19 for selected countries
- The data set used for this analysis is available on the Johns Hopkins COVID-19 vaccination GitHub page.
- To find the percentage of the population fully vaccinated, the number of fully vaccinated people on a specific date is divided by the population of that country.
- The population data for each country is taken from a different Johns Hopkins GitHub page. It is available as a CSV file in the same GitHub repository as the vaccination data.
- It is possible that the path from which the data is loaded in this notebook does not exactly match the path on the user's computer. In that case, my sincere apologies; please change the path to the correct one.

In [None]:
import pandas as pd 
import numpy as np

%matplotlib inline

import matplotlib as mpl 
import matplotlib.pyplot as plt

import seaborn as sns

import plotly
import plotly.graph_objects as go 

In [None]:
# Notebook-wide display defaults: large matplotlib figures, long DataFrame previews
mpl.rcParams.update({'figure.figsize': (20, 16)})
pd.options.display.max_rows = 500

# seaborn dark-grid styling for matplotlib output
sns.set(style = 'darkgrid')

# Johns Hopkins Data for Vaccination

In [None]:
# Vaccination time series published in the govex/COVID-19 GitHub repository
path = (
    'https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/vaccine_data/global_data/time_series_covid19_vaccine_global.csv'
)
p_data_vaccine = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Preview the first five rows of the raw vaccination table
p_data_vaccine.head(n=5)

In [None]:
# Check whether the two date columns ever disagree: print the row positions
# where 'Date' and 'Report_Date_String' are not equal
dates_differ = (p_data_vaccine['Date'] != p_data_vaccine['Report_Date_String']).to_numpy()
print(np.where(dates_differ))

# Column dtypes of the raw table
p_data_vaccine.dtypes


Based on the above, the reporting date ('Report_Date_String') is sufficient for the analysis.
The other column, named 'Date', can therefore be dropped completely.

In [None]:
# Build the working table in one chain:
#   1. drop the original 'Date' column and the 'Province_State' column,
#   2. shorten the column names ('Report_Date_String' becomes the new 'Date').
p_data = (
    p_data_vaccine
    .drop(columns=['Date', 'Province_State'])
    .rename(columns={
        'Country_Region': 'Country',
        'People_partially_vaccinated': 'partially_vaccinated',
        'People_fully_vaccinated': 'fully_vaccinated',
        'Report_Date_String': 'Date',
    })
)

p_data.head()


In [None]:
# Parse the 'Date' column into pandas datetime values
p_data = p_data.assign(Date=pd.to_datetime(p_data['Date']))

In [None]:
# Using a pivot table to rearrange the data: dates become the index,
# countries become the columns, and the cells hold 'fully_vaccinated'.
# NOTE(review): pivot_table aggregates duplicate (Date, Country) rows with the
# mean by default - confirm there is only one row per pair in the source data.
pivot = pd.pivot_table(data = p_data,
                       index = 'Date',
                       columns = 'Country',
                       values = 'fully_vaccinated',
                       )

# Replacing the NaN values in the table; the result is bound back to `pivot`
# so that the fill actually takes effect on the table used below.
pivot = pivot.fillna(0)


In [None]:
# Zero-fill any NaN cells, then move 'Date' out of the index into a regular column
pivot = pivot.replace(np.nan, 0).reset_index()

pivot.head()

In [None]:
# Start the plotting table from the date column alone
time_idx = pivot["Date"]
df_plot = time_idx.to_frame(name='Date')
df_plot.head()

In [None]:
# Countries to include in the plots
country_list = [
    'India',
    'US',
    'Italy',
]

In [None]:
# World population table, read from the same govex/COVID-19 repository
path_wp = (
    'https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/world_pop_by_country.csv'
)
wp = pd.read_csv(filepath_or_buffer=path_wp)
wp.head(n=5)

In [None]:
# Finding the percentage of fully vaccinated people in each country:
# fully vaccinated count divided by the country's "2018" population, times 100.
for each in country_list:
    population = wp.loc[wp["Country Name"] == each, "2018"]
    if population.empty:
        # The country names in the vaccination table and the population table
        # must match exactly; fail with a clear message instead of an opaque
        # IndexError from indexing an empty result.
        raise KeyError(
            f"No population entry with 'Country Name' == {each!r} in the world population table"
        )
    # Use the first matching population row, as before.
    df_plot[each] = pivot[each] / population.iloc[0] * 100

df_plot.head()




In [None]:
# Plotting the above data with respect to time: one line+marker trace per country
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_plot.Date,
            y=df_plot[country],
            mode='markers+lines',
            opacity=0.9,
            line=dict(width=1),
            marker=dict(size=2),
            name=country,
        )
        for country in country_list
    ]
)

# Figure size, axis titles, and a range slider below the x axis
fig.update_layout(
    width=900,
    height=700,
    xaxis_title='Time',
    yaxis_title="Percentage of total population fully-vaccinated(source: John Hopkins vaccinations, linear-scale)",
    xaxis_rangeslider_visible=True,
)

# Linear y axis with a fixed 0-90 window
fig.update_yaxes(type='linear', range=[0, 90])

# To open the figure in a specific browser instead, pass a renderer to show(),
# e.g. fig.show(renderer='chrome') or fig.show(renderer='firefox').
fig.show()

In [None]:
import dash
from dash import dcc
from dash import html

# Dash application: a multi-select country dropdown above the vaccination graph
app = dash.Dash()
app.layout = html.Div([

    html.Label('Multi-Select Country'),

    dcc.Dropdown(
        id='country_drop_down',
        # Offer exactly the countries prepared in df_plot, so the dropdown
        # cannot drift from the data that is available for plotting.
        options=[{'label': country, 'value': country} for country in country_list],
        value=['US'], # which are pre-selected
        multi=True
    ),

    # Starts with the static figure built earlier; the callback on
    # 'country_drop_down' replaces it with the selected countries.
    dcc.Graph(figure=fig, id='main_window_slope')
])


In [None]:
from dash.dependencies import Input, Output

@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value')])
def update_figure(country_list):
    """Rebuild the vaccination figure for the countries picked in the dropdown.

    Parameters
    ----------
    country_list : list of str or None
        Current value of the 'country_drop_down' component. An empty list
        (or possibly None, depending on the Dash version) may arrive when the
        selection is cleared.

    Returns
    -------
    dict
        Figure description with 'data' (one trace per selected country, read
        from ``df_plot``) and 'layout' keys for the 'main_window_slope' graph.
    """
    # A cleared dropdown must produce an empty plot, not a TypeError.
    selected = country_list or []

    # These are raw dicts, not plotly graph objects, so nested attributes are
    # written as nested dicts: the underscore shorthand (line_width,
    # marker_size, xaxis_title, ...) is only expanded by plotly.py objects.
    traces = [
        dict(x=df_plot.Date,
             y=df_plot[each],
             mode='markers+lines',
             opacity=0.9,
             line=dict(width=1),
             marker=dict(size=2),
             name=each
             )
        for each in selected
    ]

    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,
                xaxis={'title': "Time",
                       'tickangle':-45,
                       'nticks':20,
                       'tickfont':dict(size=14,color="#7f7f7f"),
                      },
                yaxis={'title': "Percentage of total population fully-vaccinated(source johns hopkins csse, linear-scale)",
                       'type':"linear",
                       # A real list (the range was previously a string), using
                       # the same 0-90 window as the static figure.
                       'range':[0, 90]
                      }
        )
    }

In [None]:
# Start the Dash development server with debug tooling on and the reloader off.
# Newer Dash releases provide `app.run` in place of `run_server`; use `run`
# when it exists and fall back to `run_server` on older installations.
run_app = app.run if hasattr(app, 'run') else app.run_server
run_app(debug = True, use_reloader = False)