# Covid19 Global Analysis

<div align="center">
<img src= "https://setorsaude.com.br/wp-content/uploads/2020/08/Vacina-contra-Covid-que-ser%C3%A1-testada-pelo-Hospital-S%C3%A3o-Lucas-da-PUC-RS-chega-nesta-segunda-feira-a-Porto-Alegre.jpg" width="640" height= "480" />
</div>

## Introduction

Welcome! In this notebook we will analyze global COVID-19 data: confirmed cases, deaths, recoveries, vaccinations, etc.

If you are not interested in seeing the data processing, only the analysis, go straight to [Visualization](#Visualization)

### Menu:
- [Imports](#Imports)
- [Data Cleaning](#Data-Cleaning)
    - [Summary Data](#Summary-Data)
    - [Vaccines Data](#Vaccines-Data)
    - [Creating Country DataFrame](#Creating-Country-DataFrame)
    - [Creating Vaccines DataFrame](#Creating-Vaccines-DataFrame)
- [Visualization](#Visualization)
   - [Summary Visualization](#Summary-Visualization)
   - [Vaccination Visualization](#Vaccination-Visualization)


## Imports


In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%matplotlib inline

import plotly.express as ex
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly
from datetime import datetime
pyo.init_notebook_mode()
plotly.io.orca.config.save()
import os

# Make sure the "images" output directory exists.
# `exist_ok=True` replaces the separate exists-check, so there is no window
# between the check and the creation in which another process could create it.
os.makedirs("images", exist_ok=True)

import warnings
warnings.filterwarnings('ignore')

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

## Data Cleaning

### Summary Data

In [None]:
# Load the daily data CSV into `daily` and print its column/dtype summary.
# Later cells read its `country`, `date`, `daily_new_cases` and
# `daily_new_deaths` columns.
daily = pd.read_csv('../input/covid19-global-dataset/worldometer_coronavirus_daily_data.csv')
daily.info()

In [None]:
# Convert the `date` column to pandas datetimes; it is used as the x axis of
# the per-country line plots.
daily['date'] = pd.to_datetime(daily['date'])

In [None]:
# Load the summary data CSV into `summary` and print its column/dtype summary.
# Later cells read its `country`, `population`, `total_confirmed`,
# `total_deaths` and `total_recovered` columns.
summary = pd.read_csv('../input/covid19-global-dataset/worldometer_coronavirus_summary_data.csv')
summary.info()

### Vaccines Data

In [None]:
# Load the country vaccination CSV into `df` and print its column/dtype summary.
df = pd.read_csv("../input/d/gpreda/covid-world-vaccination-progress/country_vaccinations.csv")

df.info()

In [None]:
# Convert `date` to pandas datetimes; a later cell relies on the `.dt` accessor.
df['date'] = pd.to_datetime(df['date'])

# `DataFrame.drop` returns a new frame and leaves the original untouched, so
# the result has to be re-bound for the columns to actually disappear.
# `errors='ignore'` keeps the cell re-runnable once the columns are gone.
df = df.drop(columns=['source_name', 'source_website'], errors='ignore')

### Creating Country DataFrame

In [None]:
def to_dataframe(df, col):
    """Return the per-country maximum of ``col`` as a DataFrame.

    Rows of ``df`` are grouped by their ``country`` column and the maximum of
    ``col`` is taken within each group. The result is indexed by country.
    """
    return pd.DataFrame(df.groupby('country')[col].max())

# Extra per-country columns to place next to `total_vaccinations`.
columns_to_join = [
    'people_vaccinated',
    'people_fully_vaccinated',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'vaccines',
]

# Start from the per-country maximum of `total_vaccinations` ...
max_total_vaccinations = df.groupby(['country'])['total_vaccinations'].max()
df_country = pd.DataFrame(max_total_vaccinations)

# ... then join the per-country maximum of every other column onto it.
for extra_column in columns_to_join:
    df_country = df_country.join(to_dataframe(df, extra_column))

# Turn the `country` index back into a regular column.
df_country = df_country.reset_index()

# Cap both "per hundred" people columns at 100.
for capped_column in ('people_vaccinated_per_hundred',
                      'people_fully_vaccinated_per_hundred'):
    above_cap = df_country[capped_column] > 100
    df_country.loc[above_cap, capped_column] = 100

### Creating Vaccines DataFrame

In [None]:
# Build a long-format table with one row per (vaccine, country) pair by
# splitting each country's comma-separated `vaccines` string.
#
# Rows are collected in a plain list and converted once at the end:
# `DataFrame.append` was removed in pandas 2.0 and copied the whole frame on
# every call.
vaccine_rows = []
for country, vaccine_list in zip(df_country['country'], df_country['vaccines']):
    # A missing value is not a string and has no `.split`; skip it instead of
    # crashing the cell.
    if not isinstance(vaccine_list, str):
        continue
    for vaccine in vaccine_list.split(', '):
        vaccine_rows.append({'vaccine': vaccine, 'country': country})

# Passing `columns` keeps the expected schema even when no rows were collected.
df_vaccines = pd.DataFrame(vaccine_rows, columns=['vaccine', 'country'])


In [None]:
# Write the per-country table and the vaccination frame to the working directory.
# NOTE(review): `to_csv` also writes the index as an unnamed first column by
# default, and the second file name has no `.csv` extension — confirm both
# are intended.
df_country.to_csv('Country.csv')
df.to_csv('World Vaccination')

# Visualization

## Summary Visualization

### Total Cases

In [None]:
# Two stacked bar charts for the ten countries with the most confirmed cases:
# the case totals on top, and the same totals drawn over population below.
plt.figure(figsize=(7, 9))
sns.set_theme()

# Countries ordered by confirmed cases, largest first; shared by every bar plot.
most_confirmed = summary.sort_values('total_confirmed', ascending=False)['country'][:10]

# --- Top: confirmed cases, labelled in millions ---
plt.subplot(2, 1, 1)
ax = sns.barplot(x=summary['country'],
                 y=summary['total_confirmed'],
                 order=most_confirmed)
plt.title('TOTAL CONFIRMED CASES PER COUNTRY', size=17)
plt.xlabel('')
plt.ylabel('Total Cases', size=15)
plt.xticks(rotation=90, size=13)

for bar in ax.patches:
    bar_height = bar.get_height()
    # Place the label slightly above the top of the bar.
    label_x = bar.get_x() + 0.15
    label_y = bar.get_y() + bar_height * 1.01
    plt.text(label_x, label_y, '{}M'.format(int(bar_height / 1000000)), size=10)

# --- Bottom: population bars with confirmed-case bars drawn on top of them ---
plt.subplot(2, 1, 2)
ax = sns.barplot(x=summary['country'],
                 y=summary['population'],
                 order=most_confirmed,
                 color='lightblue')
sns.barplot(x=summary['country'],
            y=summary['total_confirmed'],
            order=most_confirmed,
            color='gray')

plt.title('TOTAL CONFIRMED X POPULATION', size=20)
plt.xlabel('')
plt.ylabel('Total Cases', size=15)
plt.xticks(rotation=90, size=13)

# Hand-built legend entries matching the two bar colours.
population_patch = mpatches.Patch(color='lightblue', label='Population')
cases_patch = mpatches.Patch(color='gray', label='Total Cases')
plt.legend(handles=[population_patch, cases_patch])

plt.subplots_adjust(hspace=0.5)

plt.show()

In [None]:
# Daily new cases and daily new deaths over time for the ten countries with
# the most confirmed cases, one subplot per country on a 5x2 grid.
most_confirmed = summary.sort_values('total_confirmed', ascending=False).reset_index()['country'][:10]
top_countries = daily.loc[daily['country'].isin(most_confirmed)]

plt.figure(figsize=(10, 15))

# Legend entries are the same for every subplot.
cases_patch = mpatches.Patch(color='gray', label='Daily Cases')
deaths_patch = mpatches.Patch(color='black', label='Daily Deaths')

for position, country in enumerate(top_countries['country'].unique(), start=1):
    data = top_countries.loc[top_countries['country'] == country]

    plt.subplot(5, 2, position)
    sns.lineplot(x=data['date'], y=data['daily_new_cases'], color='gray')
    sns.lineplot(x=data['date'], y=data['daily_new_deaths'], color='black')

    plt.title(str(country))
    plt.xticks(['2020', '2021'])
    plt.ylabel('')
    plt.xlabel('')

    plt.legend(handles=[cases_patch, deaths_patch])

plt.tight_layout()
plt.show()

### Deaths

In [None]:
# Two stacked bar charts for the ten countries with the most deaths:
# the death totals on top, and deaths drawn over confirmed cases below.
plt.figure(figsize=(7, 9))

# Countries ordered by total deaths, largest first; shared by every bar plot.
most_deaths = summary.sort_values('total_deaths', ascending=False)['country'][:10]

# --- Top: total deaths, labelled in thousands ---
plt.subplot(2, 1, 1)
ax = sns.barplot(x=summary['country'],
                 y=summary['total_deaths'],
                 order=most_deaths)
plt.title('TOTAL DEATHS PER COUNTRY', size=22)
plt.xlabel('')
# This axis shows deaths; the label previously read "Total Cases".
plt.ylabel('Total Deaths', size=15)
plt.xticks(rotation=90, size=13)

for patch in ax.patches:
    height = patch.get_height()
    # Place the label slightly above the top of the bar.
    plt.text(patch.get_x() + 0.15,
             patch.get_y() + height * 1.01,
             '{}k'.format(int(height / 1000)),
             size=10)

# --- Bottom: confirmed-case bars with death bars drawn on top of them ---
plt.subplot(2, 1, 2)
sns.barplot(x=summary['country'],
            y=summary['total_confirmed'],
            order=most_deaths,
            color='gray')
sns.barplot(x=summary['country'],
            y=summary['total_deaths'],
            order=most_deaths,
            color='black')

plt.title('TOTAL DEATHS X TOTAL CONFIRMED', size=20)
plt.xlabel('')
plt.ylabel('Total Cases', size=15)
plt.xticks(rotation=90, size=13)

# Hand-built legend entries matching the two bar colours.
cases_patch = mpatches.Patch(color='gray', label='Total Cases')
deaths_patch = mpatches.Patch(color='black', label='Total Deaths')
plt.legend(handles=[cases_patch, deaths_patch])
plt.subplots_adjust(hspace=0.5)

plt.show()

In [None]:
# Daily new deaths over time for the ten countries with the most deaths,
# one subplot per country on a 5x2 grid.
most_deaths = summary.sort_values('total_deaths', ascending=False).reset_index()['country'][:10]
top_countries = daily.loc[daily['country'].isin(most_deaths)]

plt.figure(figsize=(10, 15))

# The legend entry is the same for every subplot.
deaths_patch = mpatches.Patch(color='steelblue', label='Daily Deaths')

for position, country in enumerate(top_countries['country'].unique(), start=1):
    data = top_countries.loc[top_countries['country'] == country]

    plt.subplot(5, 2, position)
    sns.lineplot(x=data['date'], y=data['daily_new_deaths'], color='steelblue')

    plt.title(str(country))
    plt.xticks(['2020', '2021'])
    plt.ylabel('')
    plt.xlabel('')

    plt.legend(handles=[deaths_patch])

plt.tight_layout()
plt.show()

### Recovered

In [None]:
# Two stacked bar charts for the ten countries with the most recoveries:
# the recovery totals on top, and recoveries and deaths drawn over confirmed
# cases below.
plt.figure(figsize=(7, 9))

# Countries ordered by total recovered, largest first; shared by every bar plot.
most_recovered = summary.sort_values('total_recovered', ascending=False)['country'][:10]

# --- Top: total recovered, labelled in millions ---
plt.subplot(2, 1, 1)
ax = sns.barplot(x=summary['country'],
                 y=summary['total_recovered'],
                 order=most_recovered)
plt.title('TOTAL RECOVERED PER COUNTRY', size=22)
plt.xlabel('')
# This axis shows recoveries; the label previously read "Total Cases".
plt.ylabel('Total Recovered', size=15)
plt.xticks(rotation=90, size=13)

for patch in ax.patches:
    height = patch.get_height()
    # Place the label slightly above the top of the bar.
    plt.text(patch.get_x() + 0.15,
             patch.get_y() + height * 1.01,
             '{}M'.format(int(height / 1000000)),
             size=10)

# --- Bottom: confirmed, recovered and death bars drawn over each other ---
plt.subplot(2, 1, 2)
sns.barplot(x=summary['country'],
            y=summary['total_confirmed'],
            order=most_recovered,
            color='gray')
sns.barplot(x=summary['country'],
            y=summary['total_recovered'],
            order=most_recovered,
            color='lightblue')
sns.barplot(x=summary['country'],
            y=summary['total_deaths'],
            order=most_recovered,
            color='black')

plt.title('TOTAL RECOVERED X TOTAL CONFIRMED', size=20)
plt.xlabel('')
plt.ylabel('Total Cases', size=15)
plt.xticks(rotation=90, size=13)

# Hand-built legend entries matching the three bar colours.
cases_patch = mpatches.Patch(color='gray', label='Total Cases')
recovered_patch = mpatches.Patch(color='lightblue', label='Total Recovered')
deaths_patch = mpatches.Patch(color='black', label='Total Deaths')
plt.legend(handles=[cases_patch, recovered_patch, deaths_patch])
plt.subplots_adjust(hspace=0.5)

plt.show()

## Vaccination Visualization

In [None]:
# World map coloured by each country's `people_vaccinated_per_hundred` value.
title = 'Percentage of the Vaccinated Population per Country'

choropleth_options = dict(
    locations="country",
    locationmode='country names',
    color="people_vaccinated_per_hundred",
    hover_name="country",
    title='Percentage of the Vaccinated Population',
    color_continuous_scale=ex.colors.sequential.RdBu,
    width=800,
    height=400,
)
fig = ex.choropleth(df_country, **choropleth_options)

# Replace the title with the longer text and centre it horizontally.
fig.update_layout(title=title, title_x=0.5)
fig.show()

- **Insights**
    - Most countries have vaccinated less than half of their population.
    - The countries with the greatest progress in vaccination are in North America and Europe.
    - The least vaccinated countries are in Africa.
    
    

In [None]:
# One small world map per vaccine (four maps per row), highlighting the
# countries listed for that vaccine in `df_vaccines`.
# The title is defined once (spelling corrected from "Distribuition") and
# reused, so the figure and layout titles cannot drift apart.
title = 'Used Vaccines Distribution'
fig = ex.choropleth(df_vaccines, locations="country",
                    locationmode='country names',
                    color="vaccine",
                    facet_col='vaccine',
                    facet_col_wrap=4,
                    facet_col_spacing=0.002,
                    title=title,
                    # NOTE(review): `color` is the vaccine-name column here, so
                    # this continuous scale is presumably unused — confirm.
                    color_continuous_scale=ex.colors.sequential.RdBu,
                    width=1200,
                    height=800
                    )

# Centre the title horizontally.
fig.update_layout(title=title,
                  title_x=0.5)

fig.show()

In [None]:
# Bar chart of how many countries use each vaccine, most used first.
sns.set_theme()
plt.figure(figsize=(8, 5))

# Pass the column as `x=`: newer seaborn releases accept only `data`
# positionally, so a positional Series would no longer be read as x.
ax = sns.countplot(x=df_vaccines['vaccine'],
                   order=df_vaccines['vaccine'].value_counts().index)

plt.title('Most used Vaccines', size=20)
plt.xlabel(None)
plt.xticks(rotation=90, size=14)

for patch in ax.patches:
    height = patch.get_height()
    # Bar heights are counts; print them as whole numbers ("45", not "45.0")
    # just above each bar.
    plt.text(patch.get_x() + 0.1, patch.get_y() + height + 2, '{}'.format(int(height)))

plt.show()


- **Insights**
    - The most popular vaccines are AstraZeneca, Pfizer and Moderna.

In [None]:
# Vaccination trends over time for the five countries with the highest
# recorded `total_vaccinations`.
countries = df.groupby('country')['total_vaccinations'].max().sort_values(ascending=False)[:5].index

# `DataFrame.append` was removed in pandas 2.0. Concatenating the per-country
# slices in ranking order keeps the same row order as the old append loop.
top_countries = pd.concat([df.loc[df['country'] == country] for country in countries])

# Both plots share the same x values: dates formatted as YYYY-MM-DD strings.
date_labels = top_countries['date'].dt.strftime('%Y-%m-%d')


def _thin_xticks(n_ticks=6):
    """Keep roughly ``n_ticks`` evenly spaced x ticks on the current axes."""
    _, labels = plt.xticks()
    # Guard against a zero step (ValueError in `range`) when there are fewer
    # labels than requested ticks.
    step = max(1, len(labels) // n_ticks)
    plt.xticks(ticks=range(0, len(labels), step))


plt.figure(figsize=(8, 10))
sns.set_theme()

# --- Top: cumulative vaccinations ---
plt.subplot(2, 1, 1)
# NOTE(review): `ci` is deprecated in newer seaborn in favour of `errorbar`;
# kept as-is so the cell still runs on older releases.
sns.lineplot(x=date_labels,
             y=top_countries['total_vaccinations'],
             hue=top_countries['country'], ci=False)

plt.title('Total Vaccinations', size=20)
plt.xlabel(None)
plt.ylabel('Total Vaccinations', size=15)
_thin_xticks()

# --- Bottom: daily vaccinations per million ---
plt.subplot(2, 1, 2)
# x and y are passed by keyword: newer seaborn releases accept only `data`
# positionally.
sns.lineplot(x=date_labels,
             y=top_countries['daily_vaccinations_per_million'],
             hue=top_countries['country'], ci=False)
plt.title('Daily Vaccinations per Country', size=20)
plt.xlabel(None)
plt.ylabel('Vaccinations', size=15)
_thin_xticks()

plt.show()

- **Insights**
    - In March, China began a mass vaccination campaign.
    - From April onwards, the number of daily vaccinations in the United States steadily declined, while China's increased exponentially.

In [None]:
# Horizontal bar chart of the 15 countries with the highest `total_vaccinations`.
plt.figure(figsize=(8, 5))

ranking = df_country.sort_values('total_vaccinations', ascending=False)['country'][:15]
ax = sns.barplot(data=df_country, x='total_vaccinations', y='country',
                 order=ranking,
                 palette='GnBu_r')

plt.title('Top 15 Total Vaccinations per Country', size=18)
plt.xlabel('Total Vaccinations', size=15)
plt.ylabel('Country', size=15)

# Write each bar's value, in millions, at the end of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_x = bar_length + bar.get_x()
    label_y = bar.get_height() + bar.get_y()
    plt.text(label_x, label_y, '{:.1f} M'.format(int(bar_length) / 1000000))

plt.show()

In [None]:
# Horizontal bar chart of the first 15 countries when sorted by ascending
# `total_vaccinations`.
plt.figure(figsize=(8, 5))

ranking = df_country.sort_values('total_vaccinations')['country'][:15]
ax = sns.barplot(data=df_country, x='total_vaccinations', y='country',
                 order=ranking,
                 palette='GnBu_r')

plt.title('Minors Vaccinations per Country', size=20)
plt.xlabel('Total Vaccinations', size=15)
plt.ylabel('Country', size=15)

# Write each bar's integer value at the end of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_x = bar_length + bar.get_x()
    label_y = bar.get_height() + bar.get_y()
    plt.text(label_x, label_y, '{}'.format(int(bar_length)))

In [None]:
# Horizontal bar chart of the 15 countries with the highest `people_vaccinated`.
plt.figure(figsize=(8, 5))

ranking = df_country.sort_values('people_vaccinated', ascending=False)['country'][:15]
ax = sns.barplot(data=df_country, y='country', x='people_vaccinated',
                 order=ranking,
                 palette='GnBu_r')

plt.title('People Vaccinated per Country', size=18)
plt.xlabel('People Vaccinated', size=15)
plt.ylabel('Country', size=15)

# Write each bar's value, in millions, at the end of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_x = bar_length + bar.get_x()
    label_y = bar.get_height() + bar.get_y()
    plt.text(label_x, label_y, '{:.1f} M'.format(int(bar_length) / 1000000))

In [None]:

# Horizontal bar chart of the 15 countries with the highest
# `people_fully_vaccinated`.
plt.figure(figsize=(8, 5))

ranking = df_country.sort_values('people_fully_vaccinated', ascending=False)['country'][:15]
ax = sns.barplot(data=df_country, y='country', x='people_fully_vaccinated',
                 order=ranking,
                 palette='GnBu_r')

plt.title('People Fully Vaccinated per Country', size=17)
plt.xlabel('People Fully Vaccinated', size=15)
plt.ylabel('Country', size=15)

# Write each bar's value, in millions, at the end of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_x = bar_length + bar.get_x()
    label_y = bar.get_height() + bar.get_y()
    plt.text(label_x, label_y, '{:.1f} M'.format(int(bar_length) / 1000000))

In [None]:
# Horizontal bar chart of the 15 countries with the highest
# `total_vaccinations_per_hundred`.
plt.figure(figsize=(8, 5))

ranking = df_country.sort_values('total_vaccinations_per_hundred', ascending=False)['country'][:15]
ax = sns.barplot(data=df_country, y='country', x='total_vaccinations_per_hundred',
                 order=ranking,
                 palette='GnBu_r')

plt.title('Total Vaccinations / Population', size=20)
plt.xlabel('Total Vaccinations', size=15)
plt.ylabel('Country', size=15)

# Write each bar's value, formatted as a percentage, at the end of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_x = bar_length + bar.get_x()
    label_y = bar.get_height() + bar.get_y()
    plt.text(label_x, label_y, '{:.1f} %'.format(bar_length))

In [None]:
# Horizontal bar chart of the 15 countries with the highest
# `people_vaccinated_per_hundred`.
plt.figure(figsize=(8, 5))

ranking = df_country.sort_values('people_vaccinated_per_hundred', ascending=False)['country'][:15]
ax = sns.barplot(data=df_country, y='country', x='people_vaccinated_per_hundred',
                 order=ranking,
                 palette='GnBu_r')

plt.title('Percentage of the Vaccinated Population', size=17)
plt.xlabel('People Vaccinated', size=15)
plt.ylabel('Country', size=15)

# Write each bar's value, formatted as a percentage, at the end of the bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    label_x = bar_length + bar.get_x()
    label_y = bar.get_height() + bar.get_y()
    plt.text(label_x, label_y, '{:.1f} %'.format(bar_length))