# Introduction

This notebook explores COVID-19 vaccination progress around the world.

<font color='green'>
Content:
    
1. [Load and Check Data](#1)
2. [Data Visualization](#2)
    * [Bar Plot](#3)
    * [Pie Chart](#4)
    * [Bubble Charts](#5)
    * [Word Cloud](#6)
    * [Box Plot](#7)
    * [Scatter Matrix Plot](#8)
    * [3D Scatter Plot with Colorscaling](#9)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
%matplotlib inline

# plotly
# import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# word cloud library
from wordcloud import WordCloud

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<a id="1"></a> <br>
# Load and Check Data

* First of all, we are going to read the reports in our dataset.

In [None]:
# Read the country-level vaccination CSV into a DataFrame and preview the first rows.
csv_path = '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
covid_data = pd.read_csv(csv_path)
covid_data.head()

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
covid_data.info()

In [None]:
# Frequency of each distinct value in the people_vaccinated column.
covid_data['people_vaccinated'].value_counts()

In [None]:
# Distinct country names, in order of first appearance.
covid_data.country.unique()

<a id="2"></a> <br>
# Data Visualization

* Now we will apply several visualization techniques.

<a id="3"></a> <br>
## Bar Plot

In [None]:
# Boolean mask of rows that actually report a people_vaccinated value,
# then a preview of those rows.
values1 = covid_data['people_vaccinated'].notna()
covid_data.loc[values1].head()

In [None]:
# Average reported `people_vaccinated` value per country, top 10 shown as a bar plot.
#
# The column contains missing values, and Python's built-in sum() returns NaN as
# soon as a single NaN is encountered, which made the average NaN for every
# country with at least one missing report. pandas' mean() skips missing values,
# so each country's average is taken over the days that actually report a number.
# sort=False keeps countries in order of first appearance, like unique() did.
country_means = covid_data.groupby('country', sort=False)['people_vaccinated'].mean()
country_list = list(country_means.index)
country_vaccination_ratio = list(country_means.values)
data1 = pd.DataFrame({'country_list': country_list, 'country_vaccination_ratio': country_vaccination_ratio})

# Countries that never report a value still have a NaN mean; leave them out of
# the ranking so they can never occupy one of the ten plotted slots.
ranked = data1['country_vaccination_ratio'].dropna().sort_values(ascending=False)
new_index = ranked.index.values
sorted_data = data1.reindex(new_index).iloc[:10, :]

#visualization
plt.figure(figsize=(15,10))
sns.barplot(x=sorted_data['country_list'], y=sorted_data['country_vaccination_ratio'])
plt.xticks(rotation=45)
plt.xlabel('Countries')
plt.ylabel('Vaccination Rate')
plt.title('Vaccination Rate Given Country')
plt.show()

In [None]:
# most vaccinated countries
# value_counts() on `country` only counts how many rows each country has in the
# file, which is not a vaccination figure. Rank countries by the highest
# people_vaccinated value they report instead.
# NOTE(review): assumes people_vaccinated is a running total, so its maximum is
# the latest figure -- confirm against the dataset description.
country = (
    covid_data.groupby('country')['people_vaccinated']
    .max()
    .dropna()                      # countries that never report the column
    .sort_values(ascending=False)
)
top_countries = country.head(20)

plt.figure(figsize=(20,7))
sns.barplot(x=top_countries.index, y=top_countries.values)
plt.xticks(rotation=45)
plt.xlabel('Countries')
plt.ylabel('People Vaccinated')
plt.title('Most Vaccinated Countries',color = 'green',fontsize=25)
plt.show()

In [None]:
# Number of rows listing each vaccine combination.
covid_data['vaccines'].value_counts()

In [None]:
# Bar plot of how many rows list each vaccine combination.
vaccines_type = covid_data['vaccines'].value_counts()
vaccine_names = vaccines_type.index
row_counts = vaccines_type.values

plt.figure(figsize=(10, 7))
plt.xticks(rotation=90)
sns.barplot(x=vaccine_names, y=row_counts)
plt.xlabel('Vaccines Types')
plt.ylabel('Number of Vaccines')
plt.title('Vaccines Type Counts', fontsize=20, color='green')
plt.show()

<a id="4"></a> <br>
## Pie Chart
Daily vaccinations on 2021-02-11 for the last 7 countries in the dataset

In [None]:
# Keep the final seven rows among those dated 2021-02-11 and preview them.
reported_on_date = covid_data['date'] == '2021-02-11'
last_day = covid_data.loc[reported_on_date].tail(7)
last_day.head()

In [None]:
# Donut chart of daily vaccinations for the final seven rows dated 2021-02-11.
reported_on_date = covid_data['date'] == '2021-02-11'
last_day = covid_data.loc[reported_on_date].tail(7)
pie1 = last_day['daily_vaccinations']
labels = last_day['country']

# The single pie trace; "hole" turns the pie into a donut and "domain"
# restricts it to the left half of the figure.
pie_trace = {
    "type": "pie",
    "values": pie1,
    "labels": labels,
    "name": "Number of Daily Vaccinations",
    "hoverinfo": "label+percent+name",
    "hole": .3,
    "domain": {"x": [0, .5]},
}

# Layout with the chart title and one (empty-text) annotation.
pie_annotation = {
    "font": {"size": 20},
    "showarrow": False,
    "text": "",
    "x": 0.20,
    "y": 1,
}
pie_layout = {
    "title": "Today's Number of Daily Vaccinations",
    "annotations": [pie_annotation],
}

fig = {"data": [pie_trace], "layout": pie_layout}
iplot(fig)

<a id="5"></a> <br>
## Bubble Charts

In [None]:
# data preparation: final seven rows among those dated 2021-02-11
date_mask = covid_data['date'] == '2021-02-11'
last_day = covid_data.loc[date_mask].tail(7)
last_day.head()

In [None]:
# Bubble chart of daily vaccinations for the final seven rows dated 2021-02-11.
last_day=covid_data[covid_data.date=='2021-02-11'].iloc[-7:]
last_day_vaccination=last_day.daily_vaccinations
labels=last_day.country

# Marker arrays need one entry per plotted point. The previous hard-coded lists
# held six values for seven rows and were unrelated to the data, so bubble size
# and colour are now derived from the daily vaccination numbers themselves.
# Missing or negative values are mapped to 0 because marker sizes must be >= 0.
bubble_values = last_day_vaccination.fillna(0).clip(lower=0)

# Scale bubble areas so the largest one is about 110 px across (the largest
# size used before). Fall back to 1 when there is nothing positive to scale by
# (empty selection or all zeros), which also covers a NaN maximum.
largest_value = bubble_values.max()
bubble_sizeref = 2.0 * largest_value / (110 ** 2) if largest_value > 0 else 1

fig = go.Figure(data=[go.Scatter(
    x=last_day_vaccination,
    y=labels,
    mode='markers',
    marker=dict(
        color=bubble_values,
        size=bubble_values,
        sizemode='area',
        sizeref=bubble_sizeref,
        sizemin=4,          # keep very small values visible
        showscale=True
        )
)])

fig.show()

<a id="6"></a> <br>
## Word Cloud

In [None]:
# data preparation
# Country names of all rows dated 2021-02-11.
last_day=covid_data.country[covid_data.date=='2021-02-11']

# Joining the names with spaces and calling generate() tokenizes the text into
# single words, so multi-word country names are broken apart and a shared word
# gets counted once per country. Passing explicit frequencies keyed by the full
# country name keeps every name intact.
country_frequencies = Counter(last_day.dropna())

plt.subplots(figsize=(8,8))
wordcloud = WordCloud(
                          background_color='white',
                          width=512,
                          height=384
                         ).generate_from_frequencies(country_frequencies)
plt.imshow(wordcloud)
plt.axis('off')
# Save before show(), while the figure is still the current one.
plt.savefig('graph.png')

plt.show()

<a id="7"></a> <br>
## Box Plot

In [None]:
# All rows dated 2021-02-11, previewed.
is_target_date = covid_data['date'] == '2021-02-11'
last_day = covid_data.loc[is_target_date]
last_day.head()

In [None]:
# data preparation: first twenty rows among those dated 2021-02-11
last_day = covid_data.loc[covid_data['date'] == '2021-02-11'].head(20)

# One box trace per column: (column, legend name, marker colour).
box_specs = [
    ('total_vaccinations_per_hundred',
     'total vaccinations per hundered in 2021-02-11',
     'rgb(12, 12, 140)'),
    ('people_vaccinated_per_hundred',
     'people vaccinated per hundered in 2021-02-11',
     'rgb(12, 128, 128)'),
]
data = [
    go.Box(y=last_day[column], name=legend_name, marker=dict(color=marker_colour))
    for column, legend_name, marker_colour in box_specs
]
trace0, trace1 = data
iplot(data)

<a id="8"></a> <br>
## Scatter Matrix Plot

In [None]:
# import figure factory
import plotly.figure_factory as ff

# prepare data: first ten rows among those dated 2021-02-11
dataframe = covid_data.loc[covid_data['date'] == '2021-02-11'].head(10)
per_hundred_columns = [
    "total_vaccinations_per_hundred",
    "people_vaccinated_per_hundred",
    "people_fully_vaccinated_per_hundred",
]
# Add a 1-based "index" column, which the scatter matrix uses to group points.
data2021_02_11 = dataframe[per_hundred_columns].assign(
    index=np.arange(1, len(dataframe) + 1)
)

# scatter matrix with box plots on the diagonal
fig = ff.create_scatterplotmatrix(
    data2021_02_11,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='cat',
    height=700,
    width=700,
)
iplot(fig)

<a id="9"></a> <br>
## 3D Scatter Plot with Colorscaling

In [None]:
# create trace 1 that is 3d scatter
# Uses `dataframe` (first ten rows dated 2021-02-11) from the scatter matrix cell.
# The marker colour used to be a constant red even though this section is about
# colour scaling; it is now mapped to total_vaccinations_per_hundred so that the
# colorscale carries information. Because the colour column is also the x
# column, a point with a missing colour value has no x position either.
trace1 = go.Scatter3d(
    x=dataframe.total_vaccinations_per_hundred,
    y=dataframe.people_vaccinated_per_hundred,
    z=dataframe.people_fully_vaccinated_per_hundred,
    mode='markers',
    marker=dict(
        size=10,
        color=dataframe.total_vaccinations_per_hundred,  # array of values to colour by
        colorscale='Portland',
        showscale=True,
        colorbar=dict(title='total_vaccinations_per_hundred'),
    )
)

data = [trace1]
# Remove all outer margins so the 3D scene fills the output area.
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)