In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# <p style="background-color:rgba(255,0,0,0.5);font-family: 'Pangolin', cursive;font-size:150%;color:white;text-align:center;border-radius:20px;"><b>Covid-19 Analysis</b></p>
![](https://cdn.evrimagaci.org/Aabf8iLZ1iI8KE31fQuWkWeRUnA=/evrimagaci.org%2Fpublic%2Fcontent_media%2F93aa0b6a1659d5669e2fc32b76aef1f7.jpg)

**Items in the dataset:**
- **Countries**
- **Dates**
- **Vaccines**
- **Total Vaccinations**

<hr>

**Desired data to find:**

- **Most commonly used vaccines in countries**
- **Average daily vaccination count in countries**
- **Number of countries where vaccines are used**
- **Choropleth map of the most used vaccine**

# <p style="background-color:rgba(255,0,0,0.5);font-family: 'Pangolin', cursive;font-size:125%;color:white;text-align:center;border-radius:20px 20px;"><b>Loading the dataset</b></p>

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from folium.features import Choropleth
import folium
from folium.features import Tooltip
import seaborn as sns

In [None]:
# Load the per-manufacturer vaccination records from the Kaggle input dataset.
vaccinations_csv = "/kaggle/input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv"
df = pd.read_csv(vaccinations_csv)

In [None]:
# Preview the first 10 rows of the raw frame.
df.head(10)

In [None]:
# Number of distinct values in the `location` column.
df["location"].nunique()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Inspect the dtype pandas assigned to each column.
df.dtypes

It would be better to convert the `date` column to the datetime type.

In [None]:
# Parse the `date` column into datetime values so date arithmetic works later.
df = df.assign(date=pd.to_datetime(df["date"]))

In our dataset, the Total Vaccinations represent the cumulative sum of vaccinations up to that date. To express the usage of different vaccines by countries, we need to clean the dataset and transform it.

In [None]:
# Collapse the cumulative series to one value per (country, vaccine): the
# maximum of `total_vaccinations` is taken as that pair's total.
# A single groupby replaces the nested loop that re-filtered `df` and re-copied
# `data` through pd.concat for every pair (quadratic work, object-typed result).
max_totals = df.groupby(["location", "vaccine"])["total_vaccinations"].max()

# Keep one row for every country x vaccine combination, in order of first
# appearance in `df`; combinations that never occur get NaN as their total.
all_pairs = pd.MultiIndex.from_product(
    [df["location"].unique(), df["vaccine"].unique()],
    names=["Country", "Vaccine"],
)
data = (
    max_totals
    .reindex(all_pairs)
    .rename("Total_vaccine")
    .reset_index()
)

In [None]:
# Preview the first 10 rows of the country/vaccine totals table.
data.head(10)

Since our new dataset includes rows for all countries and vaccine brands, we need to handle missing data.

In [None]:
# Drop every row that contains a missing value.
data = data.dropna(axis=0)

In [None]:
# Preview the first 20 rows of `data` after the missing rows were dropped.
data.head(20)

# <p style="background-color:rgba(255,0,0,0.3);font-family: 'Pangolin', cursive;font-size:125%;color:white;text-align:center;border-radius:20px 20px;"><b>Most commonly used vaccines in countries</b></p>

It would be valuable to analyze the most commonly used vaccines in countries and create visualizations based on that.

In [None]:
# Make sure the totals are numeric; values that cannot be parsed become NaN.
data["Total_vaccine"] = pd.to_numeric(data["Total_vaccine"], errors="coerce")

# For each country, find the index label of the row with the largest total.
# groupby(...).idxmax() does this in one pass instead of growing `data_2` one
# row at a time with pd.concat. Rows without a numeric total are left out so a
# country with no usable value is skipped rather than raising.
# sort=False keeps countries in order of first appearance.
top_row_labels = (
    data.dropna(subset=["Total_vaccine"])
    .groupby("Country", sort=False)["Total_vaccine"]
    .idxmax()
)

# One row per country: the country and its most-used vaccine.
data_2 = data.loc[top_row_labels, ["Country", "Vaccine"]].reset_index(drop=True)

In [None]:
# Preview the most-used vaccine found for the first few countries.
data_2.head()

In [None]:
# Number of countries in which each vaccine is the most-used one.
vaccine_counts = data_2["Vaccine"].value_counts()

# Highlight the leading vaccine in red and draw every other bar in gray.
# The list is sized from the data: a fixed four-colour list would be cycled by
# matplotlib and colour a second bar red as soon as there are more than four.
bar_colors = ["Red"] + ["Gray"] * (len(vaccine_counts) - 1)

vaccine_counts.plot(
    kind="bar",
    color=bar_colors,
    title="Most-used vaccine per country",
    ylabel="Number of countries",
)

Since the Pfizer/BioNTech vaccine is the most widely used, the rest of the analysis focuses on it.

# <p style="background-color:rgba(255,0,0,0.3);font-family: 'Pangolin', cursive;font-size:125%;color:white;text-align:center;border-radius:20px 20px;"><b>Average daily vaccination count in countries</b></p>
Since the dataset does not provide daily vaccination counts, we estimate an average daily count by dividing each country's total vaccinations by the number of days between the first and last date in the dataset.

In [None]:
# Whole days between the earliest and latest date in the dataset.
date_col = df["date"]
date_span = date_col.max() - date_col.min()
number_of_days = date_span.days

In [None]:
# Keep only the Pfizer/BioNTech rows and leave out the "European Union" entry.
is_pfizer = data["Vaccine"] == "Pfizer/BioNTech"
is_eu_row = data["Country"] == "European Union"
# .copy() gives an independent frame, so adding columns later does not touch `data`.
dtfrm = data[is_pfizer & ~is_eu_row].copy()

In [None]:
# Preview the first 10 rows of the Pfizer/BioNTech subset.
dtfrm.head(10)

In [None]:
# Average vaccinations per day: total divided by the length of the date span,
# truncated to a whole number.
daily_mean = dtfrm["Total_vaccine"].div(number_of_days)
dtfrm["average_vaccination_count"] = daily_mean.astype(int)

In [None]:
# Preview the first 15 rows, now including `average_vaccination_count`.
dtfrm.head(15)

In [None]:
# Use the country name as the index; the plotting cells that follow read the
# country labels from `dtfrm.index`.
# Reassign rather than mutate in place, and skip the step when "Country" has
# already been moved to the index, so re-running this cell does not raise a
# KeyError.
if "Country" in dtfrm.columns:
    dtfrm = dtfrm.set_index("Country")

In [None]:
# One bar colour per country in the top-10 chart.
color = ["Lightblue", "Purple", "Green", "Orange", "darkgoldenrod",
         "tan", "Gray", "Blue", "Pink", "Lightgreen"]

# The ten highest average daily counts, largest first.
top_ten_daily = dtfrm["average_vaccination_count"].sort_values(ascending=False).head(10)
top_ten_daily.plot.bar(color=color)

# <p style="background-color:rgba(255,0,0,0.3);font-family: 'Pangolin', cursive;font-size:125%;color:white;text-align:center;border-radius:20px 20px;"><b>Number of countries where vaccines are used</b></p>

In [None]:
# For each vaccine, count the distinct countries that appear with it in `data`.
countries_by_vaccine = data.groupby("Vaccine")["Country"]
number_of_vaccines = countries_by_vaccine.nunique()

In [None]:
# Bar chart of the per-vaccine country counts, largest first.
countries_per_vaccine_desc = number_of_vaccines.sort_values(ascending=False)
countries_per_vaccine_desc.plot.bar(color="r")

# <p style="background-color:rgba(255,0,0,0.3);font-family: 'Pangolin', cursive;font-size:125%;color:white;text-align:center;border-radius:20px 20px;"><b>Choropleth map of the most used vaccine</b></p>
Visualizing country-level data on a map is a logical choice. Therefore, we will create a choropleth map showing the usage of the BioNTech vaccine by countries.

In [None]:
# World map shaded by the `Total_vaccine` value of each row in `dtfrm`;
# countries are located by the names held in the frame's index.
fig = px.choropleth(
    dtfrm,
    locations=dtfrm.index,
    locationmode="country names",
    color="Total_vaccine",
    color_continuous_scale="YlOrRd",
    title="Ülkelerde Yapılan Biontech Aşıları",
)

# Centre the title horizontally; as the cell's last expression the returned
# figure is what the notebook displays.
fig.update_layout(title_x=0.5)

We can also create the same visualization using the Folium library.

In [None]:
# Start from a world map centred on (0, 0).
m = folium.Map(location=[0, 0], zoom_start=2)

# Shade each GeoJSON country by the matching `Total_vaccine` value; rows are
# joined to features through the feature's `properties.name`.
# NOTE(review): index labels with no identically spelled GeoJSON name will
# presumably stay unshaded — confirm the country spellings line up.
world_geojson_url = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'
vaccine_layer = Choropleth(
    geo_data=world_geojson_url,
    name='choropleth',
    data=dtfrm,
    columns=[dtfrm.index, 'Total_vaccine'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Aşı Sayısı',
)
vaccine_layer.add_to(m)

In [None]:
# Bare expression as the cell's last line so the notebook renders the Folium map.
m