# Importing the necessary Python libraries and the dataset

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the country-level vaccination records and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="country_vaccinations.csv")
data.head(n=5)

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/


In [3]:
# Summary statistics for the numeric columns of the dataset.
summary_stats = data.describe()
summary_stats

Unnamed: 0,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
count,18619.0,17754.0,14934.0,15356.0,33100.0,18619.0,17754.0,14934.0,33100.0
mean,13398840.0,6173030.0,3610556.0,234784.6,118141.1,31.711192,20.341474,13.511687,3472.557069
std,74875550.0,22637970.0,13069560.0,1273120.0,849954.6,36.848835,21.430504,16.885132,4541.310256
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,137936.0,111020.2,52002.75,4507.5,841.0,3.31,2.73,1.45,422.0
50%,980255.0,701703.0,400240.5,22133.0,6451.5,16.39,11.675,6.215,1966.0
75%,4722680.0,3128880.0,1853223.0,98591.5,36821.75,49.11,33.6075,19.7025,5223.0
max,1565872000.0,622000000.0,223299000.0,24741000.0,22424290.0,232.72,116.73,115.99,118759.0


In [4]:
# Number of rows recorded for each country, most frequent first.
data["country"].value_counts()

Norway                             236
Latvia                             235
England                            230
Scotland                           230
Wales                              225
                                  ... 
Niue                                16
Haiti                                7
Pitcairn                             7
Bonaire Sint Eustatius and Saba      1
Turkmenistan                         1
Name: country, Length: 219, dtype: int64

In [5]:
# Number of rows recorded for each vaccine combination, most frequent first.
data["vaccines"].value_counts()

Oxford/AstraZeneca                                                                             4911
Johnson&Johnson, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech                                  4337
Moderna, Oxford/AstraZeneca, Pfizer/BioNTech                                                   3059
Oxford/AstraZeneca, Sinopharm/Beijing                                                          2579
Oxford/AstraZeneca, Pfizer/BioNTech                                                            1870
Moderna, Pfizer/BioNTech                                                                       1562
Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V                              1525
Pfizer/BioNTech                                                                                1227
Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac                                                    972
Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, Sputnik V                                         677


# Preprocess the data

In [6]:
# Keep only the two columns this analysis needs: the vaccine combination
# and the country that reports it.
wanted_columns = ["vaccines", "country"]
df = data.loc[:, wanted_columns]
df.head()

Unnamed: 0,vaccines,country
0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Afghanistan
1,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Afghanistan
2,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Afghanistan
3,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Afghanistan
4,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",Afghanistan


# Prepare the Data
Find which countries are using each combination of vaccines.

In [11]:
# Map each vaccine combination to the set of countries that report using it.
#
# A single groupby pass replaces the earlier approach of filtering the whole
# frame once per combination and then looking up every matching row by label.
# No variable is named `dict` any more, so the builtin `dict` stays usable in
# later cells.
#
# sort=False keeps the combinations in order of first appearance, matching the
# order produced by `df.vaccines.unique()`. Rows whose `vaccines` value is
# missing are skipped by groupby (they could never match an equality filter
# anyway, so they contributed no countries before either).
vaccines = {
    combination: set(countries)
    for combination, countries in df.groupby("vaccines", sort=False)["country"]
}

print(vaccines)

{'Johnson&Johnson, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing': {'Afghanistan'}, 'Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, Sputnik V': {'Bosnia and Herzegovina', 'Oman', 'Tunisia', 'Albania'}, 'Sputnik V': {'Algeria', 'Guinea'}, 'Oxford/AstraZeneca, Pfizer/BioNTech': {'Costa Rica', 'Cape Verde', 'Andorra', 'Australia', 'Cayman Islands', 'Bermuda', 'Isle of Man', 'Saudi Arabia', 'Slovenia', 'Panama'}, 'Oxford/AstraZeneca': {'Burkina Faso', 'Wallis and Futuna', 'Antigua and Barbuda', 'Botswana', 'Uganda', 'British Virgin Islands', 'Lesotho', 'Jamaica', 'Saint Helena', 'Democratic Republic of Congo', 'Saint Vincent and the Grenadines', 'Solomon Islands', 'Fiji', 'Liberia', 'Mali', 'Bahamas', 'Madagascar', 'South Sudan', 'Saint Kitts and Nevis', 'Ethiopia', 'Haiti', 'Angola', 'Malawi', 'Saint Lucia', 'Barbados', "Cote d'Ivoire", 'Vanuatu', 'Sao Tome and Principe', 'Tonga', 'Yemen', 'Samoa', 'Cook Islands', 'Tuvalu', 'Georgia', 'Eswatini', 'Grenada', 'Montserrat', 'Niue', '

# Visualize which combination of vaccines each country is using

In [16]:
import plotly.express as px
import plotly.offline as py

# The dataset holds one row per country per date, but the map only needs to
# know which vaccine combination belongs to which ISO code. Keeping one row per
# distinct (iso_code, vaccines) pair draws the same map from a few hundred rows
# instead of every daily record.
country_vaccines = data.drop_duplicates(subset=["iso_code", "vaccines"])

# Colour each country by its vaccine combination; the country name is shown on
# hover so the ISO code does not have to be decoded by the reader.
v_map = px.choropleth(
    country_vaccines,
    locations="iso_code",
    color="vaccines",
    hover_name="country",
    title="Vaccine combinations in use by country",
)
v_map.update_layout(height=500)

v_map.show()