### Our World in Data - exploratory data analysis

### 1. Importing data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Relative path: resolved against the kernel's current working directory.
file = 'locations.csv'

# read_csv splits on commas by default, and head() shows five rows by default.
owid_df = pd.read_csv(file)
owid_df.head()

Unnamed: 0,location,iso_code,vaccines,last_observation_date,source_name,source_website
0,Afghanistan,AFG,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",2021-09-16,World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
1,Albania,ALB,"Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, ...",2021-09-28,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...
2,Algeria,DZA,"Oxford/AstraZeneca, Sinopharm/Beijing, Sputnik V",2021-09-25,World Health Organization,https://covid19.who.int/
3,Andorra,AND,"Oxford/AstraZeneca, Pfizer/BioNTech",2021-08-29,Government of Andorra,https://who.maps.arcgis.com/apps/dashboards/ea...
4,Angola,AGO,Oxford/AstraZeneca,2021-09-26,World Health Organization,https://covid19.who.int/


In [2]:
# Print the column names, non-null counts and dtypes of the loaded frame.
owid_df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222 entries, 0 to 221
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   location               222 non-null    object
 1   iso_code               222 non-null    object
 2   vaccines               222 non-null    object
 3   last_observation_date  222 non-null    object
 4   source_name            222 non-null    object
 5   source_website         222 non-null    object
dtypes: object(6)
memory usage: 10.5+ KB


### 2. Preparing data

In [3]:
# Break the comma-separated 'vaccines' string into one column per listed vaccine.
# The split is on a bare ',', so any space that follows a comma stays attached
# to the piece after it.
owid_df1 = owid_df['vaccines'].str.split(pat=',', expand=True)

# Label the pieces VaccineNo1 .. VaccineNo8. The assignment raises if the split
# produced any other number of columns than eight.
owid_df1.columns = [f'VaccineNo{position}' for position in range(1, 9)]

In [4]:
# Place the per-vaccine columns next to the original columns, aligned on the row index.
vaccine_df = pd.concat([owid_df, owid_df1], axis='columns')

# Preview without the raw 'vaccines' string. The result of drop() is only
# displayed, not assigned, so vaccine_df itself still carries that column.
vaccine_df.drop('vaccines', axis='columns').head()

Unnamed: 0,location,iso_code,last_observation_date,source_name,source_website,VaccineNo1,VaccineNo2,VaccineNo3,VaccineNo4,VaccineNo5,VaccineNo6,VaccineNo7,VaccineNo8
0,Afghanistan,AFG,2021-09-16,World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...,Johnson&Johnson,Oxford/AstraZeneca,Pfizer/BioNTech,Sinopharm/Beijing,,,,
1,Albania,ALB,2021-09-28,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...,Oxford/AstraZeneca,Pfizer/BioNTech,Sinovac,Sputnik V,,,,
2,Algeria,DZA,2021-09-25,World Health Organization,https://covid19.who.int/,Oxford/AstraZeneca,Sinopharm/Beijing,Sputnik V,,,,,
3,Andorra,AND,2021-08-29,Government of Andorra,https://who.maps.arcgis.com/apps/dashboards/ea...,Oxford/AstraZeneca,Pfizer/BioNTech,,,,,,
4,Angola,AGO,2021-09-26,World Health Organization,https://covid19.who.int/,Oxford/AstraZeneca,,,,,,,


In [5]:
# Trim whitespace from both ends of every vaccine name in VaccineNo1 .. VaccineNo8.
for i in range(1, 9):
    column = f'VaccineNo{i}'
    vaccine_df[column] = vaccine_df[column].str.strip()

### 3. Number of countries per vaccine type

In [6]:
# Number of countries for each vaccine that appears in the first vaccine column.
df1 = (
    vaccine_df
    .groupby(["VaccineNo1"])[["location"]]
    .count()
    .rename_axis(index='Vaccine')
    .rename(columns={'location': 'NumberOfCountries'})
)
df1.head(20)

Unnamed: 0_level_0,NumberOfCountries
Vaccine,Unnamed: 1_level_1
Abdala,1
COVIran Barekat,1
CanSino,7
Covaxin,5
EpiVacCorona,2
Johnson&Johnson,53
Medigen,1
Moderna,39
Oxford/AstraZeneca,92
Pfizer/BioNTech,13


In [7]:
# Number of countries per vaccine, for the vaccine listed in position DfNo.
def CreateDFs(DfNo, frame=None):
    """Count countries per vaccine found in the DfNo-th vaccine column.

    Parameters
    ----------
    DfNo : int or str
        Position of the vaccine column to group by. The column that is looked
        up is ``'VaccineNo' + str(DfNo)``.
    frame : pandas.DataFrame, optional
        Frame holding a ``'location'`` column and the requested vaccine column.
        When omitted, the notebook-level ``vaccine_df`` is used, which keeps
        existing one-argument calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Vaccine'`` with a single column ``'NumberOfCountries'``
        holding the count of non-null ``'location'`` values per vaccine.

    Raises
    ------
    KeyError
        If the requested vaccine column or ``'location'`` is not in the frame.
    """
    # Fall back to the global only when no frame is passed, so the function can
    # also be used (and checked) on any frame without relying on kernel state.
    source = vaccine_df if frame is None else frame
    return (
        source
        .groupby(['VaccineNo' + str(DfNo)])[["location"]]
        .count()
        .rename_axis(index='Vaccine')
        .rename(columns={'location': 'NumberOfCountries'})
    )

In [8]:
# Per-vaccine country counts for vaccine columns 2 through 5.
df2, df3, df4, df5 = (CreateDFs(position) for position in range(2, 6))


In [9]:
# Per-vaccine country counts for vaccine columns 6 through 8.
df6, df7, df8 = (CreateDFs(position) for position in range(6, 9))

In [10]:
# Add the per-position counts together two frames at a time.
# Each pair is outer-merged on the 'Vaccine' index level so a vaccine present in
# only one of the two frames is kept. The clashing 'NumberOfCountries' columns
# get the suffixes '1' and '2', and a missing count is treated as zero.
pair_totals = []
for left_counts, right_counts in ((df1, df2), (df3, df4), (df5, df6), (df7, df8)):
    new_df = pd.merge(left_counts, right_counts,
                      on=['Vaccine'], how='outer', suffixes=['1', '2'])
    suffixed = new_df[['NumberOfCountries1', 'NumberOfCountries2']]
    pair_totals.append(suffixed.fillna(0).sum(axis='columns'))

new_df1, new_df2, new_df3, new_df4 = pair_totals

In [11]:
# Inspect the type of the first pairwise total (the sum of two columns is a Series).
type(new_df1)

pandas.core.series.Series

In [12]:
# Line the four pairwise totals up side by side, aligned on the vaccine index.
df_new = pd.concat([new_df1, new_df2, new_df3, new_df4], axis='columns')

# Row-wise grand total per vaccine. A vaccine missing from one of the pairwise
# totals contributes zero for that column.
df_new1 = df_new.fillna(0).sum(axis='columns')

In [13]:
# Total number of countries per vaccine, summed over all eight vaccine columns.
# head(100) is used to display up to the first 100 rows of the Series.
df_new1.head(100)

Vaccine
Abdala                  1.0
COVIran Barekat         1.0
CanSino                 7.0
Covaxin                 7.0
EpiVacCorona            2.0
Johnson&Johnson        54.0
Medigen                 1.0
Moderna                75.0
Oxford/AstraZeneca    182.0
Pfizer/BioNTech       136.0
QazVac                  1.0
Sinopharm/Beijing      74.0
Sputnik V              54.0
Sinopharm/Wuhan         3.0
Sinovac                42.0
Soberana02              1.0
ZF2001                  2.0
dtype: float64

In [14]:
#graph 

### 4. Number of vaccines per country

In [15]:
#graph