# Pandas Tutorial

Shouvik Mani <br>
September 24, 2021 <br>
COMS 4995: Applied Machine Learning, Columbia University

In [1]:
import numpy as np
import pandas as pd

### 1. DataFrames and Series 

DataFrames and Series are the fundamental data structures of Pandas. A Pandas DataFrame represents a data matrix, but unlike a Numpy matrix, a DataFrame has named columns and rows (indices).

Let's read in a DataFrame from a CSV file. This is a dataset on daily COVID-19 vaccinations by country. Source: https://www.kaggle.com/gpreda/covid-world-vaccination-progress?select=country_vaccinations.csv.

In [2]:
# Read in a DataFrame from a csv file (the path is relative to the working directory).
# No dtypes are specified, so pandas infers them; the `date` column comes back as
# strings (object dtype), not datetimes.
vaccine_df = pd.read_csv('data/covid_vaccinations/country_vaccinations.csv')
# A large DataFrame displayed as the last expression of a cell is shown truncated.
vaccine_df

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.00,0.00,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45979,Zimbabwe,ZWE,2021-09-16,4964302.0,2930550.0,2033752.0,108486.0,51755.0,32.89,19.42,13.48,3429.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45980,Zimbabwe,ZWE,2021-09-17,,,,,45993.0,,,,3047.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45981,Zimbabwe,ZWE,2021-09-18,4992501.0,2940750.0,2051751.0,,40514.0,33.08,19.49,13.59,2684.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45982,Zimbabwe,ZWE,2021-09-19,5015041.0,2948725.0,2066316.0,22540.0,40630.0,33.23,19.54,13.69,2692.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...


Dimensions of DataFrame

In [3]:
# .shape is a (number of rows, number of columns) tuple
vaccine_df.shape

(45984, 15)

Type of each column in DataFrame

In [4]:
# One dtype per column; columns holding strings (e.g. country, date) show up as `object`
vaccine_df.dtypes

country                                 object
iso_code                                object
date                                    object
total_vaccinations                     float64
people_vaccinated                      float64
people_fully_vaccinated                float64
daily_vaccinations_raw                 float64
daily_vaccinations                     float64
total_vaccinations_per_hundred         float64
people_vaccinated_per_hundred          float64
people_fully_vaccinated_per_hundred    float64
daily_vaccinations_per_million         float64
vaccines                                object
source_name                             object
source_website                          object
dtype: object

Summary statistics for each column

In [5]:
# describe() reports count, mean, std, min, quartiles and max;
# by default only the numeric columns are summarized
vaccine_df.describe()

Unnamed: 0,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
count,25084.0,23946.0,21048.0,20600.0,45677.0,25084.0,23946.0,21048.0,45677.0
mean,20487940.0,8757993.0,5583303.0,259785.0,130853.5,43.740659,26.569498,19.689742,3547.60352
std,117360300.0,34301730.0,22117210.0,1317281.0,875853.8,45.044427,24.458658,21.467135,4429.261509
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,207767.8,166852.5,80358.5,4898.0,871.0,5.27,4.1125,2.3,505.0
50%,1459863.0,998103.5,635215.0,24106.5,6774.0,26.44,18.6,10.645,2191.0
75%,7737194.0,4679778.0,3270408.0,111688.8,41695.0,73.4425,46.3,32.2525,5307.0
max,2180986000.0,1100842000.0,1022207000.0,24741000.0,22424290.0,235.39,118.27,117.12,117497.0


Head and tail of DataFrame

In [6]:
# head() with no argument returns the first 5 rows
vaccine_df.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...


In [7]:
# tail() with no argument returns the last 5 rows
vaccine_df.tail()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
45979,Zimbabwe,ZWE,2021-09-16,4964302.0,2930550.0,2033752.0,108486.0,51755.0,32.89,19.42,13.48,3429.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45980,Zimbabwe,ZWE,2021-09-17,,,,,45993.0,,,,3047.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45981,Zimbabwe,ZWE,2021-09-18,4992501.0,2940750.0,2051751.0,,40514.0,33.08,19.49,13.59,2684.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45982,Zimbabwe,ZWE,2021-09-19,5015041.0,2948725.0,2066316.0,22540.0,40630.0,33.23,19.54,13.69,2692.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...
45983,Zimbabwe,ZWE,2021-09-20,5044809.0,2961845.0,2082964.0,29768.0,41779.0,33.43,19.63,13.8,2768.0,"Oxford/AstraZeneca, Sinopharm/Beijing, Sinovac...",Ministry of Health,https://www.arcgis.com/home/webmap/viewer.html...


A Series represents a column in a DataFrame, and is analogous to a vector. It is also named and has row indices.

In [8]:
# Selecting a single column with [] returns a Series that keeps the DataFrame's row index
vaccine_df['country']

0        Afghanistan
1        Afghanistan
2        Afghanistan
3        Afghanistan
4        Afghanistan
            ...     
45979       Zimbabwe
45980       Zimbabwe
45981       Zimbabwe
45982       Zimbabwe
45983       Zimbabwe
Name: country, Length: 45984, dtype: object

Besides reading in DataFrames and Series from a CSV file, we can also create them from scratch.

In [9]:
# Wrap a plain Python list of names in a Series; pandas assigns the default 0, 1, 2, ... index
players = pd.Series([
    'Lionel Messi',
    'Cristiano Ronaldo',
    'Neymar',
    'Kylian Mbappe',
    'Mohamed Salah',
])
players

0         Lionel Messi
1    Cristiano Ronaldo
2               Neymar
3        Kylian Mbappe
4        Mohamed Salah
dtype: object

In [10]:
# One inner list per player: [name, team, age]
player_teams = [
    ['Lionel Messi', 'Paris Saint-Germain', 34],
    ['Cristiano Ronaldo', 'Manchester United', 36],
    ['Neymar', 'Paris Saint-Germain', 29],
    ['Kylian Mbappe', 'Paris Saint-Germain', 22],
    ['Mohamed Salah', 'Liverpool', 29],
]

# Build the DataFrame from the rows above, labelling the three columns explicitly
column_names = ['Name', 'Team', 'Age']
player_teams_df = pd.DataFrame(data=player_teams, columns=column_names)
player_teams_df

Unnamed: 0,Name,Team,Age
0,Lionel Messi,Paris Saint-Germain,34
1,Cristiano Ronaldo,Manchester United,36
2,Neymar,Paris Saint-Germain,29
3,Kylian Mbappe,Paris Saint-Germain,22
4,Mohamed Salah,Liverpool,29


### 2. Indexing and Iterating

The index of a DataFrame or Series serves as a (not necessarily unique) identifier for each row. By default, a DataFrame has the index 0, 1, 2, ..., but we can also set the index as we please.

In [11]:
# With no index specified, the rows are labelled 0, 1, 2, ..., 4
player_teams_df.index.tolist()

[0, 1, 2, 3, 4]

Let's set the index of this DataFrame to the player's names.

In [12]:
# set_index returns a new DataFrame whose row labels come from the 'Name' column;
# the column itself is removed from the data.
# NOTE: re-running this cell on its own raises a KeyError, because after the first
# run 'Name' is the index and no longer a column.
player_teams_df = player_teams_df.set_index('Name')
player_teams_df   # now the DataFrame is indexed by player name

Unnamed: 0_level_0,Team,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Lionel Messi,Paris Saint-Germain,34
Cristiano Ronaldo,Manchester United,36
Neymar,Paris Saint-Germain,29
Kylian Mbappe,Paris Saint-Germain,22
Mohamed Salah,Liverpool,29


To access a row by its index label, use the `.loc` indexer.

In [13]:
# A single label selects one row, returned as a Series indexed by the column names
player_teams_df.loc['Neymar']

Team    Paris Saint-Germain
Age                      29
Name: Neymar, dtype: object

In [14]:
# A list of labels selects several rows, returned as a DataFrame in the order requested
player_teams_df.loc[['Lionel Messi', 'Neymar', 'Kylian Mbappe']]

Unnamed: 0_level_0,Team,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Lionel Messi,Paris Saint-Germain,34
Neymar,Paris Saint-Germain,29
Kylian Mbappe,Paris Saint-Germain,22


To access a row by its integer position (0, 1, 2, ...), regardless of the index labels, use the `.iloc` indexer.

In [15]:
# Position 2 is the third row (positions start at 0), whatever the index labels are
player_teams_df.iloc[2]

Team    Paris Saint-Germain
Age                      29
Name: Neymar, dtype: object

In [16]:
# A list of positions selects several rows, returned as a DataFrame
player_teams_df.iloc[[2, 3, 4]]

Unnamed: 0_level_0,Team,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Neymar,Paris Saint-Germain,29
Kylian Mbappe,Paris Saint-Germain,22
Mohamed Salah,Liverpool,29


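We can iterate over a Series using the `.items` method, which yields (index, value) pairs. (Older pandas versions also offered this as `.iteritems`; that alias was deprecated in pandas 1.5 and removed in pandas 2.0.)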

In [17]:
# Series.items() yields (index label, value) pairs.
# It replaces Series.iteritems(), which was deprecated in pandas 1.5 and removed in
# pandas 2.0 (calling it there raises AttributeError).
for index, name in players.items():
    print(index, name)

0 Lionel Messi
1 Cristiano Ronaldo
2 Neymar
3 Kylian Mbappe
4 Mohamed Salah


We can iterate over a DataFrame using the `.iterrows` method.

In [18]:
# iterrows() yields (index label, row) pairs, where each row is a Series keyed by column name
for player_name, record in player_teams_df.iterrows():
    team = record['Team']
    age = record['Age']
    print(player_name, '\n', team, '\n', age, '\n')

Lionel Messi 
 Paris Saint-Germain 
 34 

Cristiano Ronaldo 
 Manchester United 
 36 

Neymar 
 Paris Saint-Germain 
 29 

Kylian Mbappe 
 Paris Saint-Germain 
 22 

Mohamed Salah 
 Liverpool 
 29 



### 3. Filtering and Sorting

Pandas offers a flexible interface for filtering Series and DataFrames based on various conditions.

In [19]:
# Build a boolean mask (True where country is 'United States'), then keep only those rows
is_usa = vaccine_df['country'] == 'United States'
vaccine_df_usa = vaccine_df[is_usa]
print(vaccine_df_usa.shape)
vaccine_df_usa.head()

(275, 15)


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
43869,United States,USA,2020-12-20,556208.0,556208.0,,,,0.17,0.17,,,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43870,United States,USA,2020-12-21,614117.0,614117.0,,57909.0,57909.0,0.18,0.18,,172.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43871,United States,USA,2020-12-22,,,,,127432.0,,,,379.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43872,United States,USA,2020-12-23,1008025.0,1008025.0,,,150606.0,0.3,0.3,,448.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43873,United States,USA,2020-12-24,,,,,191001.0,,,,568.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...


In [20]:
# Keep rows where country is 'United States' OR country is 'Mexico'.
# Each comparison gives a boolean mask; `|` combines them element-wise.
is_usa = vaccine_df['country'] == 'United States'
is_mexico = vaccine_df['country'] == 'Mexico'
vaccine_df_usa_mex = vaccine_df[is_usa | is_mexico]
print(vaccine_df_usa_mex.shape)
vaccine_df_usa_mex.head()

(544, 15)


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
26400,Mexico,MEX,2020-12-24,2924.0,2924.0,,,,0.0,0.0,,,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr...",Secretary of Health,http://www.gob.mx/cms/uploads/attachment/file/...
26401,Mexico,MEX,2020-12-25,,,,,1300.0,,,,10.0,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr...",Secretary of Health,http://www.gob.mx/cms/uploads/attachment/file/...
26402,Mexico,MEX,2020-12-26,,,,,1300.0,,,,10.0,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr...",Secretary of Health,http://www.gob.mx/cms/uploads/attachment/file/...
26403,Mexico,MEX,2020-12-27,6824.0,6824.0,,,1300.0,0.01,0.01,,10.0,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr...",Secretary of Health,http://www.gob.mx/cms/uploads/attachment/file/...
26404,Mexico,MEX,2020-12-28,9579.0,9579.0,,2755.0,1664.0,0.01,0.01,,13.0,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr...",Secretary of Health,http://www.gob.mx/cms/uploads/attachment/file/...


In [21]:
# Filter vaccine_df to rows where country is 'United States' AND date is on or after 2021-01-01.
# `date` holds strings, so >= is a string comparison; it matches chronological order
# because the dates are formatted YYYY-MM-DD.
# NOTE: this rebinds vaccine_df_usa, replacing the unfiltered US frame from the earlier cell.
vaccine_df_usa = vaccine_df[(vaccine_df['country'] == 'United States') & (vaccine_df['date'] >= '2021-01-01')]
print(vaccine_df_usa.shape)
vaccine_df_usa.head()

(263, 15)


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
43881,United States,USA,2021-01-01,,,,,302329.0,,,,899.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43882,United States,USA,2021-01-02,4225756.0,4225756.0,,,325882.0,1.26,1.26,,969.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43883,United States,USA,2021-01-03,,,,,336949.0,,,,1002.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43884,United States,USA,2021-01-04,4563260.0,4563260.0,,,348017.0,1.36,1.36,,1035.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43885,United States,USA,2021-01-05,4836469.0,4836469.0,,273209.0,339372.0,1.44,1.44,,1009.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...


We can also sort the DataFrame with a specific column as the key.

In [22]:
# Order the US rows from the highest to the lowest value of daily_vaccinations;
# sort_values returns a new DataFrame and leaves vaccine_df_usa unchanged
vaccine_df_usa_sorted_by_daily_vacs = vaccine_df_usa.sort_values(
    by='daily_vaccinations',
    ascending=False,
)
vaccine_df_usa_sorted_by_daily_vacs.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
43983,United States,USA,2021-04-13,192282781.0,122295530.0,75322283.0,2590736.0,3384387.0,57.17,36.36,22.4,10063.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43986,United States,USA,2021-04-16,202282923.0,127743096.0,80609818.0,3965883.0,3349306.0,60.15,37.98,23.97,9959.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43985,United States,USA,2021-04-15,198317040.0,125822868.0,78498290.0,3525204.0,3348189.0,58.97,37.41,23.34,9955.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43984,United States,USA,2021-04-14,194791836.0,123917385.0,76681252.0,2509055.0,3330740.0,57.92,36.84,22.8,9903.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...
43982,United States,USA,2021-04-12,189692045.0,120848490.0,74066085.0,2644914.0,3214893.0,56.4,35.93,22.02,9559.0,"Johnson&Johnson, Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/COVID...


### 4. Useful Pandas Operations

View unique elements in a Series

In [24]:
# unique() returns the distinct values as a NumPy array, in order of first appearance
vaccine_df['country'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire",
       'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czechia',
       'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'England',
       'Equatorial Guinea', 'Estonia', 'Eswatini', 'Ethiopia',
       'Faeroe Islands', 'Fal

View counts of items in a Series

In [25]:
# value_counts() counts the rows for each distinct value, sorted from most to least frequent
vaccine_df['country'].value_counts()

Denmark         293
Norway          292
Latvia          291
Scotland        286
England         286
               ... 
Haiti            64
Tokelau          57
Niue             43
Tanzania         35
Turkmenistan      1
Name: country, Length: 222, dtype: int64

Add a column to a DataFrame

In [26]:
# Assigning a list to a new column name adds that column to the DataFrame.
# The list needs one value per row, given in the DataFrame's current row order.
player_teams_df['Nationality'] = ['Argentina', 'Portugal', 'Brazil', 'France', 'Egypt']
player_teams_df

Unnamed: 0_level_0,Team,Age,Nationality
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lionel Messi,Paris Saint-Germain,34,Argentina
Cristiano Ronaldo,Manchester United,36,Portugal
Neymar,Paris Saint-Germain,29,Brazil
Kylian Mbappe,Paris Saint-Germain,22,France
Mohamed Salah,Liverpool,29,Egypt


Remove missing data

In [27]:
print(vaccine_df.shape)

# Remove rows where daily_vaccinations column has a missing entry.
# dropna returns a new DataFrame; assigning it back to vaccine_df means every cell
# run after this one sees the filtered data.
# The surviving rows keep their original index labels (the output below starts at 1).
vaccine_df = vaccine_df.dropna(subset=['daily_vaccinations'])

print(vaccine_df.shape)
vaccine_df.head()

(45984, 15)
(45677, 15)


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...
5,Afghanistan,AFG,2021-02-27,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://app.powerbi.com/view?r=eyJrIjoiYTkyM2V...


Save DataFrame to CSV

In [28]:
# index=False leaves the row index out of the file, so only the data columns are written
vaccine_df.to_csv('data/covid_vaccinations/country_vaccinations_processed.csv', index=False)

In [29]:
# The index is written by default; here it holds the player names, so they become
# the first column of the CSV
player_teams_df.to_csv('data/player_teams.csv')