<img src="https://pandas.pydata.org/_static/pandas_logo.png" width="400" align="left"/>

In [1]:
from IPython.display import IFrame

In [2]:
# Embed a YouTube video player in the notebook output.
IFrame(
    src='https://www.youtube.com/embed/26ZioEwRw00',
    width=640,
    height=400,
)

In [3]:
import pandas as pd

# Python Dictionary

In [4]:
# Greenhouse gas emissions for 2016, in tonnes per capita, keyed by country.
emissions2016_dict = {
    'Estonia': 15.0,
    'Luxembourg': 19.8,
    'Netherlands': 12.2,
    'Sweden': 5.6,
}

Data source: Eurostat climate change database — https://ec.europa.eu/eurostat/web/climate-change/data/database

In [5]:
# The dictionary's keys: the country names.
emissions2016_dict.keys()

dict_keys(['Estonia', 'Luxembourg', 'Netherlands', 'Sweden'])

In [6]:
# The dictionary's values: the emission figures, in the same order as the keys.
emissions2016_dict.values()

dict_values([15.0, 19.8, 12.2, 5.6])

# Pandas Series

In [7]:
# Build a Series from explicit values, matching index labels, and a name (the year).
emissions2016 = pd.Series(
    data=[15.0, 19.8, 12.2, 5.6],
    index=['Estonia', 'Luxembourg', 'Netherlands', 'Sweden'],
    name=2016,
)

In [8]:
# Display the Series: index labels on the left, values on the right, name and dtype below.
emissions2016

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Name: 2016, dtype: float64

In [9]:
# Same Series built directly from the dictionary: keys become the index, values the data.
emissions2016 = pd.Series(emissions2016_dict, name=2016)

In [10]:
# Display the Series built from the dictionary.
emissions2016

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Name: 2016, dtype: float64

In [11]:
# The Series' data without its index, as an array.
emissions2016.values

array([15. , 19.8, 12.2,  5.6])

In [12]:
# Check which type holds the Series' underlying data.
type(emissions2016.values)

numpy.ndarray

In [13]:
# Arithmetic is applied element-wise; the result keeps the index labels and the name.
emissions2016 * 1000

Estonia        15000.0
Luxembourg     19800.0
Netherlands    12200.0
Sweden          5600.0
Name: 2016, dtype: float64

In [14]:
# With no index given, the Series gets a default integer index starting at 0.
pd.Series(
    data=[15.0, 19.8, 12.2, 5.6],
    name=2016,
)

0    15.0
1    19.8
2    12.2
3     5.6
Name: 2016, dtype: float64

# Querying a Series

In [15]:
# Label-based lookup: the value stored under the index label 'Sweden'.
emissions2016.loc['Sweden']

5.6

In [16]:
# Position-based lookup (0-indexed): position 3 is the fourth entry, Sweden.
emissions2016.iloc[3]

5.6

# DataFrame

In [17]:
# 2014 figures keyed by country (presumably the same tonnes-per-capita measure
# as the 2016 data -- confirm against the source). Includes an entry for Italy.
emissions2014_dict = {
    'Estonia': 16.1,
    'Luxembourg': 21.5,
    'Netherlands': 11.7,
    'Sweden': 5.8,
    'Italy': 7.1,
}

In [18]:
# Series for 2014 built from the dictionary; the year is used as the Series name.
emissions2014 = pd.Series(emissions2014_dict, name=2014)

In [19]:
# Each Series becomes one row, labelled by its name; the index labels become the
# columns. Labels missing from a Series (Italy in 2016) are filled with NaN.
df = pd.DataFrame([emissions2014, emissions2016])
df

Unnamed: 0,Estonia,Luxembourg,Netherlands,Sweden,Italy
2014,16.1,21.5,11.7,5.8,7.1
2016,15.0,19.8,12.2,5.6,


In [20]:
# A list of dictionaries also yields one row per element; since plain dicts
# carry no name, the row labels are supplied through `index`.
pd.DataFrame(
    data=[emissions2014_dict, emissions2016_dict],
    index=[2014, 2016],
)

Unnamed: 0,Estonia,Italy,Luxembourg,Netherlands,Sweden
2014,16.1,7.1,21.5,11.7,5.8
2016,15.0,,19.8,12.2,5.6


In [21]:
# Transpose: countries become the rows and years become the columns.
df.T

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6
Italy,7.1,


In [22]:
# Dictionary of dictionaries: the outer keys (years) become the columns and the
# inner keys (countries) become the row labels.
df = pd.DataFrame({2014: emissions2014_dict, 2016: emissions2016_dict})
df

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Italy,7.1,
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6


# Querying a DataFrame

In [23]:
# Select one row by its label; the result is a Series indexed by the column labels.
df.loc['Italy']

2014    7.1
2016    NaN
Name: Italy, dtype: float64

In [24]:
# Select one row by integer position (0-indexed); position 1 is the Italy row here.
df.iloc[1]

2014    7.1
2016    NaN
Name: Italy, dtype: float64

In [25]:
# Select one column by its label; the result is a Series indexed by country.
df[2016]

Estonia        15.0
Italy           NaN
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Name: 2016, dtype: float64

In [26]:
# The same column through .loc: all rows (:), column label 2016.
df.loc[:,2016]

Estonia        15.0
Italy           NaN
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Name: 2016, dtype: float64

In [27]:
# Label-based slice of rows for the 2016 column. Unlike positional slices,
# a .loc slice includes BOTH endpoints, so 'Luxembourg' is part of the result.
df.loc['Estonia':'Luxembourg', 2016]

Estonia       15.0
Italy          NaN
Luxembourg    19.8
Name: 2016, dtype: float64

In [28]:
# All rows of the column at position 0 (the 2014 column).
df.iloc[:,0]

Estonia        16.1
Italy           7.1
Luxembourg     21.5
Netherlands    11.7
Sweden          5.8
Name: 2014, dtype: float64

In [29]:
# Rows at positions 0, 1 and 2 (the end of a positional slice is exclusive)
# of the column at position 1 (the 2016 column).
df.iloc[0:3,1]

Estonia       15.0
Italy          NaN
Luxembourg    19.8
Name: 2016, dtype: float64

# Adding Rows and Columns

In [30]:
# Set a single cell by (row label, column label): fills Italy's missing 2016 value.
df.loc['Italy',2016] = 7.2

In [31]:
# Display the DataFrame to confirm the Italy / 2016 cell is now filled.
df

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Italy,7.1,7.2
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6


In [32]:
# Assigning to a row label that is not yet in the index appends a new row;
# the list supplies one value per existing column (2014, 2016).
df.loc['Germany',:] = [11.4, 11.4]

In [33]:
# Display the DataFrame to confirm the new 'Germany' row was appended.
df

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Italy,7.1,7.2
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6
Germany,11.4,11.4


In [34]:
# Assigning to a column label that does not exist yet adds a new column.
# The list is matched to the rows by position, so it must follow the row order.
df.loc[:, 2006] = [
    13.8,  # Estonia
    10.0,  # Italy
    29.7,  # Luxembourg
    13.5,  # Netherlands
    7.6,   # Sweden
    12.4,  # Germany
]
df

Unnamed: 0,2014,2016,2006
Estonia,16.1,15.0,13.8
Italy,7.1,7.2,10.0
Luxembourg,21.5,19.8,29.7
Netherlands,11.7,12.2,13.5
Sweden,5.8,5.6,7.6
Germany,11.4,11.4,12.4


In [35]:
# Add an empty placeholder column for 2020. NaN is used as the missing-value
# marker so the new column is float64 like the other year columns; assigning
# None instead would create an object-dtype column.
df.loc[:, 2020] = float('nan')
df

Unnamed: 0,2014,2016,2006,2020
Estonia,16.1,15.0,13.8,
Italy,7.1,7.2,10.0,
Luxembourg,21.5,19.8,29.7,
Netherlands,11.7,12.2,13.5,
Sweden,5.8,5.6,7.6,
Germany,11.4,11.4,12.4,


# Deleting Rows and Columns

In [36]:
# Returns a new DataFrame without the 'Germany' row; df itself is left unchanged.
df.drop(index='Germany')

Unnamed: 0,2014,2016,2006,2020
Estonia,16.1,15.0,13.8,
Italy,7.1,7.2,10.0,
Luxembourg,21.5,19.8,29.7,
Netherlands,11.7,12.2,13.5,
Sweden,5.8,5.6,7.6,


In [37]:
# Returns a new DataFrame without the 2014 and 2016 columns; df itself is left unchanged.
df.drop(columns=[2014,2016])

Unnamed: 0,2006,2020
Estonia,13.8,
Italy,10.0,
Luxembourg,29.7,
Netherlands,13.5,
Sweden,7.6,
Germany,12.4,


In [38]:
# df still has every row and column: the earlier drop() calls returned new
# DataFrames and did not modify df.
df

Unnamed: 0,2014,2016,2006,2020
Estonia,16.1,15.0,13.8,
Italy,7.1,7.2,10.0,
Luxembourg,21.5,19.8,29.7,
Netherlands,11.7,12.2,13.5,
Sweden,5.8,5.6,7.6,
Germany,11.4,11.4,12.4,


In [39]:
# Drop a row and two columns in a single call. With inplace=True the change is
# applied to df itself instead of being returned as a new DataFrame.
df.drop(
    index='Germany',
    columns=[2014, 2016],
    inplace=True,
)
df

Unnamed: 0,2006,2020
Estonia,13.8,
Italy,10.0,
Luxembourg,29.7,
Netherlands,13.5,
Sweden,7.6,
