# PANDAS LIBRARY

In [1]:
import sys

# Extra site-packages directory made importable for this notebook.
# NOTE(review): absolute, machine-specific path (presumably the virtualenv that
# provides pandas/numpy) — adjust it when running on another machine.
EXTRA_SITE_PACKAGES = '/home/oscar/py_envs/lib/python3.12/site-packages'

# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
if EXTRA_SITE_PACKAGES not in sys.path:
    sys.path.append(EXTRA_SITE_PACKAGES)

In [2]:
import pandas as pd

In [3]:
import numpy as np

## Series

In [5]:
# G7 population figures, expressed in millions of people.
g7_pop = pd.Series(
    [
        35.467,
        69.951,
        80.940,
        60.665,
        127.061,
        64.511,
        318.523,
    ]
)

In [6]:
g7_pop

0     35.467
1     69.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [8]:
# Give the Series a name; it appears in the repr next to the dtype.
g7_pop.name = "Population of G7 Countries"

In [9]:
g7_pop

0     35.467
1     69.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: Population of G7 Countries, dtype: float64

NOTE: Series are pretty similar to NumPy arrays

The underlying object in which pandas stores a Series' data is a NumPy array; in other words, the data is backed by a NumPy array.

In [10]:
# Expose the data as a NumPy array. `.to_numpy()` is the accessor the pandas
# documentation recommends over the older `.values` attribute.
g7_pop.to_numpy()

array([ 35.467,  69.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

### How to check the type of the data structure (not the same as `.dtype`, which is the data type of the elements)

In [12]:
# type() reports the container class (a NumPy ndarray), not the element dtype.
# `.to_numpy()` is the accessor the pandas documentation recommends over `.values`.
type(g7_pop.to_numpy())

numpy.ndarray

## Series Indexing

In [13]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [15]:
# Replace the default 0..6 RangeIndex with country-name labels (one per value, in order).
g7_pop.index = [
    "Canada", "France", "Germany", "Italy",
    "Japan", "United Kingdom", "United States",
]

In [16]:
g7_pop

Canada             35.467
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population of G7 Countries, dtype: float64

In [17]:
# Build a labelled Series in a single call: values, index labels and name together.
pd.Series(
    data=[35.467, 69.951, 80.940, 60.665, 127.061, 64.511, 318.523],
    index=[
        "Canada", "France", "Germany", "Italy",
        "Japan", "United Kingdom", "United States",
    ],
    name="G7 Population in millions",
)

Canada             35.467
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [18]:
# A dict works as input too: its keys become the index labels, its values the data.
pd.Series(
    {
        "Canada": 35.467,
        "France": 69.951,
        "Germany": 80.940,
        "Italy": 60.665,
        "Japan": 127.061,
        "United Kingdom": 64.511,
        "United States": 318.523,
    },
    name="population of G7 in millions",
)

Canada             35.467
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: population of G7 in millions, dtype: float64

In [20]:
# Passing an existing Series together with `index=` keeps only the listed labels.
pd.Series(
    g7_pop,
    index=["Germany", "Italy", "Japan", "United States"],
)

Germany           80.940
Italy             60.665
Japan            127.061
United States    318.523
Name: Population of G7 Countries, dtype: float64

In [23]:
# .iloc selects by integer position: position 0 is the first entry (Canada).
g7_pop.iloc[0] 

np.float64(35.467)

In [25]:
# Position 6 is the last of the seven entries (United States).
g7_pop.iloc[6] 

np.float64(318.523)

In [26]:
# Square brackets with a label look the value up by its index label.
g7_pop["Canada"]

np.float64(35.467)

In [27]:
# Label lookup again, this time for the last entry.
g7_pop["United States"]

np.float64(318.523)

### Selecting several elements at once (like NumPy fancy indexing)

In [28]:
# A list of labels returns a Series containing just those entries.
g7_pop[["Canada","United States"]]

Canada            35.467
United States    318.523
Name: Population of G7 Countries, dtype: float64

In [29]:
# Positional equivalent: a list of integer positions passed to .iloc.
g7_pop.iloc[[0,6]] 

Canada            35.467
United States    318.523
Name: Population of G7 Countries, dtype: float64

### Slicing

In [36]:
# Unlike ordinary Python slicing, slicing by label INCLUDES the upper bound:
# "Germany" is part of the result.
g7_pop["Canada":"Germany"] 

Canada     35.467
France     69.951
Germany    80.940
Name: Population of G7 Countries, dtype: float64

In [34]:
# Positional slicing with .iloc follows the usual Python rule: the upper bound
# (position 3 here) is excluded.
g7_pop.iloc[0:3] 

Canada     35.467
France     69.951
Germany    80.940
Name: Population of G7 Countries, dtype: float64

In [35]:
# Shorter spelling: when the slice starts at the first element (position 0),
# the lower bound can be omitted. The result is the same as .iloc[0:3].
g7_pop.iloc[:3] 

Canada     35.467
France     69.951
Germany    80.940
Name: Population of G7 Countries, dtype: float64

## Boolean filtering in Series

In [37]:
# Show the whole Series as the starting point for the boolean filters below.
# (The recorded output of this cell is the full seven-row Series, not a 3-row slice.)
g7_pop

Canada             35.467
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population of G7 Countries, dtype: float64

In [40]:
# Element-wise comparison: produces a boolean Series on the same index.
g7_pop < 70

Canada             True
France             True
Germany           False
Italy              True
Japan             False
United Kingdom     True
United States     False
Name: Population of G7 Countries, dtype: bool

In [42]:
# Boolean filtering: use the boolean Series as a mask to keep only the True rows.
g7_pop[g7_pop < 70]

Canada            35.467
France            69.951
Italy             60.665
United Kingdom    64.511
Name: Population of G7 Countries, dtype: float64

In [45]:
# Vectorized (broadcast) arithmetic: the scalar is applied to every element,
# turning the figures in millions into absolute head counts.
g7_pop * 1_000_000

Canada             35467000.0
France             69951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: Population of G7 Countries, dtype: float64

### Statistical operations and filtering with them

In [48]:
# Arithmetic mean (not the geometric or harmonic mean)
g7_pop.mean()

np.float64(108.15971428571429)

In [49]:
# Standard deviation; pandas computes the sample version (ddof=1) by default.
g7_pop.std()

np.float64(96.8297286696503)

In [54]:
# Keep every value that differs from the mean; no entry equals the mean exactly,
# so the whole Series comes back.
g7_pop[g7_pop != g7_pop.mean()]

Canada             35.467
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population of G7 Countries, dtype: float64

In [56]:
# std()  : standard deviation
# mean() : arithmetic mean
# !=     : element-wise "not equal"
# &      : element-wise "and"
# |      : element-wise "or"
#
# NOTE(review): the second clause is redundant. Any value above mean + std/2 is
# also above mean - std/2, so this selects exactly the rows of the first clause.
# If the goal was the values lying outside the band around the mean, the first
# comparison should presumably be `<`; confirm the intent before changing it.

g7_pop[ (g7_pop > g7_pop.mean()-g7_pop.std()/2) | (g7_pop > g7_pop.mean()+g7_pop.std()/2) ]

France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population of G7 Countries, dtype: float64

### Modifying series

In [57]:
# Assign through a label to overwrite a single entry in place.
g7_pop["Canada"] = 40

In [58]:
g7_pop

Canada             40.000
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population of G7 Countries, dtype: float64

In [59]:
# The same kind of in-place update, addressed by integer position instead of label.
g7_pop.iloc[0] = 36

In [60]:
g7_pop

Canada             36.000
France             69.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population of G7 Countries, dtype: float64

In [63]:
# Assignment through a boolean mask: every entry greater than 70 is overwritten
# with the same scalar.
g7_pop[g7_pop > 70] = 99.99

In [64]:
g7_pop

Canada            36.000
France            69.951
Germany           99.990
Italy             60.665
Japan             99.990
United Kingdom    64.511
United States     99.990
Name: Population of G7 Countries, dtype: float64