# Pandas - Series

## Hands on!

In [1]:
import pandas as pd
import numpy as np

## Pandas Series

### We'll start by analyzing "The Group of Seven" [https://www.nationmaster.com/country-info/groups/Group-of-7-countries-(G7)]

In [2]:
# G7 member populations, expressed in millions of inhabitants
g7_pop = pd.Series(
    data=[
        63.18,
        316.67,
        127.25,
        81.15,
        65.95,
        61.48,
        34.57,
    ],
)

In [3]:
g7_pop

0     63.18
1    316.67
2    127.25
3     81.15
4     65.95
5     61.48
6     34.57
dtype: float64

### Series can have a **name**:

In [4]:
# Attach a descriptive name; it is shown in the Series repr next to the dtype
g7_pop.name = 'G7 Population in millions'

In [5]:
g7_pop

0     63.18
1    316.67
2    127.25
3     81.15
4     65.95
5     61.48
6     34.57
Name: G7 Population in millions, dtype: float64

### Series are pretty similar to numpy arrays:

In [6]:
g7_pop.dtype

dtype('float64')

In [7]:
g7_pop.values

array([ 63.18, 316.67, 127.25,  81.15,  65.95,  61.48,  34.57])

### They are actually backed by numpy arrays:

In [8]:
type(g7_pop.values)

numpy.ndarray

### A Series has an **index**:

In [9]:
g7_pop

0     63.18
1    316.67
2    127.25
3     81.15
4     65.95
5     61.48
6     34.57
Name: G7 Population in millions, dtype: float64

In [10]:
g7_pop[0]

63.18

In [11]:
g7_pop[1]

316.67

In [12]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

### But, in contrast to lists, we can explicitly define the index:

In [13]:
# Swap the default 0..6 RangeIndex for country-code labels, one per value
g7_pop.index = ['UK', 'US', 'JP', 'DE', 'FR', 'IT', 'CA']

In [14]:
g7_pop

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

### We can create Series out of dictionaries:

In [15]:
# Build a Series from a dict: each key becomes an index label and each value
# the corresponding data point; `name` labels the resulting Series.
pd.Series({
    'UK': 63.18,
    'US': 316.67,
    'JP': 127.25,
    'DE': 81.15,
    'FR': 65.95,
    'IT': 61.48,
    'CA': 34.57
}, name='G7 Population in millions')

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

In [16]:
# Equivalent construction from parallel sequences: values first, then the
# labels passed explicitly through `index` (same order as the values).
pd.Series(
    [63.18, 316.67, 127.25, 81.15, 65.95, 61.48, 34.57],
    index=['UK','US','JP','DE','FR','IT','CA'],
    name='G7 Population in millions'
)

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

### Series out of other series, specifying indexes:

In [17]:
# Passing an existing Series plus `index` selects just the listed labels;
# the name of the source Series is carried over to the result.
pd.Series(g7_pop, index=['UK', 'US', 'JP', 'DE'])

UK     63.18
US    316.67
JP    127.25
DE     81.15
Name: G7 Population in millions, dtype: float64

### Indexing

In [28]:
g7_pop

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

In [18]:
g7_pop['CA']

34.57

In [19]:
g7_pop['JP']

127.25

In [20]:
# Positional access goes through .iloc: on a string-labelled index, plain
# g7_pop[0] relies on a deprecated "integer key means position" fallback
# (FutureWarning since pandas 2.1; integer keys will be treated as labels).
g7_pop.iloc[0]

63.18

In [21]:
g7_pop.iloc[0]

63.18

In [22]:
g7_pop.iloc[-1]

34.57

In [33]:
# The result is another Series
g7_pop[['IT','FR']]

IT    61.48
FR    65.95
Name: G7 Population in millions, dtype: float64

In [26]:
# A list of integer positions picks several elements at once (the result is a
# Series). Use .iloc for this: plain g7_pop[[0, 1]] on a string-labelled index
# depends on the deprecated positional fallback of Series.__getitem__.
# Note: the "upper limit is also included" rule applies to label slices such
# as g7_pop['UK': 'IT'], not to lists of positions.
g7_pop.iloc[[0, 1]]

UK     63.18
US    316.67
Name: G7 Population in millions, dtype: float64

In [27]:
g7_pop.iloc[[0, 1]]

UK     63.18
US    316.67
Name: G7 Population in millions, dtype: float64

In [30]:
# Label-based slices include BOTH endpoints: 'IT' is part of the result,
# unlike positional slices, where the stop position is excluded.
g7_pop['UK': 'IT']

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
Name: G7 Population in millions, dtype: float64

In [34]:
g7_pop[:]

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

### Conditional selection (Boolean Arrays)

In [None]:
# Element-wise boolean operators for combining conditions on a Series:
#   ~  NOT  (negates a boolean Series)
#   |  OR   (True where at least one side is True)
#   &  AND  (True only where both sides are True)
# Wrap each comparison in parentheses, e.g. (s > a) & (s < b), because
# these operators bind more tightly than comparison operators in Python.

In [35]:
g7_pop

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

In [36]:
g7_pop > 70

UK    False
US     True
JP     True
DE     True
FR    False
IT    False
CA    False
Name: G7 Population in millions, dtype: bool

In [37]:
g7_pop[g7_pop > 70]

US    316.67
JP    127.25
DE     81.15
Name: G7 Population in millions, dtype: float64

In [38]:
g7_pop.mean()

107.17857142857144

In [39]:
g7_pop[g7_pop > g7_pop.mean()]

US    316.67
JP    127.25
Name: G7 Population in millions, dtype: float64

In [40]:
g7_pop.std()

96.57729278123499

In [41]:
# OR (|): keep entries where at least one of the two conditions holds.
# NOTE(review): any value above mean + std/2 is also above mean - std/2, so
# the second condition adds nothing; the result equals the first condition
# alone. If "outside the band" was intended, the first test should use `<`.
g7_pop[(g7_pop > g7_pop.mean() - g7_pop.std() / 2) | (g7_pop > g7_pop.mean() + g7_pop.std() / 2)]

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
Name: G7 Population in millions, dtype: float64

In [42]:
# AND (&): keep entries where both conditions hold.
# NOTE(review): the first condition is implied by the second, so this is
# equivalent to g7_pop > g7_pop.mean() + g7_pop.std() / 2 on its own.
g7_pop[(g7_pop > g7_pop.mean() - g7_pop.std() / 2) & (g7_pop > g7_pop.mean() + g7_pop.std() / 2)]

US    316.67
Name: G7 Population in millions, dtype: float64

### Operations and methods

In [43]:
# Arithmetic with a scalar is applied to every element (vectorized).
# The underscores are only digit separators: 1_000_000 is the same number
# as 1000000, so this yields the same result as the un-separated literal.
g7_pop * 1_000_000

UK     63180000.0
US    316670000.0
JP    127250000.0
DE     81150000.0
FR     65950000.0
IT     61480000.0
CA     34570000.0
Name: G7 Population in millions, dtype: float64

In [44]:
g7_pop * 1000000

UK     63180000.0
US    316670000.0
JP    127250000.0
DE     81150000.0
FR     65950000.0
IT     61480000.0
CA     34570000.0
Name: G7 Population in millions, dtype: float64

In [45]:
g7_pop.mean()

107.17857142857144

In [46]:
np.log(g7_pop)

UK    4.145988
US    5.757860
JP    4.846154
DE    4.396299
FR    4.188897
IT    4.118712
CA    3.542986
Name: G7 Population in millions, dtype: float64

In [47]:
g7_pop['FR':'IT'].mean()

63.715

In [48]:
g7_pop['UK':'US'].mean()

189.925

### Boolean arrays

In [49]:
g7_pop

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

In [50]:
g7_pop > 80

UK    False
US     True
JP     True
DE     True
FR    False
IT    False
CA    False
Name: G7 Population in millions, dtype: bool

In [51]:
g7_pop[g7_pop > 80]

US    316.67
JP    127.25
DE     81.15
Name: G7 Population in millions, dtype: float64

In [52]:
g7_pop[(g7_pop > 80) | (g7_pop < 40)]

US    316.67
JP    127.25
DE     81.15
CA     34.57
Name: G7 Population in millions, dtype: float64

In [53]:
g7_pop[(g7_pop > 80) | (g7_pop < 200)]

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     34.57
Name: G7 Population in millions, dtype: float64

In [54]:
g7_pop[(g7_pop > 80) & (g7_pop < 200)]

JP    127.25
DE     81.15
Name: G7 Population in millions, dtype: float64

### Modifying series

In [56]:
# Assign by label: overwrites the value stored under 'CA' in place
g7_pop['CA'] = 40.5

In [57]:
g7_pop

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA     40.50
Name: G7 Population in millions, dtype: float64

In [58]:
# Assign by position (last element) via .iloc. Plain g7_pop[-1] = ... on a
# string-labelled index depends on the deprecated positional fallback of
# Series.__setitem__; once integer keys are treated as labels it would no
# longer update the last entry ('CA').
g7_pop.iloc[-1] = 500

In [59]:
g7_pop

UK     63.18
US    316.67
JP    127.25
DE     81.15
FR     65.95
IT     61.48
CA    500.00
Name: G7 Population in millions, dtype: float64

In [60]:
# Boolean-mask assignment: every entry currently below 70 is overwritten
# with 99.99; entries where the mask is False are left untouched.
g7_pop.loc[g7_pop < 70] = 99.99

In [61]:
g7_pop

UK     99.99
US    316.67
JP    127.25
DE     81.15
FR     99.99
IT     99.99
CA    500.00
Name: G7 Population in millions, dtype: float64