# Data Analysis with Python - freeCodeCamp Course

## Introduction to Pandas (Series)

In [1]:
import numpy as np
import pandas as pd

### Pandas Series

In [3]:
# Population of the G7 countries, in millions. The values are listed in this
# order: Canada, France, Germany, Italy, Japan, United Kingdom, United States.
# No index is passed, so pandas labels the elements 0..6.
g7_pop = pd.Series(
    data=[
        35.467,   # Canada
        63.951,   # France
        80.940,   # Germany
        60.665,   # Italy
        127.061,  # Japan
        64.511,   # United Kingdom
        318.523,  # United States
    ]
)
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [4]:
# Give the Series a name; it is shown in the repr footer next to the dtype.
g7_pop.name = 'G7 Population (in millions)'
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population (in millions), dtype: float64

In [5]:
# .values exposes the underlying data as a NumPy array (no index labels).
g7_pop.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

In [10]:
# Look up one element by its index label; with the default 0..6 index,
# label 0 is the first value.
g7_pop[0]

35.467

In [11]:
# A list of labels selects several elements at once and returns a Series.
g7_pop[[2,5]]

2    80.940
5    64.511
Name: G7 Population (in millions), dtype: float64

In [13]:
# Replace the default integer index with country names. The labels are
# attached to the existing values by position, so the order must match
# the order in which the populations were listed.
g7_pop.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
]

g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population (in millions), dtype: float64

In [15]:
# Alternatively, build the Series from a dict: the keys become the index
# labels and the values become the data.
pd.Series({
    'Canada': 35.467,
    'France': 63.951,
    'Germany': 80.94,
    'Italy': 60.665,
    'Japan': 127.061,
    'United Kingdom': 64.511,
    'United States': 318.523
}, name='G7 Population in millions')

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [16]:
# Or pass the values and the index labels as two separate sequences
# (they are paired up by position), together with the name.
pd.Series(
    [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    index=['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
    name='G7 Population in millions')

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

### Indexing

In [17]:
# Look up a single value by its index label.
g7_pop['Canada']

35.467

In [18]:
# A list of labels returns a Series holding just those elements.
g7_pop[['Canada','Japan']]

Canada     35.467
Japan     127.061
Name: G7 Population (in millions), dtype: float64

In [19]:
# Now that the index holds country names, numeric (0-based) positions
# can still be used through the iloc attribute:
g7_pop.iloc[0]

35.467

In [20]:
# iloc also accepts a list of positions and returns a Series.
g7_pop.iloc[[0,4]]

Canada     35.467
Japan     127.061
Name: G7 Population (in millions), dtype: float64

In [22]:
# Slicing by label also works, BUT unlike ordinary Python slicing the
# upper limit ('Japan') is included in the result:
g7_pop['Canada':'Japan']

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
Name: G7 Population (in millions), dtype: float64

### Conditional Selection

In [23]:
# Comparing a Series with a scalar is done element-wise and yields a
# boolean Series with the same index.
g7_pop > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population (in millions), dtype: bool

In [24]:
# Indexing with that boolean Series keeps only the elements where it is True.
g7_pop[g7_pop > 70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population (in millions), dtype: float64

In [25]:
# Average population across the seven countries (in millions).
g7_pop.mean()

107.30257142857144

In [27]:
# The right-hand side of the comparison can be a computed value such as the mean.
g7_pop > g7_pop.mean()

Canada            False
France            False
Germany           False
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population (in millions), dtype: bool

In [28]:
# Select only the countries whose population is above the average.
g7_pop[g7_pop > g7_pop.mean()]

Japan            127.061
United States    318.523
Name: G7 Population (in millions), dtype: float64

In [29]:
# Standard deviation of the populations (pandas defaults to the sample
# estimator, ddof=1).
g7_pop.std()

97.24996987121581

In [30]:
# Combine two boolean masks with | (element-wise OR). Each comparison is
# parenthesized because | binds more tightly than >.
# NOTE(review): the second mask (> mean + std/2) is a subset of the first
# (> mean - std/2), so the OR has no effect and the result equals the first
# mask alone. If the goal was "values outside mean +/- std/2", the first
# comparison presumably should be `<` -- confirm the intent before changing
# it, since the output shown below would change as well.
g7_pop[(g7_pop > g7_pop.mean() - g7_pop.std() / 2) | (g7_pop > g7_pop.mean() + g7_pop.std() / 2)]

France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population (in millions), dtype: float64

### Operations and methods

In [31]:
# Arithmetic with a scalar is applied to every element: convert the
# figures from millions to absolute head counts. g7_pop itself is unchanged.
g7_pop * 1000000

Canada             35467000.0
France             63951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: G7 Population (in millions), dtype: float64

In [32]:
# NumPy functions work element-wise on a Series; the result is still a
# Series with the same index and name.
np.log(g7_pop)

Canada            3.568603
France            4.158117
Germany           4.393708
Italy             4.105367
Japan             4.844667
United Kingdom    4.166836
United States     5.763695
Name: G7 Population (in millions), dtype: float64

In [36]:
# Aggregate over a label slice: France, Germany and Italy (both ends included).
g7_pop['France': 'Italy'].mean()

68.51866666666666

In [35]:
# Standard deviation over the same France..Italy label slice.
g7_pop['France': 'Italy'].std()

10.881938721263472

### Modifying series

In [37]:
# Assigning to a label overwrites that element in place. From this cell on,
# g7_pop no longer holds the original figures shown in the earlier outputs.
g7_pop['Canada'] = 40.5
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population (in millions), dtype: float64

In [38]:
# Assignment by position works too: -1 targets the last element
# (United States). This also modifies g7_pop in place.
g7_pop.iloc[-1] = 495
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     495.000
Name: G7 Population (in millions), dtype: float64

In [39]:
# Boolean-mask assignment: every element currently below 80 is overwritten
# with 75 in a single statement; the other elements are left untouched.
g7_pop[g7_pop < 80] = 75
g7_pop

Canada             75.000
France             75.000
Germany            80.940
Italy              75.000
Japan             127.061
United Kingdom     75.000
United States     495.000
Name: G7 Population (in millions), dtype: float64