In [2]:
import pandas as pd
import numpy as np

In [3]:
# Seven population figures, one per G7 country; the default integer index is
# replaced with country labels in a later cell.
# NOTE(review): the sixth value (labelled 'UK' later) is 64.551 here, but the
# dict-based series and the DataFrame further down use 64.511 — confirm which
# figure is correct and make them agree.
g7_pop = pd.Series([35.467, 63.951, 80.940, 60.665, 127.061, 64.551, 318.523])
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.551
6    318.523
dtype: float64

In [5]:
g7_pop.name = 'G7 Population in millions'
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.551
6    318.523
Name: G7 Population in millions, dtype: float64

In [6]:
g7_pop.dtype

dtype('float64')

In [7]:
g7_pop.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.551, 318.523])

In [8]:
type(g7_pop.values)

numpy.ndarray

In [9]:
# The series still has its default RangeIndex here, so 0 is matched as an index label.
g7_pop[0]

35.467

In [10]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [11]:
# Swap the default integer index for country labels (one label per value, in order).
g7_pop.index = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'US']
g7_pop

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.551
US         318.523
Name: G7 Population in millions, dtype: float64

In [13]:
pd.Series({
    'Canada': 35.467,
    'France': 63.951,
    'Germany': 80.940,
    'Italy': 60.665,
    'Japan': 127.061,
    'UK': 64.511,
    'US': 318.523
})

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
US         318.523
dtype: float64

In [15]:
g7_pop['Canada']

35.467

In [16]:
g7_pop.iloc[0]

35.467

In [17]:
g7_pop.iloc[-1]

318.523

In [18]:
g7_pop[['Italy', 'France']]

Italy     60.665
France    63.951
Name: G7 Population in millions, dtype: float64

In [19]:
g7_pop.iloc[[0, 1]]

Canada    35.467
France    63.951
Name: G7 Population in millions, dtype: float64

In [20]:
g7_pop['Canada': 'Italy']
# A plain Python slice excludes its stop value; a pandas slice by index *label*
# includes the stop label, so 'Italy' is part of the result.

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millions, dtype: float64

In [21]:
g7_pop > 70

Canada     False
France     False
Germany     True
Italy      False
Japan       True
UK         False
US          True
Name: G7 Population in millions, dtype: bool

In [22]:
g7_pop[g7_pop > 70]

Germany     80.940
Japan      127.061
US         318.523
Name: G7 Population in millions, dtype: float64

In [23]:
g7_pop.mean()

107.30828571428572

In [24]:
# Arithmetic is applied element-wise; this scales the "in millions" figures to head counts.
g7_pop * 1_000_000

Canada      35467000.0
France      63951000.0
Germany     80940000.0
Italy       60665000.0
Japan      127061000.0
UK          64551000.0
US         318523000.0
Name: G7 Population in millions, dtype: float64

In [28]:
np.log(g7_pop)

Canada     3.568603
France     4.158117
Germany    4.393708
Italy      4.105367
Japan      4.844667
UK         4.167456
US         5.763695
Name: G7 Population in millions, dtype: float64

In [30]:
# Overwrite the second entry (France) by position. The index holds string labels
# at this point, so a bare integer key such as g7_pop[1] relies on pandas'
# deprecated positional fallback and emits a FutureWarning; .iloc states the
# positional intent explicitly and keeps working in newer pandas versions.
g7_pop.iloc[1] = 99
g7_pop

  g7_pop[1] = 99


Canada      35.467
France      99.000
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.551
US         318.523
Name: G7 Population in millions, dtype: float64

In [31]:
# Start over with a fresh, label-indexed population series; this replaces the
# series that was edited in place in the previous cell.
g7_pop = pd.Series(
    [35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523],
    index=['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'US'],
)

In [32]:
# Country-level indicators, one list per column. Rows are positional here; the
# country labels are attached to the index in a following cell.
df = pd.DataFrame(
    {
        'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
        'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
        'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
        'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
        'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
    },
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
)

In [33]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [34]:
# Label the rows with country names (one label per row, in order).
df.index = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'US']
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [35]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [36]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'US'], dtype='object')

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to US
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [39]:
df.size

35

In [40]:
df.shape

(7, 5)

In [41]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [43]:
df.dtypes

Population      float64
GDP               int64
Surface Area      int64
HDI             float64
Continent        object
dtype: object

In [44]:
df.dtypes.value_counts()

float64    2
int64      2
object     1
Name: count, dtype: int64

In [45]:
df.loc['Canada']

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [46]:
df.iloc[-1]

Population       318.523
GDP             17348075
Surface Area     9525067
HDI                0.915
Continent        America
Name: US, dtype: object

In [47]:
df['Population']

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
US         318.523
Name: Population, dtype: float64

In [48]:
# .loc slices by label and includes both endpoints ('France' through 'Italy').
df.loc['France': 'Italy']

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe


In [49]:
df.loc['France': 'Italy', 'Population']

France     63.951
Germany    80.940
Italy      60.665
Name: Population, dtype: float64

In [51]:
df.iloc[[0, 1, -1]]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
US,318.523,17348075,9525067,0.915,America


In [53]:
# .iloc slices by position and excludes the stop: only rows 1 and 2 are returned.
df.iloc[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [54]:
df.iloc[1:3, 3]

France     0.888
Germany    0.916
Name: HDI, dtype: float64

In [55]:
df.iloc[1:3, [0,3]]

Unnamed: 0,Population,HDI
France,63.951,0.888
Germany,80.94,0.916


In [56]:
df.iloc[1:3, 1:3]

Unnamed: 0,GDP,Surface Area
France,2833687,640679
Germany,3874437,357114


In [57]:
df['Population'] > 70

Canada     False
France     False
Germany     True
Italy      False
Japan       True
UK         False
US          True
Name: Population, dtype: bool

In [59]:
df[df['Population'] > 70]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
US,318.523,17348075,9525067,0.915,America


In [61]:
# The boolean mask picks the rows; the second argument picks the column.
df.loc[df['Population'] > 70, 'Population']

Germany     80.940
Japan      127.061
US         318.523
Name: Population, dtype: float64

In [63]:
# drop() returns a new DataFrame without the 'Canada' row; df itself is left unchanged.
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [64]:
df.drop(columns=['Population', 'HDI'])

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
UK,2950039,242495,Europe
US,17348075,9525067,America


In [65]:
df[['Population', 'GDP']] / 100

Unnamed: 0,Population,GDP
Canada,0.35467,17853.87
France,0.63951,28336.87
Germany,0.8094,38744.37
Italy,0.60665,21677.44
Japan,1.27061,46023.67
UK,0.64511,29500.39
US,3.18523,173480.75


In [67]:
# Per-column adjustments, keyed by the column label each one applies to.
crisis = pd.Series(
    data=[-1_000_000, -0.3],
    index=['GDP', 'HDI'],
)
crisis

GDP   -1000000.0
HDI         -0.3
dtype: float64

In [69]:
df[['GDP', 'HDI']] + crisis
# The Series is matched to the columns by label and applied to every row:
# GDP drops by 1,000,000 and HDI drops by 0.3.

Unnamed: 0,GDP,HDI
Canada,785387.0,0.613
France,1833687.0,0.588
Germany,2874437.0,0.616
Italy,1167744.0,0.573
Japan,3602367.0,0.591
UK,1950039.0,0.607
US,16348075.0,0.615


In [70]:
# Languages for a subset of the countries, keyed by country label.
langs = pd.Series(
    {'France': 'French', 'Germany': 'German', 'Italy': 'Italian'},
    name='Language',
)
langs

France      French
Germany     German
Italy      Italian
Name: Language, dtype: object

In [72]:
# Assigning a Series as a column aligns it on the index: only France, Germany and
# Italy receive a language; every other row gets a missing value.
df['Language'] = langs
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
UK,64.511,2950039,242495,0.907,Europe,
US,318.523,17348075,9525067,0.915,America,


In [74]:
# NOTE(review): 'Langauge' looks like a misspelling of 'Language'. As written, this
# adds a second column next to 'Language' instead of overwriting it — confirm intent.
df['Langauge'] = 'English'
# Assigning a scalar to df[...] modifies df in place, filling every row with that value.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langauge
Canada,35.467,1785387,9984670,0.913,America,,English
France,63.951,2833687,640679,0.888,Europe,French,English
Germany,80.94,3874437,357114,0.916,Europe,German,English
Italy,60.665,2167744,301336,0.873,Europe,Italian,English
Japan,127.061,4602367,377930,0.891,Asia,,English
UK,64.511,2950039,242495,0.907,Europe,,English
US,318.523,17348075,9525067,0.915,America,,English


In [77]:
# rename() returns a relabelled copy; df itself keeps its original labels.
df.rename(
    columns={
        'HDI': 'Human Development Index',
        'Anual Popcorn Consumption': 'APC'
        # there is no 'Anual Popcorn Consumption' column; the unmatched key is
        # ignored without raising an error
    }, index={
        'US': 'USA',
        # 'Argentina' is not in the index either, so this entry is ignored too
        'Argentina': 'AR'
    })

Unnamed: 0,Population,GDP,Surface Area,Human Development Index,Continent,Language,Langauge
Canada,35.467,1785387,9984670,0.913,America,,English
France,63.951,2833687,640679,0.888,Europe,French,English
Germany,80.94,3874437,357114,0.916,Europe,German,English
Italy,60.665,2167744,301336,0.873,Europe,Italian,English
Japan,127.061,4602367,377930,0.891,Asia,,English
UK,64.511,2950039,242495,0.907,Europe,,English
USA,318.523,17348075,9525067,0.915,America,,English


In [78]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langauge
Canada,35.467,1785387,9984670,0.913,America,,English
France,63.951,2833687,640679,0.888,Europe,French,English
Germany,80.94,3874437,357114,0.916,Europe,German,English
Italy,60.665,2167744,301336,0.873,Europe,Italian,English
Japan,127.061,4602367,377930,0.891,Asia,,English
UK,64.511,2950039,242495,0.907,Europe,,English
US,318.523,17348075,9525067,0.915,America,,English


In [79]:
df[['Population' , 'GDP']]

Unnamed: 0,Population,GDP
Canada,35.467,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.665,2167744
Japan,127.061,4602367
UK,64.511,2950039
US,318.523,17348075


In [80]:
# Element-wise division of two columns, stored as a new column on df.
# NOTE(review): Population holds the same figures as the "in millions" series;
# presumably GDP is also expressed in millions so the units cancel — confirm.
df['GDP per Capita'] = df['GDP'] / df['Population']
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langauge,GDP per Capita
Canada,35.467,1785387,9984670,0.913,America,,English,50339.385908
France,63.951,2833687,640679,0.888,Europe,French,English,44310.284437
Germany,80.94,3874437,357114,0.916,Europe,German,English,47868.013343
Italy,60.665,2167744,301336,0.873,Europe,Italian,English,35733.025633
Japan,127.061,4602367,377930,0.891,Asia,,English,36221.712406
UK,64.511,2950039,242495,0.907,Europe,,English,45729.239975
US,318.523,17348075,9525067,0.915,America,,English,54464.12033


In [81]:
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langauge,GDP per Capita
Canada,35.467,1785387,9984670,0.913,America,,English,50339.385908
France,63.951,2833687,640679,0.888,Europe,French,English,44310.284437
Germany,80.94,3874437,357114,0.916,Europe,German,English,47868.013343
Italy,60.665,2167744,301336,0.873,Europe,Italian,English,35733.025633
Japan,127.061,4602367,377930,0.891,Asia,,English,36221.712406


In [82]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI,GDP per Capita
count,7.0,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429,44952.254576
std,97.24997,5494020.0,4576187.0,0.016592,6954.983875
min,35.467,1785387.0,242495.0,0.873,35733.025633
25%,62.308,2500716.0,329225.0,0.8895,40265.998421
50%,64.511,2950039.0,377930.0,0.907,45729.239975
75%,104.0005,4238402.0,5082873.0,0.914,49103.699626
max,318.523,17348080.0,9984670.0,0.916,54464.12033


In [83]:
population = df['Population']

In [84]:
population.min()

35.467

In [85]:
population.quantile(.25)

62.308

In [86]:
population.quantile([.2, .4, .6, .8, 1])

0.2     61.3222
0.4     64.1750
0.6     74.3684
0.8    117.8368
1.0    318.5230
Name: Population, dtype: float64

In [87]:
population.describe()

count      7.000000
mean     107.302571
std       97.249970
min       35.467000
25%       62.308000
50%       64.511000
75%      104.000500
max      318.523000
Name: Population, dtype: float64

In [90]:

# Add a 'China' row from a partial Series: columns absent from the Series are left
# missing, and the integer columns are displayed as floats afterwards.
# NOTE(review): the other Population values match the "in millions" series
# (e.g. 318.523 for the US), so 1_400_000_000 looks like a raw head count rather
# than a value in millions — confirm the intended units.
df.loc['China'] = pd.Series({'Population': 1_400_000_000, 'Continent': 'Asia'})
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,Langauge,GDP per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,,English,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,French,English,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,German,English,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,Italian,English,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,,English,36221.712406
UK,64.511,2950039.0,242495.0,0.907,Europe,,English,45729.239975
US,318.523,17348075.0,9525067.0,0.915,America,,English,54464.12033
China,1400000000.0,,,,Asia,,,
