# Ch08. OECD 국가의 GDP 데이터로 실무의 데이터 다루기

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


## pandas.filter

In [4]:
# Toy 3x4 frame holding the integers 0..11 row by row. The column labels are
# chosen so that some contain 'A', some start with 'A', and one has no 'A'.
data = [list(range(first, first + 4)) for first in (0, 4, 8)]
df = pd.DataFrame(data, columns=['A', 'BC', 'AC', 'DA'])
df

Unnamed: 0,A,BC,AC,DA
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [6]:
# Keep every column whose label contains the substring 'A' (A, AC, DA).
df.filter(like='A', axis='columns')

Unnamed: 0,A,AC,DA
0,0,2,3
1,4,6,7
2,8,10,11


In [8]:
# Keep only the columns whose label starts with 'A' (A, AC); '^' anchors the
# pattern to the beginning of the label, so 'DA' is excluded.
df.filter(regex=r'^A', axis='columns')

Unnamed: 0,A,AC
0,0,2
1,4,6
2,8,10


In [10]:
# Keep the columns whose label contains 'B' or 'D' (BC, DA).
df.filter(regex=r'B|D', axis='columns')

Unnamed: 0,BC,DA
0,1,3
1,5,7
2,9,11


## OECD 데이터 다루기

In [13]:
# Location of the raw OECD GDP CSV file that the following cells load.
url = 'https://raw.githubusercontent.com/panda-kim/csv_files/main/OECD_GDP.csv'

In [19]:
# First attempt: read the file with default options. Only the first row is
# used as the header, so the metric names end up as an ordinary data row.
df_ex1 = pd.read_csv(filepath_or_buffer=url)
df_ex1

Unnamed: 0.1,Unnamed: 0,2019,2019.1,2019.2,2019.3,2019.4,2020,2020.1,2020.2,2020.3,2020.4
0,,1인당 GDP,GDP 성장률,GDP,수입,수출,1인당 GDP,GDP 성장률,GDP,수입,수출
1,국가,,,,,,,,,,
2,아시아,,,,,,,,,,
3,대한민국,31929,2.2,1651.0,503343,542233,31637,-0.9,1638.2,467633,512498
4,이스라엘,43589,3.5,394.7,75697,51899,43611,-2.4,402.0,69810,49372
5,일본,40113,0.3,5064.9,721078,705671,38113,-4.3,4845.2,-,-
6,튀르키예,9127,0.9,761.4,210261,181021,8538,1.8,720.1,219758,169835
7,북아메리카,,,,,,,,,,
8,캐나다,46327,1.9,1741.6,462392,450805,43242,-5.4,1643.4,419688,392129
9,멕시코,9946,-0.1,1268.9,-,459597,8347,-8.2,1076.2,-,416235


In [20]:
# Second attempt: treat the first two rows (year, metric name) as a
# two-level column header.
df_ex1 = pd.read_csv(filepath_or_buffer=url, header=[0, 1])
df_ex1

Unnamed: 0_level_0,Unnamed: 0_level_0,2019,2019,2019,2019,2019,2020,2020,2020,2020,2020
Unnamed: 0_level_1,Unnamed: 0_level_1.1,1인당 GDP,GDP 성장률,GDP,수입,수출,1인당 GDP,GDP 성장률,GDP,수입,수출
0,국가,,,,,,,,,,
1,아시아,,,,,,,,,,
2,대한민국,31929.0,2.2,1651.0,503343,542233.0,31637.0,-0.9,1638.2,467633,512498
3,이스라엘,43589.0,3.5,394.7,75697,51899.0,43611.0,-2.4,402.0,69810,49372
4,일본,40113.0,0.3,5064.9,721078,705671.0,38113.0,-4.3,4845.2,-,-
5,튀르키예,9127.0,0.9,761.4,210261,181021.0,8538.0,1.8,720.1,219758,169835
6,북아메리카,,,,,,,,,,
7,캐나다,46327.0,1.9,1741.6,462392,450805.0,43242.0,-5.4,1643.4,419688,392129
8,멕시코,9946.0,-0.1,1268.9,-,459597.0,8347.0,-8.2,1076.2,-,416235
9,미국,65280.0,2.2,21433.2,2497500,1643100.0,63544.0,-3.5,20936.6,-,-


In [21]:
# Final read: two-level column header (year, metric name) and the first
# column, which holds the region/country names, as the row index.
df_ex1 = pd.read_csv(
    filepath_or_buffer=url,
    header=[0, 1],
    index_col=0,
)
df_ex1

Unnamed: 0_level_0,2019,2019,2019,2019,2019,2020,2020,2020,2020,2020
Unnamed: 0_level_1,1인당 GDP,GDP 성장률,GDP,수입,수출,1인당 GDP,GDP 성장률,GDP,수입,수출
국가,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
아시아,,,,,,,,,,
대한민국,31929.0,2.2,1651.0,503343,542233.0,31637.0,-0.9,1638.2,467633,512498
이스라엘,43589.0,3.5,394.7,75697,51899.0,43611.0,-2.4,402.0,69810,49372
일본,40113.0,0.3,5064.9,721078,705671.0,38113.0,-4.3,4845.2,-,-
튀르키예,9127.0,0.9,761.4,210261,181021.0,8538.0,1.8,720.1,219758,169835
북아메리카,,,,,,,,,,
캐나다,46327.0,1.9,1741.6,462392,450805.0,43242.0,-5.4,1643.4,419688,392129
멕시코,9946.0,-0.1,1268.9,-,459597.0,8347.0,-8.2,1076.2,-,416235
미국,65280.0,2.2,21433.2,2497500,1643100.0,63544.0,-3.5,20936.6,-,-
남아메리카,,,,,,,,,,


In [24]:
# Move the outer column level (the year) into the row index.
# dropna=False keeps the rows that are entirely NaN (the region header rows).
df_ex1.stack(level=0, dropna=False)

Unnamed: 0,국가,level_1,1인당 GDP,GDP,GDP 성장률
0,아시아,2019,,,
1,아시아,2020,,,
2,대한민국,2019,31929.0,1651.0,2.2
3,대한민국,2020,31637.0,1638.2,-0.9
4,이스라엘,2019,43589.0,394.7,3.5
...,...,...,...,...,...
81,오세아니아,2020,,,
82,오스트레일리아,2019,55057.0,1396.6,2.2
83,오스트레일리아,2020,51812.0,1330.9,-0.3
84,뉴질랜드,2019,41999.0,209.1,1.6


In [25]:
# After stacking the year level, keep only the columns whose label contains
# 'GDP'.
(
    df_ex1
    .stack(level=0, dropna=False)
    .filter(like='GDP', axis='columns')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,1인당 GDP,GDP,GDP 성장률
국가,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
아시아,2019,,,
아시아,2020,,,
대한민국,2019,31929.0,1651.0,2.2
대한민국,2020,31637.0,1638.2,-0.9
이스라엘,2019,43589.0,394.7,3.5
...,...,...,...,...
오세아니아,2020,,,
오스트레일리아,2019,55057.0,1396.6,2.2
오스트레일리아,2020,51812.0,1330.9,-0.3
뉴질랜드,2019,41999.0,209.1,1.6


In [26]:
# Same as the previous step, then turn both index levels into ordinary
# columns; the unnamed year level comes out as 'level_1'.
(
    df_ex1
    .stack(level=0, dropna=False)
    .filter(like='GDP', axis='columns')
    .reset_index()
)

Unnamed: 0,국가,level_1,1인당 GDP,GDP,GDP 성장률
0,아시아,2019,,,
1,아시아,2020,,,
2,대한민국,2019,31929.0,1651.0,2.2
3,대한민국,2020,31637.0,1638.2,-0.9
4,이스라엘,2019,43589.0,394.7,3.5
...,...,...,...,...,...
81,오세아니아,2020,,,
82,오스트레일리아,2019,55057.0,1396.6,2.2
83,오스트레일리아,2020,51812.0,1330.9,-0.3
84,뉴질랜드,2019,41999.0,209.1,1.6


In [27]:
# Long-format table: one row per (country/region, year) with the GDP-related
# columns only, and the auto-generated 'level_1' column renamed to '연도'.
df_ex2 = (
    df_ex1.stack(level=0, dropna=False)
    .filter(like='GDP', axis='columns')
    .reset_index()
    .rename({'level_1': '연도'}, axis='columns')
)
df_ex2

Unnamed: 0,국가,연도,1인당 GDP,GDP,GDP 성장률
0,아시아,2019,,,
1,아시아,2020,,,
2,대한민국,2019,31929.0,1651.0,2.2
3,대한민국,2020,31637.0,1638.2,-0.9
4,이스라엘,2019,43589.0,394.7,3.5
...,...,...,...,...,...
81,오세아니아,2020,,,
82,오스트레일리아,2019,55057.0,1396.6,2.2
83,오스트레일리아,2020,51812.0,1330.9,-0.3
84,뉴질랜드,2019,41999.0,209.1,1.6


In [28]:
# Boolean mask of the rows that have no GDP value; in this data those are the
# region header rows rather than real countries.
cond1 = df_ex2['GDP'].isna()
df_ex2.loc[cond1]

Unnamed: 0,국가,연도,1인당 GDP,GDP,GDP 성장률
0,아시아,2019,,,
1,아시아,2020,,,
10,북아메리카,2019,,,
11,북아메리카,2020,,,
18,남아메리카,2019,,,
19,남아메리카,2020,,,
26,유럽,2019,,,
27,유럽,2020,,,
80,오세아니아,2019,,,
81,오세아니아,2020,,,


In [32]:
# Build the region column '구분':
#   1. mask(~cond1) blanks out the name on every row that is NOT a region
#      header, leaving the region name only on the header rows;
#   2. ffill() carries each region name forward over the country rows that
#      follow it.
# Series.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases; the result is the same.
df_ex2['구분'] = df_ex2['국가'].mask(~cond1).ffill()
df_ex2

Unnamed: 0,국가,연도,1인당 GDP,GDP,GDP 성장률,구분
0,아시아,2019,,,,아시아
1,아시아,2020,,,,아시아
2,대한민국,2019,31929.0,1651.0,2.2,아시아
3,대한민국,2020,31637.0,1638.2,-0.9,아시아
4,이스라엘,2019,43589.0,394.7,3.5,아시아
...,...,...,...,...,...,...
81,오세아니아,2020,,,,오세아니아
82,오스트레일리아,2019,55057.0,1396.6,2.2,오세아니아
83,오스트레일리아,2020,51812.0,1330.9,-0.3,오세아니아
84,뉴질랜드,2019,41999.0,209.1,1.6,오세아니아


In [34]:
# Preview the table without the region header rows (rows where GDP is missing).
df_ex2.loc[~cond1]

Unnamed: 0,국가,연도,1인당 GDP,GDP,GDP 성장률,구분
2,대한민국,2019,31929.0,1651.0,2.2,아시아
3,대한민국,2020,31637.0,1638.2,-0.9,아시아
4,이스라엘,2019,43589.0,394.7,3.5,아시아
5,이스라엘,2020,43611.0,402.0,-2.4,아시아
6,일본,2019,40113.0,5064.9,0.3,아시아
...,...,...,...,...,...,...
79,영국,2020,40285.0,2707.7,-9.8,유럽
82,오스트레일리아,2019,55057.0,1396.6,2.2,오세아니아
83,오스트레일리아,2020,51812.0,1330.9,-0.3,오세아니아
84,뉴질랜드,2019,41999.0,209.1,1.6,오세아니아


In [36]:
# Country-level table: drop the region header rows and renumber the rows from
# 0 so that labels and positions line up again.
df_ex3 = df_ex2.loc[~cond1].reset_index(drop=True)
df_ex3

Unnamed: 0,국가,연도,1인당 GDP,GDP,GDP 성장률,구분
0,대한민국,2019,31929.0,1651.0,2.2,아시아
1,대한민국,2020,31637.0,1638.2,-0.9,아시아
2,이스라엘,2019,43589.0,394.7,3.5,아시아
3,이스라엘,2020,43611.0,402.0,-2.4,아시아
4,일본,2019,40113.0,5064.9,0.3,아시아
...,...,...,...,...,...,...
71,영국,2020,40285.0,2707.7,-9.8,유럽
72,오스트레일리아,2019,55057.0,1396.6,2.2,오세아니아
73,오스트레일리아,2020,51812.0,1330.9,-0.3,오세아니아
74,뉴질랜드,2019,41999.0,209.1,1.6,오세아니아


In [37]:
# Highest GDP growth rate per region (rows) and year (columns).
df_ex3.pivot_table(
    values='GDP 성장률',
    index='구분',
    columns='연도',
    aggfunc='max',
)

연도,2019,2020
구분,Unnamed: 1_level_1,Unnamed: 2_level_1
남아메리카,3.3,-4.5
북아메리카,2.2,-3.5
아시아,3.5,1.8
오세아니아,2.2,1.0
유럽,5.6,3.4


In [38]:
def _country_with_max_growth(growth):
    """Return the country name on the row where *growth* is largest.

    *growth* is the slice of the 'GDP 성장률' column for one (region, year)
    cell; its index labels are row labels of df_ex3, so idxmax() can be used
    to look the country up in df_ex3.
    """
    top_row = growth.idxmax()
    return df_ex3.loc[top_row, '국가']


# Country with the highest GDP growth rate per region (rows) and year (columns).
df_ex3.pivot_table(
    values='GDP 성장률',
    index='구분',
    columns='연도',
    aggfunc=_country_with_max_growth,
)

연도,2019,2020
구분,Unnamed: 1_level_1,Unnamed: 2_level_1
남아메리카,콜롬비아,코스타리카
북아메리카,미국,미국
아시아,이스라엘,튀르키예
오세아니아,오스트레일리아,뉴질랜드
유럽,아일랜드,아일랜드
