In [236]:
import requests
import pandas as pd
import numpy as np
import json

In [237]:
def world_bank_api(target_country, indicat):  
    api_params = {
        'date' : '1960:2021',
        'format' : 'json',
        'frequency' : 'Y',
        'per_page': 20000
    }  
    resp = requests.get(f'http://api.worldbank.org/v2/country/{target_country}/indicator/{indicat}', params=api_params)
    return json.loads(resp.text)

In [238]:
indicator = [
    'NY.GDP.MKTP.CD',       # GDP
    'SP.POP.TOTL',          # 인구
    'EN.ATM.PM25.MC.M3',    # 초미세먼지
    'EN.ATM.CO2E.PC'        # 이산화탄소 배출
    ]
country = 'all'

In [239]:
world_gdp = world_bank_api(country, indicator[0])
world_pop = world_bank_api(country, indicator[1])
world_pm25 = world_bank_api(country, indicator[2])
world_co2e = world_bank_api(country, indicator[3])

In [240]:
df_gdp = pd.json_normalize(world_gdp[1])
df_pop = pd.json_normalize(world_pop[1])
df_pm25 = pd.json_normalize(world_pm25[1])
df_co2e = pd.json_normalize(world_co2e[1])

In [241]:
import pickle

In [242]:
df_gdp['date'] = pd.to_numeric(df_gdp['date'])
df_pop['date'] = pd.to_numeric(df_pop['date'])
df_pm25['date'] = pd.to_numeric(df_pm25['date'])
df_co2e['date'] = pd.to_numeric(df_co2e['date'])

In [243]:
dfs = [
    df_gdp,
    df_pop,
    df_pm25,
    df_co2e
]

for df in dfs:
    df.drop(columns=['unit', 'obs_status', 'decimal', 'indicator.id'], inplace=True)
    df.rename(columns={'indicator.value':'measure', 'country.id':'country_code', 'country.value': 'country'}, inplace=True)

In [285]:
df_gdp.rename(columns={'value': 'gdp'}, inplace=True)
df_pop.rename(columns={'value': 'population'}, inplace=True)
df_pm25.rename(columns={'value': 'pm25'}, inplace=True)
df_co2e.rename(columns={'value': 'co2e'}, inplace=True)

In [288]:
df_co2e.columns

Index(['countryiso3code', 'date', 'co2e', 'measure', 'country_code',
       'country'],
      dtype='object')

In [290]:
for df in dfs:
    df.drop(columns=['measure'], inplace=True)

In [291]:
ttl_df = pd.merge(df_gdp, df_pop, how='inner', on=['countryiso3code', 'date', 'country_code','country'])

In [293]:
ttl_df = pd.merge(ttl_df, df_co2e, how='inner', on=['countryiso3code', 'date', 'country_code','country'])
ttl_df = pd.merge(ttl_df, df_pm25, how='inner', on=['countryiso3code', 'date', 'country_code','country'])

In [298]:
with open('data/world_bank_all.pkl', 'wb') as f:
    pickle.dump(ttl_df, f)

In [245]:
with open('data/world_bank_gdp.pkl', 'wb') as f:
    pickle.dump(df_gdp, f)

In [246]:
with open('data/world_bank_pop.pkl', 'wb') as f:
    pickle.dump(df_pop, f)

In [247]:
with open('data/world_bank_pm25.pkl', 'wb') as f:
    pickle.dump(df_pm25, f)

In [248]:
with open('data/world_bank_co2e.pkl', 'wb') as f:
    pickle.dump(df_co2e, f)

In [249]:
df_gdp[df_gdp['value'] == df_gdp['value'].max()]

Unnamed: 0,countryiso3code,date,value,measure,country_code,country
2976,WLD,2021,97529680000000.0,GDP (current US$),1W,World


In [265]:
df_pm25.groupby('country')['value'].max()

country
Afghanistan                    61.865329
Africa Eastern and Southern    33.833868
Africa Western and Central     68.843374
Albania                        24.947482
Algeria                        35.376846
                                 ...    
West Bank and Gaza             36.061285
World                          47.655591
Yemen, Rep.                    54.411846
Zambia                         28.360616
Zimbabwe                       26.288251
Name: value, Length: 266, dtype: float64

In [269]:
df_co2e[df_co2e['country']=='China'].to_csv('china_co2e.csv')

In [271]:
df_co2e['value'].max(), df_co2e['value'].min()

(47.6569620139265, 0.0)

In [272]:
df_pm25['value'].max(), df_pm25['value'].min()

(95.24264397, 5.163300753)

In [273]:
df_pop['value'].max(), df_pop['value'].min()

(7888305693.0, 2646.0)

In [274]:
df_gdp['value'].max(), df_gdp['value'].min()

(97529676807424.8, 8824743.94191672)

In [280]:
max_len = 0
for item in df_gdp['country']:
    # print(item)
    # print(len(item))
    # break
    max_len = max(len(item), max_len)

print(max_len)

52


In [305]:
df_sl = pd.read_csv('data/2.Sea_Level.csv', index_col=0)
df_clm = pd.read_csv('data/1.climate.csv', index_col=0)

In [306]:
df_sl.rename(columns={'year':'Year'}, inplace=True)

In [308]:
combined_df = pd.merge(df_clm, df_sl, how='inner', on=['Year'])

In [310]:
with open('data/global.pkl', 'wb') as f:
    pickle.dump(combined_df, f)