In [47]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt
import sqlite3
import json
from tabulate import tabulate
from enum import Enum

In [48]:
class Mode(Enum):
    """ETL pipeline stage.

    Each member's value is the stage label that ``write_log`` prints inside
    the square brackets of a log line.
    """

    # Stages are listed in the order the notebook runs them.
    EXTRACT = 'EXTRACT'
    TRANSFORM = 'TRANSFORM'
    LOAD = 'LOAD'

def write_log(state: Mode, is_start: bool) -> None:
    """Append a timestamped start/end marker for an ETL stage to the project log.

    Each call appends one line of the form
    ``<timestamp>, [<stage>] Started`` or ``<timestamp>, [<stage>] Ended``
    to ``./data/team_etl_project_log.txt``.

    Args:
        state: Stage being logged; its ``value`` is written inside the brackets.
        is_start: ``True`` writes a "Started" line, ``False`` an "Ended" line.
    """
    timestamp = dt.datetime.now().strftime('%Y-%b-%d-%H-%M-%S')
    event = 'Started' if is_start else 'Ended'
    # The with-block closes the file on exit, so no explicit close() is needed.
    # %b yields a locale-dependent month name, hence the explicit encoding
    # instead of the platform default.
    with open('./data/team_etl_project_log.txt', 'a', encoding='utf-8') as log:
        log.write(f'{timestamp}, [{state.value}] {event}\n')

def print_query_result(query: str) -> None:
    """Run a SQL statement against the project database and print the result.

    The rows are printed as a grid table whose headers are the result's
    column names. Float values are rendered with thousands separators and
    two decimals; every other value is printed unchanged.

    Args:
        query: SQL statement executed against ``./data/team_World_Economies.db``.

    Raises:
        sqlite3.Error: Propagated from sqlite3 when the statement fails; the
            connection is closed before the exception leaves this function.
    """
    con = sqlite3.connect('./data/team_World_Economies.db')
    try:
        cursor = con.cursor()
        data = cursor.execute(query).fetchall()
        formatted_data = [
            [f"{x:,.2f}" if isinstance(x, float) else x for x in row]
            for row in data
        ]
        # cursor.description is None for statements that produce no result
        # set; fall back to "no columns" instead of failing on iteration.
        columns = [description[0] for description in cursor.description or ()]
        print(tabulate(formatted_data, headers=columns, tablefmt='grid'))
    finally:
        # Always release the connection, even when the query raised.
        con.close()

In [49]:
# [EXTRACT]
state = Mode.EXTRACT
write_log(state, True)

# Bound every request so a stalled connection cannot hang the notebook.
REQUEST_TIMEOUT_SECONDS = 30

# IMF DataMapper API: NGDPD values for 2025, plus the group and region code lists.
gdp_data = requests.get(
    'https://www.imf.org/external/datamapper/api/v1/NGDPD?periods=2025,2025',
    timeout=REQUEST_TIMEOUT_SECONDS,
)
group_data = requests.get(
    'https://www.imf.org/external/datamapper/api/v1/groups',
    timeout=REQUEST_TIMEOUT_SECONDS,
)
region_data = requests.get(
    'https://www.imf.org/external/datamapper/api/v1/regions',
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Persist each raw payload next to the other project artifacts.
raw_dumps = (
    (gdp_data, './data/team_gdp.json'),
    (group_data, './data/team_group.json'),
    (region_data, './data/team_region.json'),
)
for response, path in raw_dumps:
    # Fail fast on an HTTP error rather than saving an error body as raw data.
    response.raise_for_status()
    # The with-block closes the file; no explicit close() is required.
    with open(path, 'w', encoding='utf-8') as raw_file:
        # Dump the parsed payload. Dumping response.text would store the whole
        # body as a single JSON string literal instead of a JSON object.
        json.dump(response.json(), raw_file, indent=4)

write_log(state, False)
state = Mode.TRANSFORM

In [None]:
# Read the saved region payload back from disk for inspection.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('./data/team_region.json','r', encoding='utf-8') as region_file:
    t = json.load(region_file)

Unnamed: 0,AFQ,AFRREO,APQ,AZQ,CAQ,CBQ,CMQ,EAQ,EEQ,EUQ,...,SSQ,WEQ,WHQ,AFR_SSQ,SPR_GD_MECA,SPR_GD_EUR,SPR_GD_AFR,SPR_GD_CPI,SPR_GD_WHD,SPR_GD_Asia
label,Africa (Region),African Regional Economic Outlook,Asia and Pacific,Australia and New Zealand,Central Asia and the Caucasus,Caribbean,Central America,East Asia,Eastern Europe,Europe,...,Sub-Saharan Africa (Region),Western Europe,Western Hemisphere (Region),Sub-Saharan Africa (Region),Middle East and Central Asia,Europe,Africa,Caribbean and Pacific Islands,Western Hemisphere,Asia


In [38]:
# [TRANSFORM]
write_log(state, True)

# Parse the response bodies fetched during EXTRACT.
gdp_json_obejct = json.loads(gdp_data.text)
group_json_object = json.loads(group_data.text)
region_json_object = json.loads(region_data.text)

# One wide frame per payload; the code lists are reshaped in later cells.
gdp_df = pd.DataFrame(gdp_json_obejct['values']['NGDPD'])
group_df = pd.DataFrame(group_json_object['groups'])
region_df = pd.DataFrame(region_json_object['regions'])

# Wide -> long (one row per code), largest GDP first.
gdp_df = (
    pd.melt(gdp_df)
    .rename(columns={'variable': 'Country', 'value': 'GDP_USD_billion'})
    .sort_values(by='GDP_USD_billion', ascending=False)
)
# Rounding happens after sorting, so the order reflects the unrounded values.
gdp_df['GDP_USD_billion'] = gdp_df['GDP_USD_billion'].round(2)
gdp_df = gdp_df.reset_index(drop=True)
gdp_df

Unnamed: 0,Country,GDP_USD_billion
0,WEOWORLD,115494.31
1,ADVEC,67561.04
2,MAE,51451.65
3,OEMDC,47933.27
4,APQ,41024.44
...,...,...
216,PLW,0.35
217,KIR,0.33
218,MHL,0.29
219,NRU,0.18


In [39]:
# Build the long-form group lookup straight from the parsed payload so that
# re-running this cell is idempotent (melting an already-melted group_df
# would produce a different frame).
# Columns: 'group' holds the group code, 'mean' holds its label.
group_df = pd.DataFrame(group_json_object['groups']).melt(
    var_name='group',
    value_name='mean',
)
group_df

Unnamed: 0,group,mean
0,ADVEC,Advanced economies
1,AEEUEJ,"Adv econ excl US, Euro, Japan"
2,AFR,Africa (Analytical)
3,AS5,ASEAN-5
4,CEE,Central and Eastern Europe
...,...,...
124,FR_FC_ADV,Advanced Economies
125,FR_FC_EME,Emerging Market and Developing Economies
126,FR_FC_EU,European Union
127,FR_FC_NonEU,Non European Union


In [40]:
# Build the long-form region lookup straight from the parsed payload so that
# re-running this cell is idempotent (melting an already-melted region_df
# would produce a different frame).
# Columns: 'region' holds the region code, 'mean' holds its label.
region_df = pd.DataFrame(region_json_object['regions']).melt(
    var_name='region',
    value_name='mean',
)
region_df

Unnamed: 0,region,mean
0,AFQ,Africa (Region)
1,AFRREO,African Regional Economic Outlook
2,APQ,Asia and Pacific
3,AZQ,Australia and New Zealand
4,CAQ,Central Asia and the Caucasus
5,CBQ,Caribbean
6,CMQ,Central America
7,EAQ,East Asia
8,EEQ,Eastern Europe
9,EUQ,Europe


In [41]:
# Remove entries that are not countries: any code that appears in the group
# or region lookup is an aggregate and is dropped.
is_group_code = gdp_df['Country'].isin(group_df['group'])
is_region_code = gdp_df['Country'].isin(region_df['region'])
gdp_df = gdp_df[~(is_group_code | is_region_code)].reset_index(drop=True)
gdp_df

Unnamed: 0,Country,GDP_USD_billion
0,USA,30337.16
1,CHN,19534.89
2,DEU,4921.56
3,JPN,4389.33
4,IND,4271.92
...,...,...
184,PLW,0.35
185,KIR,0.33
186,MHL,0.29
187,NRU,0.18


In [42]:
# Match each country to its region.
region_info_df = pd.read_csv('./data/region.csv')
gdp_df = (
    gdp_df
    # Drop a 'region' column left by a previous run of this cell; without this
    # a re-run would produce 'region_x' / 'region_y' instead of 'region'.
    .drop(columns='region', errors='ignore')
    # Left join keeps every GDP row; codes missing from region.csv get NaN.
    .merge(
        region_info_df[['alpha-3', 'region']],
        left_on='Country',
        right_on='alpha-3',
        how='left',
    )
    # The join key duplicates 'Country' and is not needed afterwards.
    .drop(columns='alpha-3')
)

In [43]:
# Display the current gdp_df for inspection.
gdp_df

Unnamed: 0,Country,GDP_USD_billion,region
0,USA,30337.16,Americas
1,CHN,19534.89,Asia
2,DEU,4921.56,Europe
3,JPN,4389.33,Asia
4,IND,4271.92,Asia
...,...,...,...
184,PLW,0.35,Oceania
185,KIR,0.33,Oceania
186,MHL,0.29,Oceania
187,NRU,0.18,Oceania


In [44]:
# Write the "Ended" log line for the stage currently held in `state`,
# then advance the stage marker to LOAD.
write_log(state, False)
state = Mode.LOAD

In [45]:
# [LOAD]
write_log(state, True)
con = sqlite3.connect('./data/team_World_Economies.db')
try:
    # if_exists='replace' recreates the 'gdp' table on every run, so
    # re-executing this cell does not append duplicate rows.
    # The DataFrame index is stored too, as the 'index' column.
    gdp_df.to_sql('gdp', con, if_exists='replace')
finally:
    # Release the connection even if the write fails.
    con.close()
write_log(state, False)

In [46]:
# [Print results using queries]

# Every row of the gdp table whose GDP_USD_billion exceeds 100.
gdp_over_100_query = '''
    SELECT *
    FROM gdp
    WHERE GDP_USD_billion > 100;
    '''

# Per region: rank rows by GDP (descending) and average the five highest.
top5_region_average_query = '''
    WITH rankedByRegionGdp AS (
        SELECT
            Country,
            region,
            GDP_USD_billion,
            ROW_NUMBER() OVER (PARTITION BY region ORDER BY GDP_USD_billion DESC) AS rank
        FROM gdp
    )
    SELECT region, AVG(GDP_USD_billion)
    FROM rankedByRegionGdp
    WHERE rank <= 5
    GROUP BY region;
    '''

for query in (gdp_over_100_query, top5_region_average_query):
    print_query_result(query)

+---------+-----------+-------------------+----------+
|   index | Country   | GDP_USD_billion   | region   |
|       0 | USA       | 30,337.16         | Americas |
+---------+-----------+-------------------+----------+
|       1 | CHN       | 19,534.89         | Asia     |
+---------+-----------+-------------------+----------+
|       2 | DEU       | 4,921.56          | Europe   |
+---------+-----------+-------------------+----------+
|       3 | JPN       | 4,389.33          | Asia     |
+---------+-----------+-------------------+----------+
|       4 | IND       | 4,271.92          | Asia     |
+---------+-----------+-------------------+----------+
|       5 | GBR       | 3,730.26          | Europe   |
+---------+-----------+-------------------+----------+
|       6 | FRA       | 3,283.43          | Europe   |
+---------+-----------+-------------------+----------+
|       7 | ITA       | 2,459.60          | Europe   |
+---------+-----------+-------------------+----------+
|       8 