In [32]:
import pandas as pd

import missions.W1.M3.etl_project_gdp as etl_basic
import missions.W1.M3.etl_project_gdp_with_sql as etl_sqlite

In [33]:
# etl_sqlite.main()

In [34]:
df_gdp = pd.read_json('missions/W1/M3/data/Countries_by_GDP.json')
df_region = pd.read_json('missions/W1/M3/data/cultural-geo-mapper.json')

df_gdp_region = df_gdp.merge(df_region, on='country', how='left')

df_gdp_region['gdp'] = pd.to_numeric(df_gdp_region['gdp'].str.replace(",", ""), errors='coerce')  # str2int
df_gdp_region['gdp'] = (df_gdp_region['gdp'] / 1e3).round(2)  # GDP의 단위는 1B USD이어야 하고 소수점 2자리까지만 표시해 주세요.
df_gdp_region = df_gdp_region.sort_values(by='gdp', ascending=False).reset_index(drop=True)  # 해당 테이블에는 GDP가 높은 국가들이 먼저 나와야 합니다.

df_gdp_region

Unnamed: 0,country,gdp,year,type,region
0,United States,30337.16,2025.0,IMF,North America
1,China,19534.89,2025.0,IMF,East Asia
2,Germany,4921.56,2025.0,IMF,Western Europe
3,Japan,4389.33,2025.0,IMF,East Asia
4,India,4271.92,2025.0,IMF,South Asia
...,...,...,...,...,...
204,Greenland,,,IMF,North America
205,Eritrea,,,IMF,Sub-Saharan Africa
206,Zanzibar,,,IMF,Sub-Saharan Africa
207,Sint Maarten,,,IMF,North America


In [35]:
# GDP가 100B USD이상이 되는 국가만을 구해서 화면에 출력해야 합니다.
df_gdp_region[df_gdp_region['gdp'] >= 100]

Unnamed: 0,country,gdp,year,type,region
0,United States,30337.16,2025.0,IMF,North America
1,China,19534.89,2025.0,IMF,East Asia
2,Germany,4921.56,2025.0,IMF,Western Europe
3,Japan,4389.33,2025.0,IMF,East Asia
4,India,4271.92,2025.0,IMF,South Asia
...,...,...,...,...,...
67,Kenya,116.32,2024.0,IMF,Sub-Saharan Africa
68,Angola,113.29,2024.0,IMF,Sub-Saharan Africa
69,Guatemala,112.37,2024.0,IMF,Latin America
70,Oman,110.99,2024.0,IMF,Middle East


In [40]:
# 각 Region별로 top5 국가의 GDP 평균을 구해서 화면에 출력해야 합니다.

def top_n_mean(group, n=5):
    # 그룹 내 GDP 상위 n개 선택
    top_n = group.nlargest(n, 'gdp')
    return top_n['gdp'].mean()

region_top5 = df_gdp_region.groupby('region').apply(top_n_mean, n=5).sort_values(ascending=False)

  df_gdp_region.groupby('region', group_keys=False, dropna=False)


In [37]:
import sqlite3
import pandas as pd

def read_from_sqlite(db_name="missions/W1/M3/data/Countries_by_GDP.db"):
    # SQLite DB 연결
    conn = sqlite3.connect(db_name)
    
    # SQL 쿼리로 테이블 읽기
    query = "SELECT * FROM Countries_by_GDP"
    list_of_gdp = pd.read_sql_query(query, conn)
    
    # 연결 닫기
    conn.close()
    
    return list_of_gdp

def analyze_gdp(list_of_gdp):
    # GDP_USD_billion이 100 이상인 데이터 필터링
    filtered_data = list_of_gdp[list_of_gdp['GDP_USD_billion'] >= 100]
    return filtered_data

list_of_gdp = read_from_sqlite()
analyze_gdp(list_of_gdp)


Unnamed: 0,Country,GDP_USD_billion,Year,Type,Region
0,United States,30337.16,2025.0,IMF,North America
1,China,19534.89,2025.0,IMF,East Asia
2,Germany,4921.56,2025.0,IMF,Western Europe
3,Japan,4389.33,2025.0,IMF,East Asia
4,India,4271.92,2025.0,IMF,South Asia
...,...,...,...,...,...
68,Kenya,116.32,2024.0,IMF,Sub-Saharan Africa
69,Angola,113.29,2024.0,IMF,Sub-Saharan Africa
70,Guatemala,112.37,2024.0,IMF,Latin America
71,Oman,110.99,2024.0,IMF,Middle East


In [38]:
with sqlite3.connect("missions/W1/M3/data/Countries_by_GDP.db") as conn:
    cursor = conn.cursor()

    query = '''
        SELECT *
        FROM Countries_by_GDP
        WHERE GDP_USD_billion >= 100;
    '''
    cursor.execute(query)
    rows = cursor.fetchall()
    for row in rows:
        print(row)
    conn.rollback()

    cursor.close()

('United States', 30337.16, '2025.0', 'IMF', 'North America')
('China', 19534.89, '2025.0', 'IMF', 'East Asia')
('Germany', 4921.56, '2025.0', 'IMF', 'Western Europe')
('Japan', 4389.33, '2025.0', 'IMF', 'East Asia')
('India', 4271.92, '2025.0', 'IMF', 'South Asia')
('United Kingdom', 3730.26, '2025.0', 'IMF', 'Western Europe')
('France', 3283.43, '2025.0', 'IMF', 'Western Europe')
('Italy', 2459.6, '2025.0', 'IMF', 'Western Europe')
('Canada', 2330.31, '2025.0', 'IMF', 'North America')
('Brazil', 2307.16, '2025.0', 'IMF', 'Latin America')
('Russia', 2195.71, '2025.0', 'IMF', 'Eastern Europe')
('South Korea', 1947.13, '2025.0', 'IMF', 'East Asia')
('Australia', 1881.14, '2025.0', 'IMF', 'Oceania')
('Spain', 1827.58, '2025.0', 'IMF', 'Western Europe')
('Mexico', 1817.82, '2025.0', 'IMF', 'North America')
('Indonesia', 1492.62, '2025.0', 'IMF', 'Southeast Asia')
('Turkey', 1455.41, '2025.0', 'IMF', 'Eastern Europe')
('Netherlands', 1272.96, '2025.0', 'IMF', 'Western Europe')
('Saudi Arab

In [39]:
with sqlite3.connect("missions/W1/M3/data/Countries_by_GDP.db") as conn:
    cursor = conn.cursor()

    query = '''
        SELECT
        Region,
        ROUND(AVG(GDP_USD_billion), 2) AS Average_GDP_USD_billion
    FROM (
        SELECT
            Region,
            GDP_USD_billion,
            ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS Rank
        FROM
            Countries_by_GDP
    )
    WHERE
        Rank <= 5
    GROUP BY
        Region
    ORDER BY
        Average_GDP_USD_billion DESC;
    '''
    cursor.execute(query)
    rows = cursor.fetchall()
    for row in rows:
        print(row)
    conn.rollback()

    cursor.close()

('North America', 8622.34)
('East Asia', 5421.57)
('Western Europe', 3244.49)
('Eastern Europe', 1085.0)
('South Asia', 1044.18)
('Latin America', 797.57)
('Southeast Asia', 711.89)
('Middle East', 598.13)
('Oceania', 436.66)
('Sub-Saharan Africa', 215.48)
('North Africa', 182.07)
('Central Asia', 108.96)
