In [2]:
import pandas as pd
import numpy as np
import requests
import xmltodict
import json
import traceback
from pandas.tseries.offsets import MonthEnd
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import platform
from sqlalchemy import create_engine

In [3]:
# 한글 폰트 설정
if platform.system() == 'Windows':
    plt.rc('font', family='Malgun Gothic')  # 윈도우 기본 한글 폰트
elif platform.system() == 'Darwin':  # macOS
    plt.rc('font', family='AppleGothic')
else:  # Linux (예: Google Colab)
    plt.rc('font', family='NanumGothic')

# 마이너스 깨짐 방지
plt.rcParams['axes.unicode_minus'] = False

In [4]:
def get_month_date(start, end, freq):
    mydates = pd.period_range(start, end, freq=freq)
    return mydates

def get_period_list(start, end, last_month):

    cut_num = last_month-12
    end = end+1

    period_list = []
    for y in range(start, end):
        for m in range(1,13):
            if len(str(m)) < 2:
                m = str(0)+ str(m)
            else:
                m = str(m)
            #print(str(y)+str(m))
            ym =str(y)+str(m)
            period_list.append(ym)
    period_list = period_list[:cut_num]
    return period_list

## 품목별 수출입실적 목록 검색
## 1년 12개월만 한번에 받을 수 있음
# start =시작일, end = 마지막날, 1년기간 12, 반년 6, 분기 3, hscode = 6자리 혹은 10자리

def get_country_export_by_item(start_list, end_list, hs_code):
    df_list = []
    for i, start in enumerate(start_list):
        end = end_list[i]
        service_key = '2o6NG3ixxDgGQ9S4dWUgsMac9WlxfX46%2BJvFRsAlsXQ6xVi6CZewvNJvbHd4S7exkWwt3YWoKSdwvUNb46kSTQ%3D%3D'
        url = f'https://apis.data.go.kr/1220000/Itemtrade/getItemtradeList?serviceKey={service_key}&strtYymm={start}&endYymm={end}&hsSgn={hs_code}'

        try:
            req = requests.get(url)
            json_dict = json.loads(json.dumps(xmltodict.parse(req.text), indent=4))
            items = json_dict['response']['body']['items']
            if items is None:
                print(f"⚠️ No data for HS {hs_code} from {start} to {end}")
                continue
            target_df = pd.DataFrame(items['item'])
            df_list.append(target_df)
        except Exception as e:
            print(f"❌ API 요청 실패: {hs_code} ({start} ~ {end})")
            print(traceback.format_exc())
            continue

    if df_list:
        return pd.concat(df_list, ignore_index=True)
    else:
        return pd.DataFrame()


def batch_export_by_hscode(cd_list, start_list, end_list, batch_size=20, region_name='전국'):
    all_export_q = []
    all_export_m = []
    error_list = []

    for i in range(0, len(cd_list), batch_size):
        hs_code_list = cd_list[i:i+batch_size]
        data_by_hscode = {}

        for hs_code in hs_code_list:
            try:
                target_df = get_country_export_by_item(start_list, end_list, hs_code)
                if target_df.empty:
                    print(f"⚠️ {hs_code}의 유효한 데이터가 없습니다.")
                    continue

                target_df = target_df[target_df['year'] != '총계'].copy()
                target_df['root_hs_code'] = hs_code
                data_by_hscode[hs_code] = target_df

            except Exception as e:
                print(f"❌ {hs_code} 처리 중 오류 발생:")
                print(traceback.format_exc())
                error_list.append(hs_code)
                continue

        if not data_by_hscode:
            print("⚠️ 병합할 데이터가 없습니다. 건너뜁니다.")
            continue

        try:
            merged_df = pd.concat(data_by_hscode).reset_index(drop=True)

            # 날짜 처리
            merged_df['new_date'] = pd.to_datetime(merged_df['year'].str.replace('.', '-')) + MonthEnd(0)
            merged_df.set_index('new_date', inplace=True)
            merged_df['new_year'] = merged_df.index.year
            merged_df['new_quarter'] = merged_df.index.quarter
            merged_df['new_month'] = merged_df.index.month

            # 숫자형 컬럼 변환
            numeric_cols = ['balPayments', 'expDlr', 'expWgt', 'impDlr', 'impWgt']
            for col in numeric_cols:
                if col in merged_df.columns:
                    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')
                else:
                    merged_df[col] = 0.0

            # 정리된 컬럼
            clean_df = merged_df[['hsCode', 'new_year', 'new_quarter', 'new_month', 'statKor', 'balPayments', 'expDlr', 'expWgt', 'impDlr', 'impWgt']].copy()
            clean_df['root_hs_code'] = merged_df['root_hs_code'].values
            clean_df['region'] = region_name

            # 월별 집계
            export_df_by_m = clean_df.groupby(['root_hs_code', 'new_year', 'new_quarter', 'new_month']).agg({
                'balPayments': 'sum', 'expDlr': 'sum', 'impDlr': 'sum'
            }).reset_index()

            # 분기별 집계
            export_df_by_q = clean_df.groupby(['root_hs_code', 'new_year', 'new_quarter']).agg({
                'balPayments': 'sum', 'expDlr': 'sum', 'impDlr': 'sum'
            }).reset_index()

            export_df_by_q['region'] = region_name
            export_df_by_m['region'] = region_name

            all_export_q.append(export_df_by_q)
            all_export_m.append(export_df_by_m)

        except Exception as e:
            print("❌ 병합/정리 중 오류 발생")
            print(traceback.format_exc())
            error_list += hs_code_list

    final_q = pd.concat(all_export_q) if all_export_q else pd.DataFrame()
    final_m = pd.concat(all_export_m) if all_export_m else pd.DataFrame()

    return final_q, final_m, error_list

def add_yoy_growth(df: pd.DataFrame, steps: int) -> pd.DataFrame:
    """
    root_hs_code별로 expDlr, impDlr의 전년동기대비 증가율(%)을 계산하여 컬럼으로 추가하는 함수.
    월 단위 데이터는 steps=12, 분기 단위 데이터는 steps=4로 설정합니다.

    Parameters:
        df (pd.DataFrame): 무역 데이터. 'root_hs_code', 'new_year', 'new_month' 또는 'new_quarter', 'expDlr', 'impDlr' 포함.
        steps (int): 월 단위는 12, 분기 단위는 4로 설정.

    Returns:
        pd.DataFrame: expDlr_yoy, impDlr_yoy 컬럼이 추가된 데이터프레임
    """
    import pandas as pd

    df = df.copy()

    if steps == 12:
        # 월 기준
        df['year_month'] = pd.to_datetime(df['new_year'].astype(str) + '-' + df['new_month'].astype(str) + '-01')

    elif steps == 4:
        # 분기 마지막 월로 매핑: Q1→03, Q2→06, Q3→09, Q4→12
        end_month = df['new_quarter'].map({1: '03', 2: '06', 3: '09', 4: '12'})
        df['year_month'] = pd.to_datetime(df['new_year'].astype(str) + '-' + end_month + '-01')

    else:
        raise ValueError("steps 값은 12(월 단위) 또는 4(분기 단위)여야 합니다.")

    # 정렬 및 성장률 계산
    df = df.sort_values(['root_hs_code', 'year_month'])

    df['expDlr_yoy'] = (
        df.groupby('root_hs_code')['expDlr']
        .transform(lambda x: x.pct_change(periods=steps))
    )

    df['impDlr_yoy'] = (
        df.groupby('root_hs_code')['impDlr']
        .transform(lambda x: x.pct_change(periods=steps))
    )

    return df


def plot_column_by_hscode(df, hs_code, col_name, start_date=None, end_date=None):
    """
    특정 root_hs_code에 대해 year_month를 X축, 지정된 컬럼(col_name)을 Y축으로 하는 라인차트를 그립니다.

    Parameters:
        df (pd.DataFrame): 'year_month', 'root_hs_code', col_name 컬럼이 포함된 DataFrame
        hs_code (str): 시각화할 root_hs_code
        col_name (str): Y축에 사용할 컬럼 이름
        start_date (str or pd.Timestamp): 시작 날짜 (예: '2020-01-01')
        end_date (str or pd.Timestamp): 종료 날짜 (예: '2024-12-31')
    """
    # 필터링 및 정렬
    target_df = df[df['root_hs_code'] == hs_code].sort_values('year_month')

    if target_df.empty:
        print(f"⚠️ root_hs_code {hs_code}에 해당하는 데이터가 없습니다.")
        return

    if col_name not in target_df.columns:
        print(f"❌ '{col_name}' 컬럼이 DataFrame에 없습니다.")
        return

    # 날짜 범위 필터링
    if start_date:
        target_df = target_df[target_df['year_month'] >= pd.to_datetime(start_date)]
    if end_date:
        target_df = target_df[target_df['year_month'] <= pd.to_datetime(end_date)]

    if target_df.empty:
        print(f"⚠️ 지정한 날짜 범위에 데이터가 없습니다.")
        return

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(target_df['year_month'], target_df[col_name], marker='o', label=col_name)

    # 마지막 값에 텍스트 표시
    last_x = target_df['year_month'].iloc[-1]
    last_y = target_df[col_name].iloc[-1]
    plt.text(last_x, last_y, f"{last_y * 100:,.2f}%", fontsize=25, ha='left', va='bottom', color='red')

    plt.title(f"{col_name} 추이 (root_hs_code: {hs_code})")
    plt.xlabel("Year-Month")
    plt.ylabel(col_name)
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


In [5]:
data = pd.read_excel(r'C:\Users\MetaM\PycharmProjects\pythonProject3\HS_Code_500\HS_Code_500.xlsx')
cd_array = data['HS_Code'].unique()
cd_list = cd_array.tolist()


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\MetaM\\PycharmProjects\\pythonProject3\\HS_Code_500\\HS_Code_500.xlsx'

In [5]:
# cd_list = cd_list[:]
len(cd_list)

500

In [6]:
# cd_list = ['854231', '854232', '854239']
start_list = [ '200701', '200801', '200901', 
              '201001', '201101', '201201', '201301', '201401', 
              '201501', '201601', '201701', '201801', '201901',
              '202001', '202101', '202201', '202301', '202401', '202501']
end_list = [ '200712', '200812', '200912', 
              '201012', '201112', '201212', '201312', '201412', 
              '201512', '201612', '201712', '201812', '201912',
              '202012', '202112', '202212', '202312', '202412', '202512']

export_q, export_m, error_list = batch_export_by_hscode(cd_list, start_list, end_list)

⚠️ No data for HS 870380 from 200701 to 200712
⚠️ No data for HS 870380 from 200801 to 200812
⚠️ No data for HS 870380 from 200901 to 200912
⚠️ No data for HS 870380 from 201001 to 201012
⚠️ No data for HS 870380 from 201101 to 201112
⚠️ No data for HS 870380 from 201201 to 201212
⚠️ No data for HS 870380 from 201301 to 201312
⚠️ No data for HS 870380 from 201401 to 201412
⚠️ No data for HS 870380 from 201501 to 201512
⚠️ No data for HS 870380 from 201601 to 201612
⚠️ No data for HS 271012 from 200701 to 200712
⚠️ No data for HS 271012 from 200801 to 200812
⚠️ No data for HS 271012 from 200901 to 200912
⚠️ No data for HS 271012 from 201001 to 201012
⚠️ No data for HS 271012 from 201101 to 201112
⚠️ No data for HS 852412 from 200701 to 200712
⚠️ No data for HS 852412 from 200801 to 200812
⚠️ No data for HS 852412 from 200901 to 200912
⚠️ No data for HS 852412 from 201001 to 201012
⚠️ No data for HS 852412 from 201101 to 201112
⚠️ No data for HS 852412 from 201201 to 201212
⚠️ No data fo

In [60]:
# export_df_by_m 은 월별 수출입 데이터라고 가정
export_m_with_yoy = add_yoy_growth(export_m,steps=12)
export_q_with_yoy = add_yoy_growth(export_q,steps=4)

In [62]:
export_m_with_yoy['root_hs_code'] = export_m_with_yoy['root_hs_code'].astype(str)
export_q_with_yoy['root_hs_code'] = export_q_with_yoy['root_hs_code'].astype(str)
# export_m_with_yoy.rename(columns={'year_month':'period'}, inplace=True)
# export_m_with_yoy.to_csv('export_m_with_yoy.csv')

In [63]:
from sqlalchemy import create_engine
import pymysql

# 4. DB 접속 정보 입력 (수정 필요)
# host = 'hystox74.synology.me'           # 예: 'localhost'
# host = '192.168.0.230'
# port = 3307                  # 예: 3306 또는 사용자 설정 포트
# user = 'stox7412'
# password = 'Apt106503!~'
# database = 'investar'

# ✅ DB 정보 설정
db_info = {
    'host': '192.168.0.230',
    'port': 3307,
    'user': 'stox7412',
    'password': 'Apt106503!~',
    'database': 'investar'
}

df_new = export_m_with_yoy

# ✅ 날짜 컬럼 형식 통일
df_new['year_month'] = pd.to_datetime(df_new['year_month'])

# ✅ SQLAlchemy 엔진 및 Raw Connection 생성
engine = create_engine(
    f"mysql+pymysql://{db_info['user']}:{db_info['password']}@{db_info['host']}:{db_info['port']}/{db_info['database']}"
)
conn = engine.raw_connection()
cursor = conn.cursor()

# ✅ 테이블 생성 쿼리
create_table_sql = """
CREATE TABLE IF NOT EXISTS trade_data_monthly (
    `year_month` DATE,
    `root_hs_code` VARCHAR(20),
    `new_year` INT,
    `new_quarter` INT,
    `new_month` INT,
    `balPayments` FLOAT,
    `expDlr` FLOAT,
    `impDlr` FLOAT,
    `region` VARCHAR(50),
    `expDlr_yoy` FLOAT,
    `impDlr_yoy` FLOAT,
    PRIMARY KEY (`year_month`, `root_hs_code`)
);
"""
cursor.execute(create_table_sql)
conn.commit()

# ✅ 기존 데이터 조회
existing_query = "SELECT `year_month`, `root_hs_code` FROM trade_data_monthly"
existing_df = pd.read_sql(existing_query, engine)

# ✅ 중복 제거
merged = pd.merge(df_new, existing_df, on=['year_month', 'root_hs_code'], how='left', indicator=True)
df_to_upload = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])

# ✅ inf/-inf/NaN 처리
df_to_upload = df_to_upload.replace([np.inf, -np.inf], np.nan)
df_to_upload = df_to_upload.where(pd.notnull(df_to_upload), None)

# ✅ 업로드
if not df_to_upload.empty:
    df_to_upload.to_sql(name='trade_data_monthly', con=engine, if_exists='append', index=False)
    print(f"✅ 중복 제외 후 {len(df_to_upload)}건 업로드 완료")
else:
    print("⚠️ 업로드할 새로운 데이터가 없습니다.")

# ✅ 연결 종료
cursor.close()
conn.close()

⚠️ 업로드할 새로운 데이터가 없습니다.


In [58]:
import pymysql
import mysql.connector
from tqdm import tqdm

In [59]:
# host_num = '49.175.148.195'
# cnx = pymysql.connect(host= host_num, port=3307, db='investar',  user='stox7412', passwd='Apt106503!~', autocommit=True)
# cursor = cnx.cursor()

In [60]:
host_num = '49.175.148.195'
cnx = pymysql.connect(host= host_num, port=3307, db='investar',  user='stox7412', passwd='Apt106503!~', autocommit=True)
cursor = cnx.cursor()

# 테이블 생성
cursor.execute('''
    CREATE TABLE IF NOT EXISTS HS_CODE(
        date DATE,
        수출품목 vARCHAR(255),
        hs_code INT,
        수출액 INT,
        수출중량 INT,
        수입액 INT,
        수입중량 INT,
        순수출액 INT,
        수출액_mom FLOAT,
        수입액_mom FLOAT,
        순수출액_mom FLOAT,
        수출액_yoy FLOAT,
        수입액_yoy FLOAT,
        순수출액_yoy FLOAT,
        PRIMARY KEY (date, hs_code) 
    )
''')


# 중복 데이터 필터링 후 삽입
for index, row in tqdm(target_df4.iterrows(), total=len(target_df4)):
    cursor.execute('''
        INSERT IGNORE INTO HS_CODE (date, 수출품목, hs_code, 수출액, 수출중량, 수입액, 수입중량, 순수출액, 수출액_mom, 수입액_mom, 순수출액_mom, 수출액_yoy, 수입액_yoy, 순수출액_yoy)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ''', (row['date'], row['수출품목'], row['hs_code'], row['수출액'], row['수출중량'],
          row['수입액'], row['수입중량'], row['순수출액'], row['수출액_mom'], row['수입액_mom'], row['순수출액_mom'],
          row['수출액_yoy'], row['수입액_yoy'],  row['순수출액_yoy']))

# 변경사항 저장 및 연결 종료
cnx.commit()
cnx.close()

100%|███████████████████████████████████████████████████████████████████████████| 12980/12980 [01:28<00:00, 146.38it/s]


In [61]:
target_df4

Unnamed: 0,date,수출품목,hs_code,수출액,수출중량,수입액,수입중량,순수출액,수출액_mom,수입액_mom,순수출액_mom,수출액_yoy,수입액_yoy,순수출액_yoy
0,2010-01-31,복합구조칩 집적회로,8542323000,322031578,29172,46890817,9748,275140761,,,,,,
1,2010-02-28,복합구조칩 집적회로,8542323000,388024602,36475,54001394,9038,334023208,0.204927,0.151641,0.214008,,,
2,2010-03-31,복합구조칩 집적회로,8542323000,487808342,47698,60145204,11089,427663138,0.257158,0.113771,0.28034,,,
3,2010-04-30,복합구조칩 집적회로,8542323000,543394929,52871,74019608,8265,469375321,0.113952,0.230682,0.097535,,,
4,2010-05-31,복합구조칩 집적회로,8542323000,555547187,58738,42287936,6323,513259251,0.022364,-0.428693,0.093494,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12975,2022-08-31,기타,7308909000,98802155,29856701,81580077,47139996,17222078,-0.3467,-0.064857,-0.730894,0.0746,-0.36776,-1.46433
12976,2022-09-30,기타,7308909000,62287908,20669417,90483437,50014630,-28195529,-0.369569,0.109136,-2.637173,-0.834279,-0.097361,-1.1023
12977,2022-10-31,기타,7308909000,120283198,33361635,115050448,65662629,5232750,0.931084,0.271508,-1.185588,0.711546,1.050352,-0.630586
12978,2022-11-30,기타,7308909000,348424894,55316162,91128564,54296472,257296330,1.896705,-0.207925,48.170385,3.377735,0.272817,31.185041
