In [1]:
# Install the packages this notebook imports (pandas, SQLAlchemy, the PyMySQL driver, python-dotenv).
# NOTE(review): versions are unpinned, so a fresh run may pick up different releases — consider pinning.
%pip -q install pandas sqlalchemy pymysql python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Local-development defaults for the MySQL connection and the CSV import.
# The following cell reassigns every one of these names from environment
# variables, falling back to these same values.
MYSQL_HOST, MYSQL_PORT = "127.0.0.1", 3306
MYSQL_USER, MYSQL_PASSWORD = "root", "root"

# Database to load into; a later cell creates it when it does not exist.
MYSQL_DB = "project_db"

# Destination table name — change to whatever table name is wanted.
TABLE_NAME = "districts"

# Location of the CSV file to import.
CSV_PATH = "./districts.csv"


In [3]:
from dotenv import load_dotenv
import os

# Load a .env file before reading the settings below; intended for the case
# where a .env file sits in the current directory.
load_dotenv()

# Each setting is read from the environment; the second argument is the
# fallback used when the variable is not set.
MYSQL_HOST = os.environ.get("MYSQL_HOST", "127.0.0.1")
# Environment values are strings, so the port is converted to int explicitly.
MYSQL_PORT = int(os.environ.get("MYSQL_PORT", "3306"))
MYSQL_USER = os.environ.get("MYSQL_USER", "root")
MYSQL_PASSWORD = os.environ.get("MYSQL_PASSWORD", "root")
MYSQL_DB = os.environ.get("MYSQL_DB", "project_db")
# Note the variable is MYSQL_TABLE even though the Python name is TABLE_NAME.
TABLE_NAME = os.environ.get("MYSQL_TABLE", "districts")
CSV_PATH = os.environ.get("CSV_PATH", "./districts.csv")


In [4]:
import pandas as pd

# Try several common encodings in order so the text is not garbled on load.
encodings_to_try = ["utf-8-sig", "utf-8", "cp949", "euc-kr"]

last_err = None
for enc in encodings_to_try:
    try:
        df = pd.read_csv(CSV_PATH, encoding=enc)
    except UnicodeDecodeError as e:
        # Wrong encoding guess: remember the error and try the next candidate.
        # Any other failure (missing file, malformed CSV, ...) is not an
        # encoding problem, so it propagates immediately instead of being
        # retried with every remaining encoding.
        last_err = e
    else:
        used_encoding = enc
        break
else:
    # No candidate could decode the file; surface the last decode error.
    raise last_err

# Quick sanity report: row count, the encoding that worked, a preview, and dtypes.
print("Loaded rows:", len(df))
print("Encoding used:", used_encoding)
display(df.head(5))
print(df.dtypes)


Loaded rows: 20555
Encoding used: utf-8-sig


Unnamed: 0,district_id,district_name,district_code,district_short_name
0,1100000000,서울특별시,11000,서울특별시
1,1111000000,서울특별시 종로구,11110,서울특별시 종로구
2,1111010100,서울특별시 종로구 청운동,11110,서울특별시 종로구
3,1111010200,서울특별시 종로구 신교동,11110,서울특별시 종로구
4,1111010300,서울특별시 종로구 궁정동,11110,서울특별시 종로구


district_id             int64
district_name          object
district_code           int64
district_short_name    object
dtype: object


In [5]:
import re

def sanitize_columns(cols):
    """Turn arbitrary column labels into unique, SQL-friendly identifiers.

    Each label is converted with ``str()``, stripped, has whitespace runs
    replaced by ``_``, has every character outside ``[0-9a-zA-Z_]`` removed,
    and is lower-cased. A label that ends up empty becomes ``"col"``; one that
    starts with a digit is prefixed with ``"c_"``.

    Repeated names get a numeric suffix (``name``, ``name_2``, ``name_3``, ...).
    The suffix is bumped until the result does not clash with any name already
    emitted, so an input such as ``["a", "a", "a_2"]`` yields three distinct
    names rather than two identical ``"a_2"`` entries.

    Args:
        cols: Iterable of column labels (any objects convertible with ``str``).

    Returns:
        list[str]: Sanitized names, one per input label, in the same order and
        guaranteed to be unique.
    """
    out = []
    counts = {}   # base name -> highest occurrence index handed out so far
    used = set()  # every name already emitted, across all base names

    for c in cols:
        base = re.sub(r"\s+", "_", str(c).strip())
        base = re.sub(r"[^0-9a-zA-Z_]", "", base).lower()
        if not base:
            # Nothing survived the filter (e.g. a label made only of non-ASCII text).
            base = "col"
        if base[0].isdigit():
            base = "c_" + base

        k = counts.get(base, 0)
        candidate = base if k == 0 else f"{base}_{k + 1}"
        # A suffixed name may coincide with a column that literally carries
        # that name; keep advancing until the candidate is genuinely free.
        while candidate in used:
            k += 1
            candidate = f"{base}_{k + 1}"

        counts[base] = k + 1
        used.add(candidate)
        out.append(candidate)
    return out

# Swap the frame's column labels for their sanitized forms, then show the
# resulting names and a three-row preview.
df.columns = sanitize_columns(df.columns)
print(list(df.columns))
display(df.iloc[:3])


['district_id', 'district_name', 'district_code', 'district_short_name']


Unnamed: 0,district_id,district_name,district_code,district_short_name
0,1100000000,서울특별시,11000,서울특별시
1,1111000000,서울특별시 종로구,11110,서울특별시 종로구
2,1111010100,서울특별시 종로구 청운동,11110,서울특별시 종로구


In [8]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from sqlalchemy.types import String, Integer, BigInteger, Float, DateTime, Boolean

# =========================
# Root engine (used to create the database)
# =========================
# The URL is assembled with URL.create() instead of string interpolation so
# that a user name or password containing characters such as "@", ":" or "/"
# is escaped properly rather than corrupting the connection string.
engine_root = create_engine(
    URL.create(
        "mysql+pymysql",
        username=MYSQL_USER,
        password=MYSQL_PASSWORD,
        host=MYSQL_HOST,
        port=MYSQL_PORT,
        query={"charset": "utf8mb4"},
    ),
    pool_pre_ping=True,
)

# Create the database if it does not exist. An identifier cannot be passed as
# a bound parameter, so any backtick in the name is doubled to keep the whole
# name inside the backtick-quoted identifier.
with engine_root.begin() as conn:
    conn.execute(
        text(
            f"CREATE DATABASE IF NOT EXISTS `{MYSQL_DB.replace('`', '``')}` "
            "DEFAULT CHARACTER SET utf8mb4"
        )
    )

# =========================
# Engine for the actual database
# =========================
engine = create_engine(
    URL.create(
        "mysql+pymysql",
        username=MYSQL_USER,
        password=MYSQL_PASSWORD,
        host=MYSQL_HOST,
        port=MYSQL_PORT,
        database=MYSQL_DB,
        query={"charset": "utf8mb4"},
    ),
    pool_pre_ping=True,
)

# =========================
# pandas dtype → SQLAlchemy dtype 매핑
# =========================
dtype_map = {}

for c in df.columns:
    s = df[c]

    # ⭐ district_id는 무조건 BIGINT
    if c == "district_id":
        dtype_map[c] = BigInteger()
        continue

    #
