In [None]:
%pip -q install pandas sqlalchemy pymysql python-dotenv

In [None]:
# Literal defaults for the MySQL connection and the CSV load.
# NOTE: the next cell reassigns every one of these names from environment
# variables, using these same values as fallbacks.
# NOTE(review): the user name and password are hard-coded here; prefer
# supplying them via the environment / a .env file.
MYSQL_HOST = "127.0.0.1"
MYSQL_PORT = 3306
MYSQL_USER = "root"
MYSQL_PASSWORD = "root"
MYSQL_DB = "project_db"      # created in a later cell if it does not exist
TABLE_NAME = "districts"  # destination table name (change as desired)
CSV_PATH = "./districts.csv"


In [None]:
from dotenv import load_dotenv
import os

# Load variables from a .env file (for when one sits in the current directory).
load_dotenv()

# Every setting comes from the process environment; the second argument is the
# fallback used when the variable is not set.
MYSQL_HOST = os.environ.get("MYSQL_HOST", "127.0.0.1")
MYSQL_PORT = int(os.environ.get("MYSQL_PORT", "3306"))  # env values are strings
MYSQL_USER = os.environ.get("MYSQL_USER", "root")
MYSQL_PASSWORD = os.environ.get("MYSQL_PASSWORD", "root")
MYSQL_DB = os.environ.get("MYSQL_DB", "project_db")
TABLE_NAME = os.environ.get("MYSQL_TABLE", "districts")
CSV_PATH = os.environ.get("CSV_PATH", "./districts.csv")


In [None]:
import pandas as pd

# Try common encodings in order so Korean text is not garbled on load.
encodings_to_try = ["utf-8-sig", "utf-8", "cp949", "euc-kr"]

last_err = None
for enc in encodings_to_try:
    try:
        df = pd.read_csv(CSV_PATH, encoding=enc)
    except UnicodeDecodeError as e:
        # Only a decoding failure justifies trying the next encoding.
        # Anything else (missing file, malformed CSV, ...) is unrelated to the
        # encoding and is allowed to propagate immediately with its real cause.
        last_err = e
    else:
        used_encoding = enc
        break
else:
    # Every candidate encoding failed to decode the file.
    raise last_err

print("Loaded rows:", len(df))
print("Encoding used:", used_encoding)
display(df.head(5))
print(df.dtypes)


In [None]:
import re

def sanitize_columns(cols):
    """Convert arbitrary column labels into unique, SQL-friendly identifiers.

    Each label is stringified and stripped, runs of whitespace become "_",
    every character outside ``[0-9a-zA-Z_]`` is dropped (this includes all
    non-ASCII text), and the result is lower-cased. A label that ends up
    empty becomes "col"; a label starting with a digit is prefixed with "c_".

    Repeated names receive a numeric suffix ("name", "name_2", "name_3", ...).
    The suffix is advanced until the candidate is not already in use, so a
    generated "name_2" can never clash with a column that is literally
    called "name_2".

    Args:
        cols: Iterable of column labels; each is converted with ``str()``.

    Returns:
        list: Sanitized names in the same order and count as ``cols``, with
        no duplicates.
    """
    out = []
    seen = {}      # base name -> how many suffix slots have been consumed
    used = set()   # every name already emitted, for collision checks
    for c in cols:
        c1 = re.sub(r"\s+", "_", str(c).strip())
        c1 = re.sub(r"[^0-9a-zA-Z_]", "", c1).lower()
        if not c1:
            c1 = "col"
        if c1[0].isdigit():
            c1 = "c_" + c1

        k = seen.get(c1, 0)
        new = c1 if k == 0 else f"{c1}_{k+1}"
        # Skip over suffixes that collide with a name produced earlier
        # (either another column's literal name or a previous suffix).
        while new in used:
            k += 1
            new = f"{c1}_{k+1}"
        seen[c1] = k + 1
        used.add(new)
        out.append(new)
    return out

# Replace the DataFrame's column labels with their sanitized forms, then
# print the new labels and preview the first three rows.
df.columns = sanitize_columns(df.columns)
print(df.columns.tolist())
display(df.head(3))


In [None]:
import numpy as np
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from sqlalchemy.types import String, Integer, Float, DateTime, Boolean, BigInteger, Text

# Build the connection URL from its components rather than by string
# formatting, so a user name or password containing reserved characters
# (e.g. "@", "/", ":") is not misparsed as part of the URL.
server_url = URL.create(
    "mysql+pymysql",
    username=MYSQL_USER,
    password=MYSQL_PASSWORD,
    host=MYSQL_HOST,
    port=MYSQL_PORT,
    query={"charset": "utf8mb4"},
)

# Server-level engine (no database selected); used only to create the database.
engine_root = create_engine(server_url, pool_pre_ping=True)

# Create the database if it does not exist.
# Backticks inside the name are doubled so the identifier stays correctly quoted.
quoted_db = "`" + MYSQL_DB.replace("`", "``") + "`"
with engine_root.begin() as conn:
    conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {quoted_db} DEFAULT CHARACTER SET utf8mb4"))
# Release the pooled connection; the engine itself remains usable afterwards.
engine_root.dispose()

# Engine bound to the target database; used for the actual load.
engine = create_engine(server_url.set(database=MYSQL_DB), pool_pre_ping=True)

# Rough pandas dtype -> SQLAlchemy type mapping (customize if needed).
dtype_map = {}
for c in df.columns:
    s = df[c]
    if pd.api.types.is_integer_dtype(s):
        # BIGINT: a 32-bit INT column cannot hold the full range of 64-bit integers.
        dtype_map[c] = BigInteger()
    elif pd.api.types.is_float_dtype(s):
        # precision=53 yields a double-precision column; a bare FLOAT in MySQL
        # is single precision and would round float64 values.
        dtype_map[c] = Float(precision=53)
    elif pd.api.types.is_bool_dtype(s):
        dtype_map[c] = Boolean()
    elif pd.api.types.is_datetime64_any_dtype(s):
        dtype_map[c] = DateTime()
    else:
        # Size string columns from the longest non-null value (in characters).
        non_null = s.dropna()
        max_len = int(non_null.astype(str).map(len).max()) if len(non_null) else 1
        if max_len > 500:
            # Too long for the VARCHAR cap: use a TEXT type sized to the data
            # instead of a VARCHAR(500) that cannot hold the values.
            dtype_map[c] = Text(length=max_len)
        else:
            # Keep a floor of 16 characters so short columns have some headroom.
            dtype_map[c] = String(length=max(max_len, 16))

# Load the DataFrame into MySQL.
df.to_sql(
    name=TABLE_NAME,
    con=engine,
    if_exists="replace",   # switch to "append" to accumulate rows across runs
    index=False,
    chunksize=5000,        # can be increased for large datasets
    method="multi",
    dtype=dtype_map,
)

print(f"Done. Loaded {len(df)} rows into `{MYSQL_DB}`.`{TABLE_NAME}`")
