In [2]:
import os

import pandas as pd
import pymysql
from tqdm import tqdm

# 1. Open the MySQL connection used by every query in this script.
#    Each setting can be overridden through an environment variable; the
#    literals are kept only as fallbacks so the existing workflow keeps running.
# NOTE(review): these credentials are committed in source. Rotate the password
# and drop its fallback once PREMO_DB_PASSWORD is set wherever this runs.
conn = pymysql.connect(
    host=os.environ.get(
        "PREMO_DB_HOST",
        "premo-instance.czwmu86ms4yl.us-east-1.rds.amazonaws.com",
    ),
    user=os.environ.get("PREMO_DB_USER", "admin"),
    password=os.environ.get("PREMO_DB_PASSWORD", "tteam891"),
    # `database` is the current keyword; `db` is a deprecated alias in PyMySQL.
    database=os.environ.get("PREMO_DB_NAME", "premo"),
    charset="utf8mb4",
    # DictCursor returns each row as a dict keyed by column name, which the
    # code below relies on (e.g. row["team_id"], result["match_id"]).
    cursorclass=pymysql.cursors.DictCursor,
)

# 2. Team-name mapping (CSV name → DB name). Applied to the CSV's
#    "Home Team" / "Away Team" values before the team_id lookup; names that
#    are not listed here are used unchanged.
team_folder_map = {"AFC Bournemouth": "Bournemouth"}

# 3. Build a lowercase team-name → team_id lookup from the `team` table.
#    Both team_common_name and short_name are registered as aliases of the
#    same team_id, so either spelling resolves to the team.
with conn.cursor() as cursor:
    cursor.execute("SELECT team_id, team_common_name, short_name FROM team")
    team_rows = cursor.fetchall()

team_name_to_id = {}
for team_row in team_rows:
    # Same registration order as before: common name first, then short name,
    # so on a collision the later entry still wins.
    for alias_column in ("team_common_name", "short_name"):
        alias = team_row[alias_column]
        # A NULL column is returned as None; skip it instead of crashing
        # on None.lower().
        if alias is not None:
            team_name_to_id[alias.lower()] = team_row["team_id"]

# 4. Load the CSV into a DataFrame. The path is relative, so it is resolved
#    against the current working directory. The insert loop below reads the
#    columns "Home Team", "Away Team", "Date", the three "... Probability"
#    columns, and "Top-k" / "Top-k Prob" for k = 1..3.
df = pd.read_csv("../../output/predicted_score.csv")

# 5. Parameterized INSERT statement for the model_output table.
#    It uses named %(key)s placeholders, so cursor.execute() must be given a
#    dict containing every key that appears in the VALUES list.
#    prediction_date, created_at and updated_at are not parameters: all three
#    are filled with NOW() by the database at insert time.
insert_sql = """
INSERT INTO model_output (
    match_id,
    home_winrate, drawrate, away_winrate,
    home_score_1, away_score_1, score_1_prob,
    home_score_2, away_score_2, score_2_prob,
    home_score_3, away_score_3, score_3_prob,
    prediction_date, created_at, updated_at
) VALUES (
    %(match_id)s,
    %(home_winrate)s, %(drawrate)s, %(away_winrate)s,
    %(home_score_1)s, %(away_score_1)s, %(score_1_prob)s,
    %(home_score_2)s, %(away_score_2)s, %(score_2_prob)s,
    %(home_score_3)s, %(away_score_3)s, %(score_3_prob)s,
    NOW(), NOW(), NOW()
);
"""

# 6. Insert one model_output row for every CSV row that can be matched to a
#    row of the `match` table. Rows that cannot be processed are reported and
#    skipped; everything is committed once, after the loop.


def _resolve_team_id(csv_name):
    """Return the team_id for a CSV team name, or None if it cannot be mapped.

    The name is passed through team_folder_map, lowercased, and looked up in
    team_name_to_id. A non-string value (e.g. NaN from an empty CSV cell) is
    treated as unmapped instead of raising on ``.lower()``.
    """
    if not isinstance(csv_name, str):
        return None
    return team_name_to_id.get(team_folder_map.get(csv_name, csv_name).lower())


def _split_scoreline(raw):
    """Split a scoreline such as ``"2-1"`` into ``(home, away)`` strings.

    Single quotes and surrounding whitespace are stripped first. Returns
    ``(None, None)`` when the text has no ``-`` separator; an empty side is
    returned as None.
    """
    text = str(raw).replace("'", "").strip()
    if "-" not in text:
        return None, None
    home, away = (part.strip() for part in text.split("-", 1))
    return home or None, away or None


try:
    with conn.cursor() as cursor:
        for _, row in tqdm(df.iterrows(), total=len(df)):
            # Skip rows with any missing outcome probability.
            if (
                pd.isna(row["Home Win Probability"])
                or pd.isna(row["Draw Probability"])
                or pd.isna(row["Away Win Probability"])
            ):
                print(f"⚠️ 확률 결측 → 건너뜀: {row.get('Home Team')} vs {row.get('Away Team')} on {row.get('Date')}")
                continue

            # Translate CSV team names into DB team ids.
            home_team_id = _resolve_team_id(row["Home Team"])
            away_team_id = _resolve_team_id(row["Away Team"])
            if home_team_id is None or away_team_id is None:
                print(f"❌ 팀 ID 매핑 실패: {row['Home Team']} / {row['Away Team']}")
                continue

            # Find the match by both teams and date. A missing or unparsable
            # date becomes NaT and is reported as "not found" rather than
            # raising inside strftime.
            match_date = pd.to_datetime(row["Date"], errors="coerce")
            result = None
            if not pd.isna(match_date):
                row_date = match_date.strftime("%Y-%m-%d")
                cursor.execute("""
                    SELECT match_id FROM `match`
                    WHERE home_team_id = %s AND away_team_id = %s AND start_time = %s
                """, (home_team_id, away_team_id, row_date))
                result = cursor.fetchone()

            if not result:
                print(f"❌ Match not found: {row['Home Team']} vs {row['Away Team']} on {row['Date']}")
                continue

            # Assemble the INSERT parameters; probabilities are stored as
            # percentages.
            insert_data = {
                "match_id": result["match_id"],
                "home_winrate": round(float(row["Home Win Probability"]) * 100, 2),
                "drawrate": round(float(row["Draw Probability"]) * 100, 2),
                "away_winrate": round(float(row["Away Win Probability"]) * 100, 2),
            }

            # Top-1..Top-3 predicted scorelines and their probabilities.
            for k in range(1, 4):
                home_score, away_score = _split_scoreline(row.get(f"Top-{k}", ""))
                prob = row.get(f"Top-{k} Prob", 0)

                insert_data[f"home_score_{k}"] = home_score
                insert_data[f"away_score_{k}"] = away_score
                insert_data[f"score_{k}_prob"] = float(prob) * 100 if pd.notnull(prob) else None

            cursor.execute(insert_sql, insert_data)

        conn.commit()
except BaseException:
    # Also covers KeyboardInterrupt: discard the partial, uncommitted inserts
    # so the shared connection is not left inside an open transaction.
    conn.rollback()
    raise

print("✅ model_output 테이블에 INSERT 완료")

 46%|████▋     | 237/510 [01:26<01:39,  2.75it/s]

⚠️ 확률 결측 → 건너뜀: West Ham United vs Chelsea on 2024-09-21


 58%|█████▊    | 297/510 [01:47<01:18,  2.72it/s]

⚠️ 확률 결측 → 건너뜀: Arsenal vs Nottingham Forest on 2024-11-23


 90%|█████████ | 461/510 [02:47<00:17,  2.73it/s]

⚠️ 확률 결측 → 건너뜀: Nottingham Forest vs Everton on 2025-04-12


100%|██████████| 510/510 [03:04<00:00,  2.76it/s]

✅ model_output 테이블에 INSERT 완료



