In [None]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score

df = pd.read_csv("../data/datas/2425/england-premier-league-final-2024-to-2025-stats_processed.csv")

# 앞 70% → train / 뒤 30% → test
split_idx = int(len(df) * 0.7)
train_df = df.iloc[:split_idx]
test_df = df.iloc[split_idx:]

# feature/target 분리
exclude = ['home_team_name', 'away_team_name', 'match_date',
'home_team_goal_count', 'away_team_goal_count', 'result', 'home_points', 'away_points']

X_train = train_df.drop(columns=exclude, errors='ignore').select_dtypes(include=['float64', 'int64']).fillna(0)
y_train = train_df['home_team_goal_count']

X_test = test_df.drop(columns=exclude, errors='ignore').select_dtypes(include=['float64', 'int64']).fillna(0)
y_test = test_df['home_team_goal_count']

# 학습 및 예측
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

model = XGBRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# 평가
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
accuracy = (y_pred.round() == y_test).mean()

print(f"🎯 Home Goal 예측 결과")
print(f"MAE: {mae:.4f} | R²: {r2:.4f} | 정수 정확도: {accuracy:.4f}")


🎯 Home Goal 예측 결과
MAE: 0.3111 | R²: 0.7898 | 정수 정확도: 0.8070


In [8]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import os

data_root = "../data/datas/"
train_dfs = []

# 1. 전체 시즌 중 2425 제외하고 학습용으로 모으기
for root, dirs, files in os.walk(data_root):
    for file in files:
        if file.endswith('.csv') and 'final' in file and '2425' not in file:
            path = os.path.join(root, file)
            df = pd.read_csv(path)
            train_dfs.append(df)

# 2. train 데이터 결합
train_df = pd.concat(train_dfs, ignore_index=True)

# 3. 2425 시즌 테스트셋 불러오기
test_df = pd.read_csv("../data/datas/2425/england-premier-league-final-2024-to-2025-stats_processed.csv")

# 4. 제외할 컬럼
exclude = [
    'home_team_name', 'away_team_name', 'match_date',
    'home_team_goal_count', 'away_team_goal_count',
    'result', 'home_points', 'away_points'
]

# 5. feature 추출
X_train = train_df.drop(columns=exclude, errors='ignore').select_dtypes(include=['float64', 'int64']).fillna(0)
y_train = train_df['home_team_goal_count']

X_test = test_df.drop(columns=exclude, errors='ignore').select_dtypes(include=['float64', 'int64']).fillna(0)
y_test = test_df['home_team_goal_count']

# 6. 모델 학습 및 예측
model = XGBRegressor(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# 7. 평가
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
accuracy = (y_pred.round() == y_test).mean()

print(f"🎯 Home Goal 예측 (과거 시즌 학습 → 24/25 테스트)")
print(f"MAE: {mae:.4f} | R²: {r2:.4f} | 정수 정확도: {accuracy:.4f}")


🎯 Home Goal 예측 (과거 시즌 학습 → 24/25 테스트)
MAE: 0.0211 | R²: 0.9995 | 정수 정확도: 1.0000


In [11]:
import pandas as pd

# 1. 24/25 시즌 경기 데이터 로드
matches = pd.read_csv("../data/datas/2425/england-premier-league-final-2024-to-2025-stats_processed.csv")

# 2. 정렬 (이미 정렬되어 있다고 했지만, 안전하게 다시 확인)
matches = matches.reset_index(drop=True)

# 3. 제외할 컬럼
exclude = [
    'home_team_name', 'away_team_name',
    'home_team_goal_count', 'away_team_goal_count',
    'result', 'home_points', 'away_points'
]

# 4. feature 후보: 숫자형 중에서 제외 항목 제거
feature_cols = matches.select_dtypes(include=['float64', 'int64']).columns
feature_cols = [col for col in feature_cols if col not in exclude]

# 5. 경기별 rolling 평균 계산
rolling_rows = []

for i, row in matches.iterrows():
    home_team = row['home_team_name']
    away_team = row['away_team_name']
    past_matches = matches.iloc[:i]  # 현재 경기 이전까지만 사용

    # home team의 최근 5경기 평균
    recent_home = past_matches[
        (past_matches['home_team_name'] == home_team) | 
        (past_matches['away_team_name'] == home_team)
    ].tail(5)
    home_avg = recent_home[feature_cols].mean().add_prefix("home_")

    # away team의 최근 5경기 평균
    recent_away = past_matches[
        (past_matches['home_team_name'] == away_team) |
        (past_matches['away_team_name'] == away_team)
    ].tail(5)
    away_avg = recent_away[feature_cols].mean().add_prefix("away_")

    # 합치기
    combined = pd.concat([home_avg, away_avg])
    rolling_rows.append(combined)

# 6. DataFrame 변환 및 저장
rolling_features_df = pd.DataFrame(rolling_rows).reset_index(drop=True)
rolling_features_df.to_csv("rolling_features.csv", index=False)
print("✅ rolling_features.csv 저장 완료")


✅ rolling_features.csv 저장 완료


In [9]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import os

# 1. 학습 데이터 로딩 (2425 제외)
train_paths = []
data_root = "../data/datas/"
for root, dirs, files in os.walk(data_root):
    for file in files:
        if file.endswith(".csv") and "final" in file and "2425" not in file:
            train_paths.append(os.path.join(root, file))

train_df = pd.concat([pd.read_csv(path) for path in train_paths], ignore_index=True)

# 2. 테스트 데이터 로딩 (2425 시즌)
test_df = pd.read_csv("../data/datas/2425/england-premier-league-final-2024-to-2025-stats_processed.csv")

# 3. feature 제외 항목
exclude = [
    "home_team_name", "away_team_name", "match_date",
    "home_team_goal_count", "away_team_goal_count",
    "result", "home_points", "away_points"
]

# 4. 학습 데이터 (공통)
X_train = train_df.drop(columns=exclude, errors="ignore").select_dtypes(include=["float64", "int64"]).fillna(0)

# 🎯 Home goal 학습
y_train_home = train_df["home_team_goal_count"]

# 🎯 Away goal 학습
y_train_away = train_df["away_team_goal_count"]

# 5. 테스트 feature (최근 5경기 rolling 평균)
rolling_features_df = pd.read_csv("rolling_features.csv")  # 앞에서 만든 rolling 평균
X_test = rolling_features_df.fillna(0)

y_test_home = test_df["home_team_goal_count"]
y_test_away = test_df["away_team_goal_count"]

# 6. 모델 학습
model_home = XGBRegressor(random_state=42)
model_away = XGBRegressor(random_state=42)

model_home.fit(X_train, y_train_home)
model_away.fit(X_train, y_train_away)

# 7. 예측
y_pred_home = model_home.predict(X_test)
y_pred_away = model_away.predict(X_test)

# 8. 평가 지표 출력
def print_metrics(y_true, y_pred, label="Home"):
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    acc = (y_true == y_pred.round()).mean()
    print(f"🎯 {label} Goal 예측 결과")
    print(f"MAE: {mae:.4f} | R²: {r2:.4f} | 정수 정확도: {acc:.4f}\n")

print_metrics(y_test_home, y_pred_home, label="Home")
print_metrics(y_test_away, y_pred_away, label="Away")


FileNotFoundError: [Errno 2] No such file or directory: 'rolling_features.csv'