<a href="https://colab.research.google.com/github/ljw-0108/deep-learning-project/blob/main/%ED%86%A0%ED%8A%B8%EB%84%98_%EC%9A%B0%EC%8A%B9%EC%98%88%EC%B8%A1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas numpy scikit-learn tensorflow

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping




In [7]:
# 데이터 로드
df = pd.read_csv('/content/drive/MyDrive/dl/England CSV.csv')  # Colab 환경에서는 드라이브 연동 후 경로를 맞춰주세요

# 컬럼 이름 간소화
df = df.rename(columns={
    'FTH Goals': 'FTHG', 'FTA Goals': 'FTAG',
    'HTH Goals': 'HTHG', 'HTA Goals': 'HTAG',
    'FT Result': 'FTR', 'HT Result': 'HTR'
})

# 날짜와 시즌 정보
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df['Season'] = df['Season'].str.replace('/','-')  # ex: '2024/25' → '2024-25'

df.head(12153)

Unnamed: 0,Date,Season,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,H Fouls,A Fouls,H Corners,A Corners,H Yellow,A Yellow,H Red,A Red,Display_Order,League
0,2025-01-16,2024-25,Ipswich Town,Brighton & Hove Albion,0,2,A,0.0,1.0,A,...,13.0,14.0,1.0,9.0,2.0,2.0,0.0,0.0,20250116,Premier League
1,2025-01-16,2024-25,Man United,Southampton,3,1,H,0.0,1.0,A,...,7.0,10.0,4.0,4.0,1.0,3.0,0.0,0.0,20250116,Premier League
2,2025-01-15,2024-25,Everton,Aston Villa,0,1,A,0.0,0.0,D,...,17.0,10.0,8.0,5.0,2.0,1.0,0.0,0.0,20250115,Premier League
3,2025-01-15,2024-25,Leicester,Crystal Palace,0,2,A,0.0,0.0,D,...,7.0,6.0,4.0,3.0,0.0,0.0,0.0,0.0,20250115,Premier League
4,2025-01-15,2024-25,Newcastle,Wolves,3,0,H,1.0,0.0,H,...,10.0,13.0,4.0,2.0,0.0,2.0,0.0,0.0,20250115,Premier League
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12148,1993-08-14,1993-94,Southampton,Everton,0,2,A,,,,...,,,,,,,,,19930814,Premier League
12149,1993-08-14,1993-94,Sheffield United,Swindon,3,1,H,,,,...,,,,,,,,,19930814,Premier League
12150,1993-08-14,1993-94,Liverpool,Sheffield Weds,2,0,H,,,,...,,,,,,,,,19930814,Premier League
12151,1993-08-14,1993-94,Aston Villa,QPR,4,1,H,,,,...,,,,,,,,,19930814,Premier League


In [8]:
# 경기 결과(win/draw/loss) 숫자화 함수
def get_result(row, team):
    if row['HomeTeam'] == team:
        return 1 if row['FTR']=='H' else (0.5 if row['FTR']=='D' else 0)
    else:
        return 1 if row['FTR']=='A' else (0.5 if row['FTR']=='D' else 0)

# 시즌·팀별 집계용 빈 DataFrame 생성
teams = pd.unique(df[['HomeTeam','AwayTeam']].values.ravel())
seasons = df['Season'].unique()
agg_rows = []

for season in seasons:
    dft = df[df['Season']==season]
    for team in teams:
        sub = dft[(dft['HomeTeam']==team)|(dft['AwayTeam']==team)]
        if sub.empty: continue

        wins = sum(sub.apply(lambda r: get_result(r, team)==1, axis=1))
        draws = sum(sub.apply(lambda r: get_result(r, team)==0.5, axis=1))
        losses = sum(sub.apply(lambda r: get_result(r, team)==0, axis=1))
        gf = sum(np.where(sub['HomeTeam']==team, sub['FTHG'], sub['FTAG']))
        ga = sum(np.where(sub['HomeTeam']==team, sub['FTAG'], sub['FTHG']))
        pts = wins*3 + draws*1

        agg_rows.append({
            'Season': season,
            'Team': team,
            'Wins': wins,
            'Draws': draws,
            'Losses': losses,
            'GF': gf,
            'GA': ga,
            'GD': gf-ga,
            'Points': pts,
            'Matches': len(sub)
        })

df_team = pd.DataFrame(agg_rows)


In [9]:
# 시즌별 최고 득점(Points) 팀을 우승자로 레이블링
champions = (df_team
             .loc[df_team.groupby('Season')['Points'].idxmax(), ['Season','Team']]
             .rename(columns={'Team':'Champion'}))

# 원본 데이터와 병합
df_team = df_team.merge(champions, on='Season')
# 챔피언이면 1, 아니면 0
df_team['IsChampion'] = (df_team['Team'] == df_team['Champion']).astype(int)


In [10]:
# 특성(feature)과 레이블(label) 분리
features = ['Wins','Draws','Losses','GF','GA','GD','Points','Matches']
X = df_team[features]
y = df_team['IsChampion']

# 데이터 분리
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)


In [16]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=16,
    callbacks=[es]
)


Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 58ms/step - accuracy: 0.8537 - loss: 0.5457 - val_accuracy: 0.9538 - val_loss: 0.3487
Epoch 2/200
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.9439 - loss: 0.3168 - val_accuracy: 0.9538 - val_loss: 0.2270
Epoch 3/200
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.9574 - loss: 0.2405 - val_accuracy: 0.9538 - val_loss: 0.1773
Epoch 4/200
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9420 - loss: 0.2229 - val_accuracy: 0.9538 - val_loss: 0.1469
Epoch 5/200
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9539 - loss: 0.1604 - val_accuracy: 0.9538 - val_loss: 0.1277
Epoch 6/200
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9329 - loss: 0.1769 - val_accuracy: 0.9538 - val_loss: 0.1113
Epoch 7/200
[1m33/33[0m [32m━━━━━━━━━━━

In [17]:
# 전체 성능 확인
loss, acc = model.evaluate(X_val, y_val, verbose=0)
print(f'검증 정확도: {acc:.3f}')

# 2024-25 시즌 토트넘 데이터만 추출해 예측
tott_2425 = df_team[
    (df_team['Season']=='2024-25') & (df_team['Team']=='Tottenham')
][features]
tott_scaled = scaler.transform(tott_2425)
prob = model.predict(tott_scaled)[0][0]
print(f"2024-25 시즌 토트넘 우승 확률 예측: {prob*100:.2f}%")


검증 정확도: 0.977
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
2024-25 시즌 토트넘 우승 확률 예측: 0.00%
