In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sklearn.metrics import f1_score
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanSquaredError
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time
import pandas as pd
import json
import pytesseract
from PIL import Image
import ast
import re
import numpy as np

In [5]:
# Load the team_data.csv file
team_data = pd.read_csv('data_raw/team_data_filtered_sorted.csv')

# Select only the relevant columns for modeling
relevant_columns = ['Total_shots', 'Shots_off', 'Shots_saved', 'Corners', 'Free_kicks', 'Off_sides', 'Fouls']
data_filtered = team_data[relevant_columns]

# Convert data into sequences of 5 games for features and the 6th game for labels
sequence_length = 5
X, y = [], []

for i in range(len(data_filtered) - sequence_length):
    # Extract 5 consecutive games for input
    X.append(data_filtered.iloc[i:i + sequence_length].values)
    # Extract the next game as the target
    y.append(data_filtered.iloc[i + sequence_length].values)

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

X.shape, y.shape  # Verify shapes


((353, 5, 7), (353, 7))

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])
X_train_scaled = scaler.fit_transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test_reshaped).reshape(X_test.shape)

In [None]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(units=64, return_sequences=True, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=64))
model.add(Dropout(0.2))
model.add(Dense(units=7))  # Output layer with 7 units, matching the number of target features

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=16, validation_data=(X_test_scaled, y_test))

history.history


In [None]:
# Lưu mô hình vào tệp .h5
model.save("lstm_football_model.h5")
print("Model saved to lstm_football_model.h5")

In [6]:
# Tải mô hình với custom_objects để xử lý hàm 'mse'
model = load_model("lstm_football_model.h5", custom_objects={"mse": MeanSquaredError()})
print("Model loaded from lstm_football_model.h5")



Model loaded from lstm_football_model.h5


In [11]:
# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Since the model outputs continuous values, we need to round them to the nearest integer
y_pred_rounded = np.round(y_pred).astype(int)

# Calculate the F1-score for each feature and then take the average
f1_scores = []
for i in range(y_test.shape[1]):
    f1 = f1_score(y_test[:, i], y_pred_rounded[:, i], average='weighted')
    f1_scores.append(f1)

# Calculate the average F1-score
average_f1_score = np.mean(f1_scores)

print("Average F1-score:", average_f1_score)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 129ms/step
Average F1-score: 0.08740959327343734


In [12]:
# Tải mô hình với custom_objects để xử lý hàm 'mse'
model = load_model("lstm_football_model.h5", custom_objects={"mse": MeanSquaredError()})
print("Model loaded from lstm_football_model.h5")


# Ví dụ giả định về recent_matches (thay thế bằng dữ liệu thực của bạn)
recent_matches = np.array([
    [
        [10, 3, 7, 2, 5, 15, 1],  # Dữ liệu trận 1
        [12, 2, 8, 4, 6, 13, 2],  # Dữ liệu trận 2
        [9, 5, 6, 1, 4, 14, 0],   # Dữ liệu trận 3
        [11, 3, 9, 3, 7, 12, 1],  # Dữ liệu trận 4
        [10, 4, 5, 2, 6, 13, 0],  # Dữ liệu trận 5
    ]
])

# Dự đoán
prediction = model.predict(recent_matches)
print("Predicted stats for next match:", prediction)



Model loaded from lstm_football_model.h5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step
Predicted stats for next match: [[13.913827   4.5459886  3.1269624  5.153135   8.899273   2.032382
  10.820864 ]]
