<a href="https://colab.research.google.com/github/perico85/Alphazero/blob/master/funcional_2_predicciones.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

# Lista de URLs de los archivos CSV
urls = [
    "https://www.football-data.co.uk/mmz4281/2324/E0.csv",
    "https://www.football-data.co.uk/mmz4281/2223/E0.csv",
    "https://www.football-data.co.uk/mmz4281/2122/E0.csv",
    "https://www.football-data.co.uk/mmz4281/2021/E0.csv",
    "https://www.football-data.co.uk/mmz4281/1920/E0.csv",
    # Añade más URLs si es necesario
]

# Codificar las etiquetas de resultado FTR y HTR
label_encoder = LabelEncoder()

# Función para cargar los datos desde una URL y procesarlos
def cargar_datos_desde_urls(urls):
    dataframes = []
    for url in urls:
        data = pd.read_csv(url)
        data['FTR'] = label_encoder.fit_transform(data['FTR'])
        data['HTR'] = label_encoder.fit_transform(data['HTR'])
        dataframes.append(data)
    return pd.concat(dataframes)

# Función para calcular las estadísticas de un equipo basado en sus enfrentamientos previos
def calcular_estadisticas_equipo(matches, team, home=True):
    if home:
        matches = matches[matches['HomeTeam'] == team]
        estadisticas = matches[['FTHG', 'FTR', 'HTHG', 'HTR', 'HS', 'HST', 'HF', 'HC', 'HY', 'HR']].mean()
    else:
        matches = matches[matches['AwayTeam'] == team]
        estadisticas = matches[['FTAG', 'FTR', 'HTAG', 'HTR', 'AS', 'AST', 'AF', 'AC', 'AY', 'AR']].mean()
    return estadisticas.values

# Seleccionar las características relevantes para los modelos
features = ['Home_FTHG', 'Home_FTR', 'Home_HTHG', 'Home_HTR', 'Home_HS', 'Home_HST', 'Home_HF', 'Home_HC', 'Home_HY', 'Home_HR', 'Away_FTAG', 'Away_FTR', 'Away_HTAG', 'Away_HTR', 'Away_AS', 'Away_AST', 'Away_AF', 'Away_AC', 'Away_AY', 'Away_AR']

# Función para entrenar y evaluar los modelos
def train_and_evaluate_model(data):
    X_train, X_test, y_train, y_test = train_test_split(data[features], data['FTR'], test_size=0.2, random_state=42)

    # Crear y entrenar el modelo de regresión lineal
    regression_model = LinearRegression()
    regression_model.fit(X_train, y_train)

    # Crear y entrenar el modelo de red neuronal
    ann_model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(3, activation='softmax')
    ])
    ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    ann_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

    # Evaluar los modelos en el conjunto de prueba
    regression_accuracy = regression_model.score(X_test, y_test)
    ann_loss, ann_accuracy = ann_model.evaluate(X_test, y_test)

    return regression_accuracy, ann_loss, ann_accuracy

# Cargar los datos de todas las URLs
data = cargar_datos_desde_urls(urls)

# Crear nuevas columnas para las estadísticas del equipo local y visitante
data[['Home_FTHG', 'Home_FTR', 'Home_HTHG', 'Home_HTR', 'Home_HS', 'Home_HST', 'Home_HF', 'Home_HC', 'Home_HY', 'Home_HR']] = data.apply(lambda row: calcular_estadisticas_equipo(data, row['HomeTeam'], home=True), axis=1, result_type='expand')
data[['Away_FTAG', 'Away_FTR', 'Away_HTAG', 'Away_HTR', 'Away_AS', 'Away_AST', 'Away_AF', 'Away_AC', 'Away_AY', 'Away_AR']] = data.apply(lambda row: calcular_estadisticas_equipo(data, row['AwayTeam'], home=False), axis=1, result_type='expand')

# Entrenar y evaluar los modelos con los datos combinados
regression_accuracy, ann_loss, ann_accuracy = train_and_evaluate_model(data)
print(f'Accuracy of Linear Regression: {regression_accuracy}')
print(f'Loss of ANN: {ann_loss}, Accuracy of ANN: {ann_accuracy}')

# Definir modelos globalmente
regression_model = LinearRegression()
regression_model.fit(data[features], data['FTR'])

ann_model = Sequential([
    Dense(64, activation='relu', input_shape=(len(features),)),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')
])
ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
ann_model.fit(data[features], data['FTR'], epochs=50, batch_size=32, validation_split=0.2)


print(f'Accuracy of Linear Regression: {regression_accuracy}')
print(f'Loss of ANN: {ann_loss}, Accuracy of ANN: {ann_accuracy}')
# Función para predecir el resultado del partido utilizando los dos modelos
def predict_result(home_stats, away_stats):
    match_stats = np.concatenate((home_stats, away_stats))  # Concatenar las estadísticas de los equipos local y visitante
    match_stats = pd.DataFrame(match_stats.reshape(1, -1), columns=features)  # Creación del DataFrame con los nombres
    regression_result = regression_model.predict(match_stats)[0]
    ann_result = ann_model.predict([match_stats])[0]

    # Decode label for regression result
    regression_label = label_encoder.inverse_transform([round(regression_result)])[0]

    # Decode label for ANN result
    ann_label = label_encoder.inverse_transform([ann_result.argmax()])[0]

    return regression_label, ann_label

# Ejemplo de uso
home_team = "Liverpool"  # Reemplazar con el nombre real del equipo local
away_team = "Arsenal"  # Reemplazar con el nombre real del equipo visitante

home_stats = calcular_estadisticas_equipo(data, home_team, home=True)
away_stats = calcular_estadisticas_equipo(data, away_team, home=False)


regression_result, ann_result = predict_result(home_stats, away_stats)
print(f'Resultado predicho por regresión lineal: {regression_result}')
print(f'Resultado predicho por red neuronal: {ann_result}')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy of Linear Regression: 0.19601460982467567
Loss of ANN: 0.9507998824119568, Accuracy of ANN: 0.5446927547454834
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/5