<a href="https://colab.research.google.com/github/phosgenek/UEM/blob/main/MdSP1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
from google.colab import files
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [79]:
# Subir el archivo CSV
#uploaded = files.upload()

# Cargar el dataset
file_path = "cleaned_data_lluvia.csv"
df = pd.read_csv(file_path)

In [80]:
df.head()

Unnamed: 0,id,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,HS,...,AY,HR,AR,City,Timestamp,Latitud,Longitud,Clima,Temperatura,Presion
0,0,Alaves,Barcelona,0,0,D,0,0,D,5,...,1,0,0,Vitoria-Gasteiz,2005-08-27 18:00:00,42.846509,-2.672403,Despejado,18.6,1017.3
1,1,Ath Bilbao,Sociedad,3,0,H,0,0,D,10,...,1,0,0,Bilbao,2005-08-27 16:00:00,43.263002,-2.935004,Despejado,24.0,1018.2
2,2,Valencia,Betis,1,0,H,0,0,D,9,...,3,0,0,Valencia,2005-08-27 14:30:00,39.469707,-0.376335,Despejado,,
3,4,Ath Madrid,Zaragoza,0,0,D,0,0,D,16,...,7,0,0,Madrid,2005-08-28 14:30:00,40.416705,-3.703582,Despejado,,
4,7,Cadiz,Real Madrid,1,2,A,0,1,A,15,...,2,0,0,Cádiz,2005-08-28 21:00:00,36.529744,-6.292898,Despejado,22.0,1016.4


In [81]:
# Convertir fecha a datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Eliminar columnas irrelevantes
drop_cols = ['Unnamed', 'Latitud', 'Longitud']
df = df.drop(columns=[col for col in drop_cols if col in df.columns])

# Manejo de valores nulos
for col in df.columns:
    if df[col].dtype == "object":
        df[col].fillna(df[col].mode()[0], inplace=True)  # Moda para categóricas
    else:
        df[col].fillna(df[col].mean(), inplace=True)  # Media para numéricas

# Codificar 'FTR' y 'HTR' con LabelEncoder
for col in ['FTR', 'HTR']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # Convertimos 'D', 'H', 'A' en números

# Unificar equipos para que tengan el mismo código en HomeTeam y AwayTeam
equipos_unicos = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()
team_encoder = LabelEncoder()
team_encoder.fit(equipos_unicos)
df['HomeTeam'] = team_encoder.transform(df['HomeTeam'])
df['AwayTeam'] = team_encoder.transform(df['AwayTeam'])

# Codificar ciudades y clima
city_encoder = LabelEncoder()
df['City'] = city_encoder.fit_transform(df['City'])

clima_encoder = LabelEncoder()
df['Clima'] = clima_encoder.fit_transform(df['Clima'])

# Guardar los mapeos en diccionarios
equipos_mapeados = {team: code for team, code in zip(team_encoder.classes_, team_encoder.transform(team_encoder.classes_))}
ciudades_mapeadas = {city: code for city, code in zip(city_encoder.classes_, city_encoder.transform(city_encoder.classes_))}
climas_mapeados = {clima: code for clima, code in zip(clima_encoder.classes_, clima_encoder.transform(clima_encoder.classes_))}

# Imprimir los mapeos
print("Mapeo de equipos:", equipos_mapeados)
print("Mapeo de ciudades:", ciudades_mapeadas)
print("Mapeo de climas:", climas_mapeados)

Mapeo de equipos: {'Alaves': 0, 'Almeria': 1, 'Ath Bilbao': 2, 'Ath Madrid': 3, 'Barcelona': 4, 'Betis': 5, 'Cadiz': 6, 'Celta': 7, 'Cordoba': 8, 'Eibar': 9, 'Elche': 10, 'Espanol': 11, 'Getafe': 12, 'Gimnastic': 13, 'Girona': 14, 'Granada': 15, 'Hercules': 16, 'Huesca': 17, 'La Coruna': 18, 'Las Palmas': 19, 'Leganes': 20, 'Levante': 21, 'Malaga': 22, 'Mallorca': 23, 'Numancia': 24, 'Osasuna': 25, 'Real Madrid': 26, 'Recreativo': 27, 'Santander': 28, 'Sevilla': 29, 'Sociedad': 30, 'Sp Gijon': 31, 'Tenerife': 32, 'Valencia': 33, 'Valladolid': 34, 'Vallecano': 35, 'Villarreal': 36, 'Xerez': 37, 'Zaragoza': 38}
Mapeo de ciudades: {'A Coruña': 0, 'Alicante': 1, 'Almería': 2, 'Barcelona': 3, 'Bilbao': 4, 'Cádiz': 5, 'Córdoba': 6, 'Eibar': 7, 'Elche': 8, 'Getafe': 9, 'Gijón': 10, 'Girona': 11, 'Granada': 12, 'Huelva': 13, 'Huesca': 14, 'Jerez de la Frontera': 15, 'Las Palmas de Gran Canaria': 16, 'Leganés': 17, 'Madrid': 18, 'Málaga': 19, 'Palma de Mallorca': 20, 'Pamplona': 21, 'San Sebast

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)  # Media para numéricas
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)  # Moda para categóricas


In [82]:
print(df.select_dtypes(include=['object']).columns)
print(df['FTR'].unique())
print(df['HTR'].unique())


Index([], dtype='object')
[1 2 0]
[1 0 2]


In [83]:
# Selección de características y variable objetivo
X = df.drop(columns=['FTR', 'Timestamp'])
y = df['FTR']  # Resultado del partido

# Normalizar características numéricas
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividir en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convertir a categórico para clasificación multiclase
y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)

# Construir el modelo de red neuronal
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # 3 clases: victoria local, empate, victoria visitante
])

# Compilar el modelo
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [84]:
# Entrenar el modelo
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Evaluar el modelo
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Precisión en test: {test_acc:.4f}")

Epoch 1/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6970 - loss: 0.6874 - val_accuracy: 0.9739 - val_loss: 0.1840
Epoch 2/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9881 - loss: 0.1164 - val_accuracy: 0.9986 - val_loss: 0.0280
Epoch 3/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9999 - loss: 0.0173 - val_accuracy: 0.9993 - val_loss: 0.0100
Epoch 4/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0060 - val_accuracy: 0.9993 - val_loss: 0.0050
Epoch 5/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0030 - val_accuracy: 0.9993 - val_loss: 0.0036
Epoch 6/50
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0017 - val_accuracy: 0.9993 - val_loss: 0.0025
Epoch 7/50
[1m177/177[0m 

In [85]:
# Función para predecir un nuevo partido con códigos directamente
def predecir_partido(home_team_code, away_team_code, city_code, clima_code, temperatura):
    # Crear entrada de predicción con la cantidad correcta de características
    entrada = np.zeros((1, X_train.shape[1]))  # Crear un array con ceros del mismo tamaño que el conjunto de entrenamiento

    # Insertar los valores en las posiciones correctas
    entrada[0, 0] = home_team_code     # Asignar código del equipo local
    entrada[0, 1] = away_team_code     # Asignar código del equipo visitante
    entrada[0, 2] = city_code          # Asignar código de la ciudad
    entrada[0, 3] = clima_code         # Asignar código del clima
    entrada[0, 4] = temperatura        # Asignar temperatura

    # Normalizar la entrada con el scaler ya entrenado
    entrada = scaler.transform(entrada)

    # Realizar la predicción
    prediccion = model.predict(entrada)

    # Obtener la clase con mayor probabilidad
    categorias = ['Victoria local', 'Empate', 'Victoria visitante']
    resultado = categorias[np.argmax(prediccion)]

    return resultado

In [86]:
prediccion = predecir_partido(3, 2, 18, 9, 8)  # Códigos ya convertidos
print(f"Predicción: {prediccion}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step
Predicción: Empate
