# Descargar dataset procesado

In [None]:
!pip install gdown

In [None]:
import pandas as pd
import numpy as np
import gdown

In [None]:
# Download the cleaned FIFA 19 dataset from Google Drive and load it into a DataFrame.
url = 'https://drive.google.com/uc?id=10AA2zNBdXltzZ4syn39SA7qLXsoWLAV7'
output = 'fifa-19-cleaned-dataset.csv'

# gdown.download returns the written path on success; some gdown versions
# return None instead of raising when the download fails. Stop here in that
# case so a missing or stale local file is not read by accident.
downloaded_path = gdown.download(url, output, quiet=False)
if downloaded_path is None:
    raise RuntimeError(f"Could not download the dataset from {url}")

df = pd.read_csv(output)

In [None]:
# Show every column when a DataFrame is displayed instead of truncating wide frames.
pd.options.display.max_columns = None

In [None]:
# Display the loaded DataFrame as the cell output for a quick visual check.
df

# Prepara valores para los Tests

In [None]:
# Separate the target column from the predictors.
y = df['Value']  # target the models try to predict
X = df.drop(columns=['Value'])  # features: every remaining column


In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Regresión Lineal

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a plain linear regression as the baseline and score it on the held-out split.
lr = LinearRegression().fit(X_train, y_train)

y_pred = lr.predict(X_test)
mae_lr = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Error absoluto medio (Regresión Lineal): {mae_lr:.4f}")

# Polinómica

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the features to polynomial terms of degree 2. The expansion is fitted
# on the training split and then applied unchanged to the test split.
poly = PolynomialFeatures(degree=2)
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

# Linear regression on the expanded feature matrix.
lr_poly = LinearRegression().fit(X_poly_train, y_train)

y_pred_poly = lr_poly.predict(X_poly_test)
mae_poly = mean_absolute_error(y_true=y_test, y_pred=y_pred_poly)
print(f"Error absoluto medio (Regresión Polinomial): {mae_poly:.4f}")

# Ridge

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression (alpha=1.0), evaluated on the held-out split.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

y_pred_ridge = ridge.predict(X_test)
mae_ridge = mean_absolute_error(y_true=y_test, y_pred=y_pred_ridge)
print(f"Error absoluto medio (Ridge): {mae_ridge:.4f}")

# Lasso

In [None]:
from sklearn.linear_model import Lasso

# Lasso regression (alpha=1.0), evaluated on the held-out split.
lasso = Lasso(alpha=1.0).fit(X_train, y_train)

y_pred_lasso = lasso.predict(X_test)
mae_lasso = mean_absolute_error(y_true=y_test, y_pred=y_pred_lasso)
print(f"Error absoluto medio (Lasso): {mae_lasso:.4f}")

# Elastic

In [None]:
from sklearn.linear_model import ElasticNet

# ElasticNet regression (alpha=1.0, l1_ratio=0.5), evaluated on the held-out split.
elastic = ElasticNet(alpha=1.0, l1_ratio=0.5).fit(X_train, y_train)

y_pred_elastic = elastic.predict(X_test)
mae_elastic = mean_absolute_error(y_true=y_test, y_pred=y_pred_elastic)
print(f"Error absoluto medio (ElasticNet): {mae_elastic:.4f}")

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Decision tree regressor.
# A fixed random_state makes the fitted tree, and therefore the reported MAE,
# repeatable across runs, matching the seed used for the train/test split.
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)

# Score the tree on the held-out split.
y_pred_dt = dt.predict(X_test)
mae_dt = mean_absolute_error(y_test, y_pred_dt)
print(f"Error absoluto medio (Árboles de Decisión): {mae_dt:.4f}")

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest regressor with 100 trees.
# A fixed random_state makes the fitted forest, and therefore the reported MAE,
# repeatable across runs, matching the seed used for the train/test split.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Score the forest on the held-out split.
y_pred_rf = rf.predict(X_test)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
print(f"Error absoluto medio (Bosques Aleatorios): {mae_rf:.4f}")

# SVR

El SVR estuvo una hora ejecutándose y tuve que cancelarlo.

# Comparación de MAE (Mean Absolute Error)

In [None]:
def compare_mae(mae_lr, mae_poly, mae_ridge, mae_lasso, mae_elastic, mae_dt, mae_rf):
    """Print the MAE of every model in ascending order and name the lowest one.

    Each argument is the mean absolute error of one model. Models with equal
    MAE keep the order in which they are listed below. Nothing is returned;
    the summary is written to standard output.
    """
    labels = (
        'Linear Regression',
        'Polynomial Regression',
        'Ridge',
        'Lasso',
        'ElasticNet',
        'Decision Tree',
        'Random Forest',
    )
    scores = (mae_lr, mae_poly, mae_ridge, mae_lasso, mae_elastic, mae_dt, mae_rf)

    # sorted() is stable, so ties stay in declaration order.
    ranking = sorted(zip(labels, scores), key=lambda pair: pair[1])

    print("Resumen de MAE:")
    for label, score in ranking:
        print(f"{label}: {score:.4f}")

    # The first entry of the ascending ranking is the model with the lowest error.
    winner, winner_score = ranking[0]
    print(f"\nEl mejor modelo es '{winner}' con un MAE de {winner_score:.4f}")

In [None]:
# Print the MAE ranking for the seven models fitted above.
compare_mae(
    mae_lr=mae_lr,
    mae_poly=mae_poly,
    mae_ridge=mae_ridge,
    mae_lasso=mae_lasso,
    mae_elastic=mae_elastic,
    mae_dt=mae_dt,
    mae_rf=mae_rf,
)

# Porcentaje de las predicciones dentro de un margen

En la primera entrega establecimos que, para aceptar nuestro modelo, al menos el 85% de las predicciones debe caer dentro de un margen de error aceptable, es decir, un rango del 10% respecto al valor real.

In [None]:
def percentage_within_margin(y_true, y_pred, margin):
    """Return the percentage of predictions within a relative margin of the true values.

    A prediction counts as within the margin when
    ``|y_true - y_pred| <= margin * |y_true|``.

    Args:
        y_true: Array-like of observed values (list, NumPy array or pandas Series).
        y_pred: Array-like of predicted values, compared position by position
            with ``y_true``.
        margin: Allowed relative error as a fraction (0.1 means 10%).

    Returns:
        The percentage (0-100) of predictions that fall inside the margin.
    """
    # Convert to float arrays so plain lists are accepted and the comparison
    # is positional regardless of any index the inputs carry.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    absolute_difference = np.abs(actual - predicted)

    # Use |actual| so negative true values get a positive tolerance; with the
    # signed value the tolerance would be negative and never satisfied.
    within_margin = absolute_difference <= margin * np.abs(actual)

    # The mean of a boolean mask is the fraction of True entries.
    return np.mean(within_margin) * 100


In [None]:
# Test-set predictions of every fitted model, keyed by display name.
y_pred_values = {
    'Linear Regression': y_pred,
    'Polynomial Regression': y_pred_poly,
    'Ridge': y_pred_ridge,
    'Lasso': y_pred_lasso,
    'ElasticNet': y_pred_elastic,
    'Decision Tree': y_pred_dt,
    'Random Forest': y_pred_rf,
}

# Relative tolerance: a prediction counts when it is within 10% of the true value.
margin = 0.1
for model_name, y_pre in y_pred_values.items():
    percentage = percentage_within_margin(y_true=y_test, y_pred=y_pre, margin=margin)
    print(f"Porcentaje de predicciones dentro del margen de error del {margin*100}% ({model_name}): {percentage:.2f}%")