# AutoMPG con SVM

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Download the Auto MPG CSV into the working directory
# (-c resumes a partial download, -q suppresses wget's output).
!wget -cq https://www.dropbox.com/s/s2s8pbs8lilxdqs/auto-mpg.csv

In [None]:
# Load the dataset from the CSV file in the working directory
csv_path = "auto-mpg.csv"
data = pd.read_csv(csv_path)

In [None]:
# Shuffle every row with a fixed seed so the order is reproducible,
# then renumber the index from 0.
shuffled = data.sample(frac=1, random_state=42)
data = shuffled.reset_index(drop=True)

In [None]:
# Preprocessing
# Fill missing values with each column's mean. numeric_only=True is required
# because the text column 'name' is still present here: without it,
# pandas >= 2.0 raises TypeError when averaging a string column (older
# versions silently skipped it, so the result is unchanged there).
data = data.fillna(data.mean(numeric_only=True))
# One-hot encode the categorical 'origin' column into origin_* indicator columns.
data = pd.get_dummies(data, columns=['origin'], prefix='origin')
# Drop the 'name' column so it is not passed on as a feature.
data = data.drop('name', axis=1)

In [None]:
# Split into the target vector y (fuel economy) and the feature matrix X
# (every remaining column).
target_column = "mpg"
y = data[target_column]
X = data.drop(columns=target_column)

In [None]:
# Standardize the features
from sklearn.preprocessing import StandardScaler

# NOTE(review): the scaler is fitted on the full feature matrix before any
# cross-validation split, so held-out folds influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Hyperparameter grid for GridSearchCV.
# A list of grids is used so that 'gamma' is only crossed with the kernels
# that actually use it. The linear kernel ignores gamma, so a single flat grid
# would fit the same linear model three times per value of C.
param_grid = [
    {
        'kernel': ['rbf', 'poly'],
        'C': [20, 25, 30],
        'gamma': [1, 0.1, 0.01],
    },
    {
        'kernel': ['linear'],
        'C': [20, 25, 30],
    },
]

In [None]:
# Build the SVR estimator and wrap it in a 5-fold grid search that selects
# hyperparameters by negative MSE and refits the winner on all the data.
svr = SVR()
grid_search = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    refit=True,
    verbose=2,
)

In [None]:
# Run the grid search: fit every configuration on the scaled features
grid_search.fit(X=X_scaled, y=y)

In [None]:
# Report the winning hyperparameter combination
best_params = grid_search.best_params_
print("Mejores parámetros:", best_params)

In [None]:
# Best model found by GridSearchCV (because the search was built with
# refit=True, this estimator has already been refitted on the full data).
best_svr = grid_search.best_estimator_

In [None]:
# Cross-validate the best model.
# cross_validate scores several metrics in one pass over the folds, so the
# model is fitted 5 times instead of 10 (one 5-fold run per metric).
# It is imported here because the notebook's import cell does not include it.
from sklearn.model_selection import cross_validate

# NOTE(review): these are the same data and fold count that the grid search
# used to pick the hyperparameters, so the scores are likely optimistic.
cv_results = cross_validate(
    best_svr,
    X_scaled,
    y,
    cv=5,
    scoring=("neg_mean_squared_error", "r2"),
)
cv_scores = cv_results["test_neg_mean_squared_error"]
# The scorer returns negated MSE; flip the sign before taking the root.
cv_rmse_scores = np.sqrt(-cv_scores)
cv_r2_scores = cv_results["test_r2"]

In [None]:
# Show the per-fold scores for each metric
for label, fold_scores in (
    ("Cross-Validation RMSE:", cv_rmse_scores),
    ("Cross-Validation R2:", cv_r2_scores),
):
    print(label, fold_scores)

In [None]:
# Average each metric across the folds
cv_rmse_mean = cv_rmse_scores.mean()
cv_r2_mean = cv_r2_scores.mean()

print("Cross-Validation Average RMSE:", cv_rmse_mean)
print("Cross-Validation Average R2:", cv_r2_mean)

In [None]:
def mpg_to_kmpl(mpg):
    """Convert a value in miles per gallon to kilometers per liter.

    The conversion is a pure scaling (no offset): multiply by kilometers per
    mile, then divide by liters per gallon.
    """
    km_per_mile = 1.60934
    liters_per_gallon = 3.78541
    return mpg * km_per_mile / liters_per_gallon

In [None]:
# Express the average cross-validation RMSE in km/l
rmse_kmpl = mpg_to_kmpl(cv_rmse_mean)
print(rmse_kmpl)

In [None]:
# Install eli5, which the following cell imports to compute permutation importance.
!pip install eli5

In [None]:
import eli5
from eli5.sklearn import PermutationImportance

# Requires the tuned SVR (best_svr) to be fitted already, with X_scaled and y
# holding the scaled features and the target.

# Column names taken from the unscaled feature frame, used to label the weights.
feature_names = list(X.columns)

# Estimate permutation importance for the tuned SVR on the scaled data.
perm = PermutationImportance(best_svr, random_state=1)
perm.fit(X_scaled, y)

# Display the feature-importance table
eli5.show_weights(perm, feature_names=feature_names)
