# AutoMPG con regresión lineal

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Fetch auto-mpg.csv via an IPython shell escape; the later read_csv call
# expects the file in the working directory.
!wget -cq https://www.dropbox.com/s/s2s8pbs8lilxdqs/auto-mpg.csv

In [None]:
# List the working directory, e.g. to confirm that the CSV was downloaded.
!ls -la

In [None]:
# Read the Auto MPG CSV from the working directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer="auto-mpg.csv")

In [None]:
# Randomly permute the rows with a fixed seed so the run is reproducible.
# NOTE(review): this matters for the cross-validation further down, which is
# called with an integer cv and therefore presumably splits rows in file order.
data = data.sample(frac=1, random_state=42)
# Discard the old row labels so the index is 0..n-1 again.
data = data.reset_index(drop=True)

In [None]:
# Preview the first rows of the shuffled data.
data.head()

In [None]:
# Summary statistics for the columns of the dataset.
data.describe()

In [None]:
# Report how many missing values each column contains.
print(data.isnull().sum())

# Impute missing values with the mean of their column.
# numeric_only=True restricts the means to numeric columns. The 'name' column
# is still present here (it is only dropped later) and, if it holds text,
# pandas >= 2.0 raises TypeError when asked to average it. Columns without a
# computed mean are simply left untouched by fillna.
data.fillna(data.mean(numeric_only=True), inplace=True)

In [None]:
# One-hot encode the 'origin' column: it is replaced by one indicator column
# per distinct value, each named with the 'origin' prefix.
data = pd.get_dummies(data=data, prefix='origin', columns=['origin'])

In [None]:
# Preview the data again to check the new one-hot 'origin' columns.
data.head()

In [None]:
# Remove the 'name' column so it is not part of the feature matrix.
data = data.drop(columns=['name'])

In [None]:
# Preview the data once more to confirm that 'name' is gone.
data.head()

In [None]:
# Pairwise correlation between all remaining columns.
correlation_matrix = data.corr()

# Draw the matrix as an annotated heatmap on a 10x8 inch figure, with each
# cell showing its coefficient rounded to two decimals.
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    linewidths=.5,
)
plt.title("Matriz de Correlación")
plt.show()

In [None]:
# Target vector: the 'mpg' column. Feature matrix: every other column.
y = data["mpg"]
X = data.drop(columns=["mpg"])

In [None]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error, r2_score

In [None]:
# Create a linear regression model
model = LinearRegression()

In [None]:
# 5-fold cross-validation. Both metrics are computed from a single set of
# fits via cross_validate, instead of calling cross_val_score once per metric
# (which trains the model on every fold twice).
from sklearn.model_selection import cross_validate

cv_results = cross_validate(
    model, X, y, cv=5, scoring=("neg_mean_squared_error", "r2")
)

# The MSE scorer is negated (so that higher is better); flip the sign before
# taking the square root to obtain the per-fold RMSE.
cv_scores = cv_results["test_neg_mean_squared_error"]
cv_rmse_scores = np.sqrt(-cv_scores)
cv_r2_scores = cv_results["test_r2"]

print("Cross-Validation RMSE:", cv_rmse_scores)
print("Cross-Validation R2:", cv_r2_scores)

In [None]:
# Collapse the per-fold scores into one average per metric.
cv_rmse_mean = cv_rmse_scores.mean()
cv_r2_mean = cv_r2_scores.mean()

print("Cross-Validation Average RMSE:", cv_rmse_mean)
print("Cross-Validation Average R2:", cv_r2_mean)

In [None]:
def mpg_to_kmpl(mpg):
    """Convert a fuel economy from miles per gallon to kilometres per litre.

    The conversion is a pure scale factor: multiply by kilometres per mile
    (1.60934) and divide by litres per gallon (3.78541).

    Args:
        mpg: Value in miles per gallon. Anything supporting ``*`` and ``/``
            with a float works.

    Returns:
        The equivalent value in kilometres per litre.
    """
    km_per_mile = 1.60934
    liters_per_gallon = 3.78541
    return (mpg * km_per_mile) / liters_per_gallon

In [None]:
# Express the average cross-validated RMSE in km/l instead of mpg. Reusing
# mpg_to_kmpl on an error value is valid because the conversion is a pure
# scale factor with no offset.
print(mpg_to_kmpl(cv_rmse_mean))