In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib as jb

# Load the dataset
car_df = pd.read_csv('car_price_dataset.csv')

# Encode the categorical columns as integers, one LabelEncoder per column.
# The encoders are kept in a dict keyed by column name so the same mapping
# can be re-applied to new data later.
categorical_columns = ['Brand', 'Model', 'Fuel_Type', 'Transmission']
label_encoders = {column: LabelEncoder() for column in categorical_columns}

for column, le in label_encoders.items():
    # fit_transform learns the column's classes and replaces the column
    # in place with the corresponding integer codes.
    car_df[column] = le.fit_transform(car_df[column])

# Persist the fitted encoders for future transformations
jb.dump(label_encoders, 'label_encoders.pkl')

# Target is the 'Price' column; every remaining column is a feature
y = car_df['Price']
X = car_df.drop(columns=['Price'])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train a linear regression model on the training split
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the target for the held-out test split
y_pred = model.predict(X_test)

# Evaluate on the test split: mean absolute error, mean squared error, R^2
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the metrics; without these calls the values above are computed
# but never shown
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"R2 Score: {r2}")

# Save the fitted model. Kept as the last expression so that, when run as a
# notebook cell, the list of written files is still displayed as the result.
jb.dump(model, 'car_price_model.pkl')

MAE: 793.477135030119
MSE: 818588.2528900569
R2 Score: 0.9109049829961213


['car_price_model.pkl']