In [None]:
import pandas as pd
import numpy as np
import requests
from lxml import html
import time
from datetime import datetime
from unidecode import unidecode
import warnings
from tqdm import tqdm
import os
import sys

sys.path.append(os.getcwd() + "/src")
import spider
import models as models
import functions as func

In [None]:
# Load the parquet snapshot into `df`; the cell's output is its shape.
parquet_path = "/workspaces/cars/data/df_refine_20240915.parquet"
df = pd.read_parquet(parquet_path)
df.shape

In [4]:
# Columns that are cast to float, and categorical columns that get one-hot encoded.
cols_numeric = ["precio", "año", "km", "motor", "km_por_año"]
cols_categorical = ["transmision", "tipo_de_combustible", "tipo_de_carroceria",
                    "puertas", "marca", "modelo_agrup"]
dummy_cols = ["marca", "modelo_agrup", "transmision", "tipo_de_combustible",
              "tipo_de_carroceria", "puertas"]

# Cast in place, one column at a time, so `df` itself carries float columns afterwards.
for col in cols_numeric:
    df[col] = df[col].astype(float)

# Keep only complete rows of the modelling columns.
df_train = df[cols_numeric + cols_categorical].dropna()

# The regression target is the natural log of the price.
df_train["precio"] = np.log(df_train["precio"].astype(float))

X_train, X_test, y_train, y_test = models.TrainTestDummies(
    df=df_train,
    y='precio',
    dummies=dummy_cols,
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np

def create_model(layers, activation='relu', optimizer='adam'):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    layers : iterable of int
        Number of units of each hidden ``Dense`` layer, in order.
    activation : str
        Activation applied to every hidden layer.
    optimizer : str
        Optimizer handed to ``compile``.

    Returns
    -------
    A compiled ``Sequential`` model with a single linear output unit and
    ``'mse'`` loss.

    Notes
    -----
    The input width is read from the notebook-level ``X_train`` (its number
    of columns), so ``X_train`` must exist when the first layer is added.
    """
    network = Sequential()
    for position, units in enumerate(layers):
        # Only the first hidden layer declares the input width.
        shape_kwargs = {"input_shape": (X_train.shape[1],)} if position == 0 else {}
        network.add(Dense(units, activation=activation, **shape_kwargs))
    network.add(Dense(1, activation='linear'))
    network.compile(optimizer=optimizer, loss='mse')

    return network

# Min-max scale the features and the (column-shaped) target with separate scalers.
scalerX = MinMaxScaler()
scalerY = MinMaxScaler()
X_train_scaled = scalerX.fit_transform(X_train)
y_train = np.array(y_train).reshape(-1, 1)
y_train_scaled = scalerY.fit_transform(y_train)

# The held-out features are the rows to predict once the search is done.
X_new = X_test

# Wrap the Keras builder so scikit-learn can drive it.
model = KerasRegressor(build_fn=create_model, verbose=0)

# Hyper-parameter grid: builder arguments plus fit arguments (epochs, batch_size).
param_grid = dict(
    layers=[[32, 64]],
    activation=['relu'],
    optimizer=['adam'],
    epochs=[50, 10],
    batch_size=[20],
)

# 3-fold cross-validated search over the grid, using all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train_scaled, y_train_scaled)
print(f"Best grid: {grid_result.best_params_}")

# Predict with the refitted best estimator and undo the target scaling.
# `y_pred` is still on the log-price scale here.
X_new_scaled = scalerX.transform(X_new)
y_pred_scaled = grid_result.best_estimator_.predict(X_new_scaled)
y_pred = scalerY.inverse_transform(y_pred_scaled.reshape(-1, 1))

In [None]:
# Undo the log transform of the target and flatten the (n, 1) predictions
# into a plain list of Python floats.
# The previous comprehension iterated over an unused inner variable and
# called float() on a 1-element array, which recent NumPy versions deprecate.
# NOTE: this cell rebinds `y_pred`; re-running it without re-running the
# prediction cell first would apply exp() a second time.
y_pred = np.exp(y_pred).ravel().tolist()

# Both the true values and the predictions are compared on the price scale.
metrics = models.MetricsRegression(np.exp(y_test), X_test, y_pred)

# PSO (Particle Swarm Optimization): fitting the network weights without gradient descent

In [None]:
# Reload the parquet snapshot so this section starts from the stored data.
parquet_path = "/workspaces/cars/data/df_refine_20240915.parquet"
df = pd.read_parquet(parquet_path)
df.shape  # evaluated but not displayed: it is not the cell's last expression

# Columns that are cast to float, and categorical columns that get one-hot encoded.
cols_numeric = ["precio", "año", "km", "motor", "km_por_año"]
cols_categorical = ["transmision", "tipo_de_combustible", "tipo_de_carroceria",
                    "puertas", "marca", "modelo_agrup"]
dummy_cols = ["marca", "modelo_agrup", "transmision", "tipo_de_combustible",
              "tipo_de_carroceria", "puertas"]

# Cast in place, one column at a time, so `df` itself carries float columns afterwards.
for col in cols_numeric:
    df[col] = df[col].astype(float)

# Keep only complete rows of the modelling columns.
df_train = df[cols_numeric + cols_categorical].dropna()

# The regression target is the natural log of the price.
df_train["precio"] = np.log(df_train["precio"].astype(float))

X_train, X_test, y_train, y_test = models.TrainTestDummies(
    df=df_train,
    y='precio',
    dummies=dummy_cols,
)
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from pyswarm import pso  # Algoritmo de enjambre de partículas

# Alias for the held-out features; further down they are scaled with `scalerX`
# and passed to the PSO-fitted model for the final predictions.
X_new = X_test

# Model factory
def create_model(layers, activation='relu'):
    """Build an (uncompiled) fully connected regression network.

    Parameters
    ----------
    layers : iterable of int
        Number of units of each hidden ``Dense`` layer, in order.
    activation : str
        Activation applied to every hidden layer.

    Returns
    -------
    A ``Sequential`` model ending in a single linear output unit. The model
    is not compiled by this function.

    Notes
    -----
    The input width is read from the notebook-level ``X_train`` (its number
    of columns), so ``X_train`` must exist when the first layer is added.
    """
    network = Sequential()

    # Stack the hidden layers described by `layers`.
    for position, units in enumerate(layers):
        # Only the first hidden layer declares the input width.
        shape_kwargs = {"input_shape": (X_train.shape[1],)} if position == 0 else {}
        network.add(Dense(units, activation=activation, **shape_kwargs))

    # Output layer: one linear unit for regression.
    network.add(Dense(1, activation='linear'))

    return network

# Evaluation (cost) function for PSO
def evaluate_weights(weights, model, X, y):
    """Load a flat weight vector into ``model`` and return its MSE on (X, y).

    Parameters
    ----------
    weights : array-like, 1-D
        Concatenation, layer by layer, of each layer's first weight array
        (flattened) followed by its second weight array (flattened).
    model : object
        Must expose ``layers`` (each with ``get_weights``/``set_weights``,
        whose first two arrays are overwritten) and
        ``predict(X, verbose=0)``.
    X : array-like
        Inputs passed to ``model.predict``.
    y : array-like
        Targets; reshaped to the shape of the predictions before comparing.

    Returns
    -------
    Mean squared error between the predictions and ``y``.

    Raises
    ------
    ValueError
        If ``weights`` does not contain exactly as many values as the layers
        need, or if ``y`` cannot be reshaped to the prediction shape.

    Side effects
    ------------
    The weights of every layer of ``model`` are overwritten.
    """
    weights = np.asarray(weights)

    # Read each layer's shapes once; get_weights() is not free to call.
    layer_shapes = []
    for layer in model.layers:
        current = layer.get_weights()
        layer_shapes.append((current[0].shape, current[1].shape))

    # Validate the length up front so the model is never left half-updated.
    expected_size = sum(
        int(np.prod(kernel_shape)) + int(np.prod(bias_shape))
        for kernel_shape, bias_shape in layer_shapes
    )
    if weights.size != expected_size:
        raise ValueError(
            f"expected {expected_size} weights, got {weights.size}"
        )

    # Slice the flat vector back into per-layer arrays.
    cursor = 0
    for layer, (kernel_shape, bias_shape) in zip(model.layers, layer_shapes):
        kernel_end = cursor + int(np.prod(kernel_shape))
        bias_end = kernel_end + int(np.prod(bias_shape))
        layer.set_weights([
            weights[cursor:kernel_end].reshape(kernel_shape),
            weights[kernel_end:bias_end].reshape(bias_shape),
        ])
        cursor = bias_end

    # Silence the progress output: PSO calls this function once per particle
    # per iteration.
    predictions = np.asarray(model.predict(X, verbose=0))

    # Align the targets with the predictions; a 1-D `y` against (n, 1)
    # predictions would otherwise broadcast to an (n, n) matrix.
    targets = np.asarray(y).reshape(predictions.shape)

    # Cost: mean squared error.
    loss = np.mean((predictions - targets) ** 2)

    return loss

# Scale the data: separate min-max scalers for the features and the target.
scalerX, scalerY = MinMaxScaler(), MinMaxScaler()
X_train_scaled = scalerX.fit_transform(X_train)
y_train = np.array(y_train).reshape(-1, 1)
y_train_scaled = scalerY.fit_transform(y_train)

# Build the model.
layers_config = [6, 12]  # units of each hidden layer
activation_function = 'relu'
model = create_model(layers=layers_config, activation=activation_function)

# Count how many values PSO has to optimise: the first two weight arrays
# (kernel and bias) of every layer. get_weights() is called once per layer.
total_weights = int(sum(
    array.size
    for layer in model.layers
    for array in layer.get_weights()[:2]
))

# Search bounds for PSO: every weight is constrained to [-1, 1].
lb = -1 * np.ones(total_weights)
ub = 1 * np.ones(total_weights)

# Make the swarm reproducible on Restart & Run All; pyswarm samples its
# particles through numpy.random.
PSO_SEED = 42
np.random.seed(PSO_SEED)

# Run PSO to optimise the model weights against the scaled training data.
best_weights, best_loss = pso(evaluate_weights, lb, ub, args=(model, X_train_scaled, y_train_scaled), swarmsize=50, maxiter=100)

# Load the best weight vector found by PSO back into the model.
best_weights = np.asarray(best_weights)
start = 0
for layer in model.layers:
    kernel, bias = layer.get_weights()[:2]
    kernel_end = start + kernel.size
    bias_end = kernel_end + bias.size
    layer.set_weights([
        best_weights[start:kernel_end].reshape(kernel.shape),
        best_weights[kernel_end:bias_end].reshape(bias.shape),
    ])
    start = bias_end

# Predict with the PSO-fitted model and undo the target scaling.
# `y_pred` is on the log-price scale, because the target was log(precio).
X_new_scaled = scalerX.transform(X_new)
y_pred_scaled = model.predict(X_new_scaled)
y_pred = scalerY.inverse_transform(y_pred_scaled.reshape(-1, 1))

# Show the predictions on the original price scale. The log transform has to
# be undone as well; undoing only the min-max scaling still leaves log-prices.
y_pred_price = np.exp(y_pred)
print("Predicciones (escala original):")
print(y_pred_price)


In [None]:
# --- Vehicle to be priced -------------------------------------------------
año = 2005
km = 150000
motor = 4
marca = "Ford"
modelo = "Explorer"          # raw model name; mapped to its group further down
tipo_de_carroceria = "Camioneta"
puertas = "4_5"              # fallback when the model has no door entry
transmision = "Automático"
tipo_de_combustible = "Gasolina"

# --- Load the most recent persisted model ---------------------------------
import joblib
# NOTE(security): joblib.load unpickles arbitrary objects; only load files
# from a trusted location.
model_files = sorted(os.listdir("models_pkl"))
if not model_files:
    raise FileNotFoundError("no persisted model found in 'models_pkl'")
files_models = model_files[-1]  # last file in lexicographic order
model = joblib.load("models_pkl/" + files_models)

# --- Rebuild the training frame (not used by the prediction below) ---------
cols_numeric = ["precio", "año", "km", "motor", "km_por_año"]
for col in cols_numeric:
    df[col] = df[col].astype(float)
df_train = df[["precio", "año", "km", "motor", "km_por_año", "transmision", "tipo_de_combustible", "tipo_de_carroceria", "puertas",
                "marca", "modelo_agrup"]].dropna()
df_train["precio"] = np.log(df_train["precio"].astype(float))

# --- Lookup tables: model -> model group, model -> doors -------------------
# The pickle is read once and sliced twice.
dim_group = func.pkl.Load("pkl/dim_models_group.pkl")
dim_models = dim_group[["modelo", "modelo_agrup"]].drop_duplicates()
dim_doors = dim_group[["modelo", "puertas"]].drop_duplicates()

# Both lookups are keyed by the RAW model name, so they must run before
# `modelo` is rebound to its group.
grupos = dim_models.loc[dim_models["modelo"] == modelo, "modelo_agrup"].unique()
if len(grupos) == 0:
    raise ValueError(f"model {modelo!r} not found in pkl/dim_models_group.pkl")
puertas_modelo = dim_doors.loc[dim_doors["modelo"] == modelo, "puertas"].unique().tolist()
if puertas_modelo:
    puertas = puertas_modelo[0]
modelo = grupos[0]  # from here on `modelo` holds the model group

# --- Build the single-row feature frame ------------------------------------
# Guard against a zero age for a car of the current year.
años_uso = max(datetime.today().year - año, 1)
df_new = pd.DataFrame({"año":[año], "km":[km], "motor":[motor], "km_por_año":[km / años_uso],
                        "transmision":[transmision], "tipo_de_combustible":[tipo_de_combustible],"marca":[marca],
                        "modelo_agrup":[modelo], "puertas":[puertas], "tipo_de_carroceria":[tipo_de_carroceria]})
df_new = pd.get_dummies(df_new, columns = ["transmision", "tipo_de_combustible", "marca", "modelo_agrup",
                                            "puertas", "tipo_de_carroceria"]).replace(True, 1)

# Align with the columns the model was fitted on: dummy columns the model
# never saw are dropped, and dummies absent from this row are filled with 0.
features = model.feature_names_in_.tolist()
features_delete = sorted(set(df_new.columns) - set(features))
if features_delete != []:
    warnings.warn(f"columns unknown to the model were dropped: {features_delete}")
df_predict = df_new.reindex(columns = features, fill_value = 0).astype(float)

# The model predicts log(precio); the factor 1000000 presumably converts
# from millions to currency units -- TODO confirm against the training data.
log_precio = float(np.ravel(model.predict(df_predict))[0])
predict = int(np.exp(log_precio) * 1000000)
predict