In [177]:
# Cargamos las librerías necesarias.
import pandas as pd
import pickle
import joblib
from sklearn.preprocessing import StandardScaler

In [178]:
# Read the cleaned, comma-separated input dataset into a DataFrame.
input_csv = '../data/car_data_input_clean.csv'
X = pd.read_csv(input_csv, sep=',')

# Report the (rows, columns) shape, then display the first records.
print(X.shape)
X.head()

(23199, 21)


Unnamed: 0,PRODUCTO,TIPO_CARROCERIA,COMBUSTIBLE,Potencia_,TRANS,FORMA_PAGO,ESTADO_CIVIL,GENERO,OcupaciOn,PROVINCIA,...,Campanna2,Campanna3,Zona _Renta,REV_Garantia,Averia_grave,QUEJA_CAC,COSTE_VENTA,km_anno,Revisiones,Edad Cliente
0,0,0,0,1,1,0,0,1,1,4,...,0,0,2,0,2,1,3,0,2,0
1,0,0,0,1,1,0,0,0,1,47,...,0,0,2,1,3,0,0,0,2,2
2,0,0,0,1,1,3,0,1,1,30,...,0,0,1,0,3,0,0,0,4,0
3,0,0,0,1,1,2,0,0,2,41,...,0,1,0,0,3,0,2,0,3,0
4,0,0,0,1,1,2,0,0,1,4,...,0,0,1,1,1,0,2,2,2,2


In [179]:
# Restore the two persisted artifacts: the category mapping variable and the
# XGBoost model.

# Category mappings, unpickled from the working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that come from a trusted source.
with open('mappings.pkl', 'rb') as mapping_file:
    mappings = pickle.load(mapping_file)

# Trained model, restored with joblib.
modelo = joblib.load('../modelos/modelo_XGB.pkl')

In [180]:
# Standardise every column whose largest value is greater than 1; columns
# whose maximum is 1 or less are left as they are.
scaler = StandardScaler()

# Step 1: compute all column maxima in one pass and keep the names of the
# columns that exceed 1, in their original order.
column_max = X.max()
cols = column_max[column_max > 1].index.tolist()

# Step 2: fit the scaler on those columns and overwrite them in X with the
# standardised values.
# NOTE(review): the scaler is fitted on the data being scored. If the model
# was trained with a scaler fitted on different data, that fitted scaler
# should be reused here instead - TODO confirm.
scaled_values = scaler.fit_transform(X[cols])
X[cols] = scaled_values

In [181]:
# Class probabilities for every row of X (one column per class).
probabilities = modelo.predict_proba(X)

# Predicted class label for every row of X.
predictions = modelo.predict(X)

In [182]:
# Print the predicted labels, then the class probabilities, one per line.
print(predictions, probabilities, sep='\n')

[0 0 0 ... 1 0 0]
[[0.9519286  0.04807136]
 [0.9944276  0.00557235]
 [0.9327526  0.06724737]
 ...
 [0.1808303  0.8191697 ]
 [0.8527434  0.14725661]
 [0.8700051  0.12999494]]


In [183]:
# Attach the predicted label and both class probabilities to X as new columns.
# NOTE(review): X now carries three non-feature columns, so it can no longer
# be passed back to the model unchanged.
prediction_columns = {
    'Mas_1_Coche': predictions,
    'Probabilidad_0': probabilities[:, 0],
    'Probabilidad_1': probabilities[:, 1],
}
for column_name, column_values in prediction_columns.items():
    X[column_name] = column_values

# Display the first rows, now including predictions and probabilities.
X.head()

Unnamed: 0,PRODUCTO,TIPO_CARROCERIA,COMBUSTIBLE,Potencia_,TRANS,FORMA_PAGO,ESTADO_CIVIL,GENERO,OcupaciOn,PROVINCIA,...,REV_Garantia,Averia_grave,QUEJA_CAC,COSTE_VENTA,km_anno,Revisiones,Edad Cliente,Mas_1_Coche,Probabilidad_0,Probabilidad_1
0,-1.166854,-1.158413,0,-0.64256,1,-0.913832,-0.53111,1,-0.120832,-1.412864,...,0,-0.207286,1,0.783597,-1.039652,-0.599505,-1.318503,0,0.951929,0.048071
1,-1.166854,-1.158413,0,-0.64256,1,-0.913832,-0.53111,0,-0.120832,1.394932,...,1,0.778333,0,-1.370869,-1.039652,-0.599505,0.738245,0,0.994428,0.005572
2,-1.166854,-1.158413,0,-0.64256,1,1.533921,-0.53111,1,-0.120832,0.284873,...,0,0.778333,0,-1.370869,-1.039652,0.19694,-1.318503,0,0.932753,0.067247
3,-1.166854,-1.158413,0,-0.64256,1,0.718003,-0.53111,0,3.43651,1.003147,...,0,0.778333,0,0.065442,-1.039652,-0.201283,-1.318503,1,0.003722,0.996278
4,-1.166854,-1.158413,0,-0.64256,1,0.718003,-0.53111,0,-0.120832,-1.412864,...,1,-1.192906,0,0.065442,0.508465,-0.599505,0.738245,1,0.44253,0.55747


In [184]:
# Undo the scaling on a copy, so X itself keeps its standardised values.
X_original = X.copy()

# Map the standardised columns back to their pre-scaling values.
unscaled_values = scaler.inverse_transform(X_original[cols])
X_original[cols] = unscaled_values

X_original.head()

Unnamed: 0,PRODUCTO,TIPO_CARROCERIA,COMBUSTIBLE,Potencia_,TRANS,FORMA_PAGO,ESTADO_CIVIL,GENERO,OcupaciOn,PROVINCIA,...,REV_Garantia,Averia_grave,QUEJA_CAC,COSTE_VENTA,km_anno,Revisiones,Edad Cliente,Mas_1_Coche,Probabilidad_0,Probabilidad_1
0,-4.440892e-16,4.440892e-16,0,1.0,1,0.0,0.0,1,1.0,4.0,...,0,2.0,1,3.0,0.0,2.0,0.0,0,0.951929,0.048071
1,-4.440892e-16,4.440892e-16,0,1.0,1,0.0,0.0,0,1.0,47.0,...,1,3.0,0,-2.220446e-16,0.0,2.0,2.0,0,0.994428,0.005572
2,-4.440892e-16,4.440892e-16,0,1.0,1,3.0,0.0,1,1.0,30.0,...,0,3.0,0,-2.220446e-16,0.0,4.0,0.0,0,0.932753,0.067247
3,-4.440892e-16,4.440892e-16,0,1.0,1,2.0,0.0,0,2.0,41.0,...,0,3.0,0,2.0,0.0,3.0,0.0,1,0.003722,0.996278
4,-4.440892e-16,4.440892e-16,0,1.0,1,2.0,0.0,0,1.0,4.0,...,1,1.0,0,2.0,2.0,2.0,2.0,1,0.44253,0.55747


In [185]:
# The restored columns hold floats with small floating-point residue
# (e.g. -4.440892e-16 instead of 0), so round to the nearest whole number
# and cast back to integers.
rounded_values = X_original[cols].round()
X_original[cols] = rounded_values.astype(int)

X_original.head()

Unnamed: 0,PRODUCTO,TIPO_CARROCERIA,COMBUSTIBLE,Potencia_,TRANS,FORMA_PAGO,ESTADO_CIVIL,GENERO,OcupaciOn,PROVINCIA,...,REV_Garantia,Averia_grave,QUEJA_CAC,COSTE_VENTA,km_anno,Revisiones,Edad Cliente,Mas_1_Coche,Probabilidad_0,Probabilidad_1
0,0,0,0,1,1,0,0,1,1,4,...,0,2,1,3,0,2,0,0,0.951929,0.048071
1,0,0,0,1,1,0,0,0,1,47,...,1,3,0,0,0,2,2,0,0.994428,0.005572
2,0,0,0,1,1,3,0,1,1,30,...,0,3,0,0,0,4,0,0,0.932753,0.067247
3,0,0,0,1,1,2,0,0,2,41,...,0,3,0,2,0,3,0,1,0.003722,0.996278
4,0,0,0,1,1,2,0,0,1,4,...,1,1,0,2,2,2,2,1,0.44253,0.55747


In [186]:
# Build the reverse lookup for every mapped column by swapping each
# mapping's keys and values.
inverted_mappings = {}
for mapped_column, forward_mapping in mappings.items():
    inverted_mappings[mapped_column] = {
        code: label for label, code in forward_mapping.items()
    }

# Replace the encoded values with their original labels, column by column.
# Values that have no entry in the reverse lookup are left unchanged.
for column in inverted_mappings:
    mapping = inverted_mappings[column]
    X_original[column] = X_original[column].replace(mapping)

# Display the first rows with the categorical labels recovered.
X_original.head()

Unnamed: 0,PRODUCTO,TIPO_CARROCERIA,COMBUSTIBLE,Potencia_,TRANS,FORMA_PAGO,ESTADO_CIVIL,GENERO,OcupaciOn,PROVINCIA,...,REV_Garantia,Averia_grave,QUEJA_CAC,COSTE_VENTA,km_anno,Revisiones,Edad Cliente,Mas_1_Coche,Probabilidad_0,Probabilidad_1
0,A,TIPO1,FUEL 1,Baja,M,Contado,CASADO,M,Empresa,Asturias,...,NO DATA,Averia muy grave,SI,3k-5k,0-10k,2,18-40,0,0.951929,0.048071
1,A,TIPO1,FUEL 1,Baja,M,Contado,CASADO,F,Empresa,Toledo,...,SI,No,NO,0-2k,0-10k,2,50-60,0,0.994428,0.005572
2,A,TIPO1,FUEL 1,Baja,M,Otros,CASADO,M,Empresa,Lerida,...,NO DATA,No,NO,0-2k,0-10k,4,18-40,0,0.932753,0.067247
3,A,TIPO1,FUEL 1,Baja,M,Financiera Marca,CASADO,F,Funcionario,Santa Cruz de Tenerife,...,NO DATA,No,NO,2k-3k,0-10k,3,18-40,1,0.003722,0.996278
4,A,TIPO1,FUEL 1,Baja,M,Financiera Marca,CASADO,F,Empresa,Asturias,...,SI,Averia leve,NO,2k-3k,10k-20k,2,50-60,1,0.44253,0.55747


In [189]:
# Keep only the rows whose predicted label is 1.
is_predicted_positive = X_original['Mas_1_Coche'].eq(1)
clientes_predichos = X_original.loc[is_predicted_positive]

clientes_predichos

Unnamed: 0,PRODUCTO,TIPO_CARROCERIA,COMBUSTIBLE,Potencia_,TRANS,FORMA_PAGO,ESTADO_CIVIL,GENERO,OcupaciOn,PROVINCIA,...,REV_Garantia,Averia_grave,QUEJA_CAC,COSTE_VENTA,km_anno,Revisiones,Edad Cliente,Mas_1_Coche,Probabilidad_0,Probabilidad_1
3,A,TIPO1,FUEL 1,Baja,M,Financiera Marca,CASADO,F,Funcionario,Santa Cruz de Tenerife,...,NO DATA,No,NO,2k-3k,0-10k,3,18-40,1,0.003722,0.996278
4,A,TIPO1,FUEL 1,Baja,M,Financiera Marca,CASADO,F,Empresa,Asturias,...,SI,Averia leve,NO,2k-3k,10k-20k,2,50-60,1,0.442530,0.557470
5,A,TIPO1,FUEL 1,Baja,M,Financiera Marca,CASADO,M,Empresa,Madrid,...,NO DATA,No,SI,2k-3k,0-10k,1,18-40,1,0.065644,0.934356
10,A,TIPO1,FUEL 1,Baja,M,Contado,CASADO,M,Empresa,Castellon,...,SI,Averia muy grave,SI,3k-5k,10k-20k,1,40-50,1,0.395804,0.604196
12,A,TIPO1,FUEL 1,Baja,M,Otros,CASADO,F,Empresa,La Rioja,...,SI,No,NO,2k-3k,0-10k,2,40-50,1,0.421043,0.578957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23191,E,TIPO2,FUEL 1,Media,M,Contado,CASADO,F,Empresa,Madrid,...,NO DATA,Averia grave,NO,3k-5k,10k-20k,4,40-50,1,0.348117,0.651883
23193,E,TIPO2,FUEL 1,Media,M,Otros,CASADO,M,Empresa,Pontevedra,...,SI,No,NO,3k-5k,0-10k,6,40-50,1,0.455032,0.544968
23194,E,TIPO2,FUEL 1,Media,M,Contado,CASADO,M,Empresa,Madrid,...,SI,No,SI,3k-5k,0-10k,4,50-60,1,0.316625,0.683375
23195,E,TIPO2,FUEL 1,Alta,M,Financiera Marca,CASADO,M,Empresa,Zaragoza,...,NO DATA,No,NO,2k-3k,0-10k,0,18-40,1,0.004994,0.995006


In [191]:
# Write the full scored dataset and the subset predicted as class 1 to CSV.
# The row index is not written to either file.
csv_exports = (
    (X_original, '../data/predicciones.csv'),
    (clientes_predichos, '../data/clientes_predichos.csv'),
)
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path, index=False)