# Implementación de Modelos Predictivos - Precios de Aguacate

## 1. Preparación de Datos para Modelado

In [12]:

# Importar librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Cargar dataset limpio
avocado = pd.read_csv('../data/avocado_clean.csv')

# Separar features y target
X = avocado.drop('AveragePrice', axis=1)
y = avocado['AveragePrice']

# Dividir en train (70%) y test (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Escalar features (excepto variables ya codificadas)
scaler = StandardScaler()
cols_to_scale = [col for col in X.columns if col not in ['organic', 'region_encoded']]
X_train[cols_to_scale] = scaler.fit_transform(X_train[cols_to_scale])
X_test[cols_to_scale] = scaler.transform(X_test[cols_to_scale])


## 2. Modelo 1: Regresión Lineal

### Implementación

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Crear y entrenar modelo
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predecir y evaluar
y_pred_lr = lr.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("Regresión Lineal:")
print(f"MSE: {mse_lr:.4f}")
print(f"R²: {r2_lr:.4f}")

Regresión Lineal:
MSE: 0.0852
R²: 0.4716


## 3. Modelo 2: K-Nearest Neighbors (KNN) para Clasificación

### Transformación a Problema de Clasificación


In [14]:
# Convertir precio en categorías (bajo, medio, alto)
y_train_cls = pd.cut(y_train, bins=3, labels=['bajo', 'medio', 'alto'])
y_test_cls = pd.cut(y_test, bins=3, labels=['bajo', 'medio', 'alto'])

# Implementación KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train_cls)

# Evaluación
y_pred_knn = knn.predict(X_test)
acc_knn = accuracy_score(y_test_cls, y_pred_knn)
f1_knn = f1_score(y_test_cls, y_pred_knn, average='weighted')

print("\nKNN (Clasificación):")
print(f"Accuracy: {acc_knn:.4f}")
print(f"F1-Score: {f1_knn:.4f}")


KNN (Clasificación):
Accuracy: 0.8216
F1-Score: 0.8202
