# KNN

In [1]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_excel(r"C:\Users\Metrovalores\Documents\Precio_VN_NC.xlsx")

df.head()

Unnamed: 0,Precio,Valor_Nominal
0,95.0,60.49
1,98.0,97.02
2,95.0,114.92
3,96.0,141.14
4,97.0,160.4


In [4]:
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV, RepeatedKFold
from sklearn.neighbors import KNeighborsRegressor


X = df[['Valor_Nominal']]  # Load your feature data
y = df['Precio'] # Load your target data

# Define the parameter grid for grid search
param_grid = {
    'n_neighbors': range(1, 30),  
    'metric': ['minkowski', 'euclidean', 'manhattan']  # Different distance metrics
}

# Define the KNN model
knn = KNeighborsRegressor()

# Set up Repeated Stratified K-Fold cross-validation
cv = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)

# Define the GridSearchCV object
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=cv, scoring='neg_mean_squared_error')  # Minimize mean squared error

# Train the model with grid search
grid_search.fit(X, y)

# Get the best model and its parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Print the best parameters and potentially evaluate the model on unseen data

print("Best KNN parameters:", best_params)
# Use the best_model for prediction on new data

Best KNN parameters: {'metric': 'minkowski', 'n_neighbors': 25}


In [5]:
final_model = grid_search.best_estimator_

final_model.fit(X, y)

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_pred = final_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)


print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared (R2):", r2)

Mean Squared Error: 0.06490777166145258
Mean Absolute Error: 0.1349512558139537
R-squared (R2): 0.7994471238014451


## Estimación

In [8]:
new_data = [[1000000]]

# Make prediction using the model (assuming 'final_model' is your trained model)
predicted_price = final_model.predict(new_data)

print("Precio estimado para la nota de crédito:", predicted_price)

Precio estimado para la nota de crédito: [99.769464]


