# 3.0 Model Training and Testing

### Data Setup

In [1]:
import numpy as np
import pandas as pd

from helpers.datasets import load_train_data,load_feature_weights
# Load the training inputs, the labels and the feature names from the project helper.
# NOTE(review): feature_names is not referenced again in the cells visible here;
# the DataFrame built for training takes its column labels from weights.index
# instead — confirm the two agree.
X, y, feature_names = load_train_data()
# Feature-weight table; the training cell looks values up with
# weights.loc[<feature label>, <method column>].
weights = load_feature_weights()

## 3.1 Model Training based on Feature Weights

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import euclidean_distances
from joblib import dump, load

def _weighted_euclidean(x, y, weights):
    """Return the Euclidean norm of ``(x - y) * weights``.

    Each coordinate difference is multiplied by its weight *before* squaring,
    so a weight ``w`` contributes ``w ** 2`` to the squared distance.
    """
    scaled_diff = (x - y) * weights
    return np.sqrt(np.sum(scaled_diff ** 2))


def custom_distance(weights):
    """Build a weighted Euclidean metric ``f(x, y)`` for the given weights.

    Parameters
    ----------
    weights : array-like
        One multiplier per feature; must broadcast against ``x - y``.

    Returns
    -------
    callable
        ``f(x, y) -> float`` computing ``sqrt(sum(((x - y) * weights) ** 2))``.

    Notes
    -----
    The metric is a ``functools.partial`` over a module-level function rather
    than a nested closure. ``pickle`` cannot serialise functions defined
    inside another function, so an estimator holding a closure as its metric
    could not be persisted with ``joblib.dump``.
    """
    from functools import partial

    return partial(_weighted_euclidean, weights=weights)

def train_knn_with_custom_distance(X, y, weights, method_used='gini', n_neighbors=5):
    """Fit a k-nearest-neighbours classifier that uses a feature-weighted metric.

    Parameters
    ----------
    X : pandas.DataFrame
        Training samples; the column labels are used to look up the weights.
    y : array-like
        Target labels, passed straight to ``KNeighborsClassifier.fit``.
    weights : pandas.DataFrame
        Weight table indexed by feature label, with one column per weighting
        method.
    method_used : str, default 'gini'
        Column of ``weights`` to read the per-feature weights from.
    n_neighbors : int, default 5
        Number of neighbours used by the classifier.

    Returns
    -------
    KNeighborsClassifier
        The fitted classifier.

    Raises
    ------
    KeyError
        If a column of ``X`` or ``method_used`` is missing from ``weights``.
    """
    # Collect the weights in X's column order so that position i of the
    # weight vector scales feature i inside the distance function.
    weight_values = np.array(
        [weights.loc[feature, method_used] for feature in X.columns]
    )
    knn = KNeighborsClassifier(
        n_neighbors=n_neighbors,
        metric=custom_distance(weight_values),
    )
    # A freshly constructed estimator is always truthy, so it is fitted
    # unconditionally.
    knn.fit(X, y)
    return knn

from pathlib import Path

# Wrap the training data in a DataFrame whose column labels come from the
# weight table, so each column can be matched to its weight by label.
# NOTE(review): this assumes weights.index lists the features in the same
# order as the columns of X — confirm against load_train_data().
X_df = pd.DataFrame(X, columns=weights.index)
knn_model = train_knn_with_custom_distance(X_df, y, weights)

# joblib.dump does not create missing directories, so make sure the target
# folder exists before persisting the fitted model.
model_path = 'experiments/gini_model.joblib'
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
dump(knn_model, model_path)

# Resources

1. https://medium.com/analytics-vidhya/feature-engineering-experiment-weighted-knn-3f28dfdf30e1