In [11]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

### Loading data...

In [2]:
# Read the two dataset variants that are compared in this notebook.
# NOTE(review): the "_w_intuition" file is presumably the variant with
# hand-picked features added -- confirm against the step that produced it.
df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in ("selected_data.csv", "selected_data_w_intuition.csv")
)

In [3]:
# Separate the 'salary' target column from the remaining feature columns
# of each dataset.
y1, y2 = df1['salary'], df2['salary']

X1 = df1.drop(columns='salary')
X2 = df2.drop(columns='salary')

In [4]:
# Hold out 20% of each dataset as a test split. Both calls use the same
# test_size and random_state so the two splits are produced the same way.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, test_size=0.2, random_state=12
)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y2, test_size=0.2, random_state=12
)

## Defining model and GridSearch

In [5]:
# Base k-nearest-neighbours classifier with library defaults; it is the
# estimator handed to the grid search, which tunes n_neighbors and weights.
knn = KNeighborsClassifier()

In [6]:
# Hyper-parameter grid explored by the search:
# 6 neighbour counts x 2 weighting schemes = 12 candidate settings.
param_grid = dict(
    n_neighbors=range(1, 7),          # k = 1 .. 6 (upper bound is exclusive)
    weights=['uniform', 'distance'],  # equal votes vs. inverse-distance-weighted votes
)

In [7]:
# Exhaustive search over param_grid, scored by macro-averaged F1 with
# 5-fold cross-validation; n_jobs=-1 uses all available CPU cores.
grid_search = GridSearchCV(
    knn,
    param_grid,
    cv=5,
    scoring='f1_macro',
    n_jobs=-1,
)

# Training the model on dataset 1

In [8]:
# Run the 5-fold cross-validated grid search on the dataset-1 training split.
grid_search.fit(X1_train, y1_train)

In [9]:
# Hyper-parameter combination that the grid search selected for dataset 1
# (best mean cross-validated macro-F1).
best_params = grid_search.best_params_
print(best_params)

{'n_neighbors': 5, 'weights': 'uniform'}


In [13]:
# Evaluate the best estimator found by the grid search on the held-out
# dataset-1 test split.
best_model = grid_search.best_estimator_
y1_pred = best_model.predict(X1_test)

# Macro-averaged F1 matches the 'f1_macro' criterion used during the search;
# plain accuracy is reported alongside it for reference.
f1 = f1_score(y1_test, y1_pred, average='macro')
accuracy = accuracy_score(y1_test, y1_pred)

print(f1)
print(accuracy)

0.7907020019487201
0.85009765625


# Training the model on dataset 2

In [15]:
# Build a fresh search object with the same settings as for dataset 1
# (same estimator, grid, macro-F1 scoring and 5-fold CV) for dataset 2.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
)

In [16]:
# Run the 5-fold cross-validated grid search on the dataset-2 training split.
grid_search.fit(X2_train, y2_train)

In [17]:
# Hyper-parameter combination that the grid search selected for dataset 2
# (best mean cross-validated macro-F1). Overwrites the dataset-1 value.
best_params = grid_search.best_params_
print(best_params)

{'n_neighbors': 5, 'weights': 'uniform'}


In [18]:
# Evaluate the best estimator found by the grid search on the held-out
# dataset-2 test split. Note that best_model, f1 and accuracy are rebound
# here, replacing the dataset-1 values.
best_model = grid_search.best_estimator_
y2_pred = best_model.predict(X2_test)

# Macro-averaged F1 matches the 'f1_macro' criterion used during the search;
# plain accuracy is reported alongside it for reference.
f1 = f1_score(y2_test, y2_pred, average='macro')
accuracy = accuracy_score(y2_test, y2_pred)

print(f1)
print(accuracy)

0.7883658447401473
0.84765625
