In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid

In [1]:
X_train = pd.read_csv("../data/processed/killers_X_train.csv")
X_test = pd.read_csv("../data/processed/killers_X_test.csv")
X_train.head()

Unnamed: 0,date_occ,time_occ,area_name,vict_age,vict_sex,vict_descent,premis_desc,weapon_desc,status_desc,location,lat,lon,weekday,month,mocodes_desc
0,1638144000000000000,1330,8,57,1,0,10,16,2,8619,34.041,-118.3508,5,6,25394
1,1697241600000000000,1318,7,67,1,0,2,8,1,18355,33.9821,-118.3318,2,10,44592
2,1630627200000000000,2050,1,59,1,6,1,1,1,10895,34.0669,-118.2458,6,5,183
3,1647561600000000000,420,6,22,1,1,1,0,2,1393,34.0278,-118.201,6,3,57715
4,1601596800000000000,2110,1,21,1,1,10,9,2,231,34.0383,-118.2633,6,10,18003


In [7]:
model = KMeans(n_clusters = 180,n_init="auto", random_state = 14)
model.fit(X_train)

In [8]:
y_train = model.labels_

In [12]:
y_test = model.predict(X_test)

In [16]:
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

In [18]:
y_pred = knn_model.predict(X_train)

In [19]:
accuracy_score(y_train, y_pred)

0.8411867364746946

In [20]:
y_pred_test = knn_model.predict(X_test)

In [22]:
accuracy_score(y_test, y_pred_test)

0.6944444444444444

In [24]:

best_accuracy = 0
best_combination = None

param_grid  = {
    "n_neighbors":[3,5,7],
    "weights": ["uniform","distance"],
    "algorithm": ["auto", "kd_tree", "brute"], 
    'metric':["euclidean"]#,"manhattan","cityblock","minkowski"
    
}

for combination in ParameterGrid(param_grid):
    model = KNeighborsClassifier(**combination)
    print(model)
    model.fit(X_train, y_train)   
    y_pred = model.predict(X_train)
    acc = accuracy_score(y_train, y_pred)
    print("Combination:", combination)
    print("Accuracy:", acc)
    
    if acc > best_accuracy:
        best_accuracy = acc
        best_combination = combination
        
print("Best Combination:", best_combination)
print("Best Accuracy:", best_accuracy)

KNeighborsClassifier(metric='euclidean', n_neighbors=3)
Combination: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
Accuracy: 0.944153577661431
KNeighborsClassifier(metric='euclidean', n_neighbors=3, weights='distance')
Combination: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}
Accuracy: 1.0
KNeighborsClassifier(metric='euclidean')
Combination: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
Accuracy: 0.8411867364746946
KNeighborsClassifier(metric='euclidean', weights='distance')
Combination: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
Accuracy: 1.0
KNeighborsClassifier(metric='euclidean', n_neighbors=7)
Combination: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Accuracy: 0.7120418848167539
KNeighborsClassifier(metric='euclidean', n_neighbors=7, weights='distance')
Combination: {'algorithm': '

In [25]:
best_model = KNeighborsClassifier(**best_combination)
best_model.fit(X_train, y_train)

In [26]:
y_pred = best_model.predict(X_train)

In [27]:
accuracy_score(y_train, y_pred)

1.0

In [28]:
y_pred_test = best_model.predict(X_test)

In [29]:
accuracy_score(y_test, y_pred_test)

0.9513888888888888