# Starter Code

In [30]:
import os
scripts_path = os.path.abspath('../scripts')
import importlib.util
spec = importlib.util.spec_from_file_location("helpers", os.path.join(scripts_path, "helpers.py"))
helpers = importlib.util.module_from_spec(spec)
spec.loader.exec_module(helpers)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from joblib import dump
import math

# KNN

# Convert 4th Down

# Default Params

In [31]:
X_train, X_test, y_train, y_test = helpers.convert_data_split()

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
dump(knn, '../models/knn/convert.joblib')
print(f"Test Accuracy: {knn.score(X_test, y_test)}")
print(f"Train Accuracy: {knn.score(X_train, y_train)}")

report = classification_report(y_test, knn.predict(X_test), zero_division=0)
print(report)

Test Accuracy: 0.8110303548525011
Train Accuracy: 0.8541800299337182
              precision    recall  f1-score   support

           0       0.07      0.02      0.04        85
   CONVERTED       0.34      0.20      0.25       160
      FAILED       0.37      0.27      0.31       154
  FIELD_GOAL       0.76      0.90      0.83       542
        PUNT       0.91      0.95      0.93      1398

    accuracy                           0.81      2339
   macro avg       0.49      0.47      0.47      2339
weighted avg       0.77      0.81      0.79      2339



# Cross Validation

In [32]:
X_train, X_test, y_train, y_test = helpers.convert_data_split()

parameters = {
    'n_neighbors': range(1, 20),
}
knn = GridSearchCV(KNeighborsClassifier(), parameters)

knn.fit(X_train, y_train)

dump(knn.best_estimator_, '../models/knn/convert_cv.joblib')

print(knn.best_params_)
print(f"Test Accuracy: {knn.score(X_test, y_test)}")
print(f"Train Accuracy: {knn.score(X_train, y_train)}")

report = classification_report(y_test, knn.predict(X_test), zero_division=0)
print(report)

{'n_neighbors': 12}
Test Accuracy: 0.8182984181274049
Train Accuracy: 0.8362197990164636
              precision    recall  f1-score   support

           0       0.12      0.01      0.02        85
   CONVERTED       0.43      0.10      0.16       160
      FAILED       0.41      0.21      0.28       154
  FIELD_GOAL       0.75      0.92      0.83       542
        PUNT       0.88      0.97      0.93      1398

    accuracy                           0.82      2339
   macro avg       0.52      0.45      0.44      2339
weighted avg       0.76      0.82      0.78      2339



# Attempt

# Default Params

In [33]:
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
dump(knn, '../models/knn/attempt.joblib')
print(f"Test Accuracy: {knn.score(X_test, y_test)}")
print(f"Train Accuracy: {knn.score(X_train, y_train)}")

Test Accuracy: 0.8371098760153912
Train Accuracy: 0.8706435749412016


# Cross Validation

In [34]:
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

parameters = {
    'n_neighbors': range(1, 20),
}
knn = GridSearchCV(KNeighborsClassifier(), parameters)

knn.fit(X_train, y_train)

dump(knn.best_estimator_, '../models/knn/attempt_cv.joblib')

print(knn.best_params_)
print(f"Test Accuracy: {knn.score(X_test, y_test)}")
print(f"Train Accuracy: {knn.score(X_train, y_train)}")

report = classification_report(y_test, knn.predict(X_test), zero_division=0)
print(report)

{'n_neighbors': 14}
Test Accuracy: 0.8396750748182984
Train Accuracy: 0.8525764378875348
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        85
   ATTEMPTED       0.68      0.34      0.45       314
  FIELD_GOAL       0.77      0.92      0.84       542
        PUNT       0.89      0.97      0.93      1398

    accuracy                           0.84      2339
   macro avg       0.58      0.56      0.55      2339
weighted avg       0.80      0.84      0.81      2339

