# Starter Code

In [11]:
# --- Standard library ---
import importlib.util
import math
import os

# --- Third-party ---
from joblib import dump
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# --- Project helpers ---
# helpers.py lives in ../scripts, which is not an importable package from the
# notebook's working directory, so the module is loaded directly from its file
# path and bound to the name `helpers`.
scripts_path = os.path.abspath('../scripts')
spec = importlib.util.spec_from_file_location("helpers", os.path.join(scripts_path, "helpers.py"))
helpers = importlib.util.module_from_spec(spec)
spec.loader.exec_module(helpers)

# --- Output location ---
# Every model cell below persists its estimator under ../models/knn/.
# joblib.dump does not create missing parent directories, so make sure the
# folder exists up front; otherwise a fresh checkout fails only after the
# (slow) fit has already finished. exist_ok=True keeps re-runs harmless.
os.makedirs('../models/knn', exist_ok=True)

# KNN

# Convert 4th Down

# Default Params

In [12]:
# Baseline for the conversion task: KNN with scikit-learn's default
# hyperparameters (no arguments are passed to the constructor).
# NOTE(review): convert_data_split() presumably returns train/test features
# and labels in this order — confirm against helpers.py.
X_train, X_test, y_train, y_test = helpers.convert_data_split()

# fit() returns the estimator itself, so construction and training chain.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Persist the fitted baseline model.
model_path = '../models/knn/convert.joblib'
dump(knn, model_path)

# Mean accuracy on the held-out split, then on the training split.
test_accuracy = knn.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")
train_accuracy = knn.score(X_train, y_train)
print(f"Train Accuracy: {train_accuracy}")

# Per-class precision/recall/F1 on the test split; zero_division=0 reports 0
# (instead of warning) for classes that receive no predictions.
y_pred = knn.predict(X_test)
report = classification_report(y_test, y_pred, zero_division=0)
print(report)

Test Accuracy: 0.840928152082751
Train Accuracy: 0.8739340137005452
              precision    recall  f1-score   support

           0       0.08      0.03      0.04       295
   CONVERTED       0.38      0.36      0.37       397
      FAILED       0.42      0.36      0.39       430
  FIELD_GOAL       0.87      0.91      0.89      1711
        PUNT       0.92      0.96      0.94      4321

    accuracy                           0.84      7154
   macro avg       0.53      0.52      0.52      7154
weighted avg       0.81      0.84      0.83      7154



# Cross Validation

In [13]:
# Tune n_neighbors for the conversion task with a cross-validated grid search.
# NOTE(review): convert_data_split() presumably returns train/test features
# and labels in this order — confirm against helpers.py.
X_train, X_test, y_train, y_test = helpers.convert_data_split()

# Candidate k values 1..19 (range's upper bound is exclusive).
parameters = {'n_neighbors': range(1, 20)}

# GridSearchCV is left on its default cross-validation and scoring settings.
knn = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=parameters)
knn.fit(X_train, y_train)

# Only the winning estimator is saved, not the whole search object.
model_path = '../models/knn/convert_cv.joblib'
dump(knn.best_estimator_, model_path)

print(knn.best_params_)

# score()/predict() on the search object are evaluated on the test and
# training splits respectively.
test_accuracy = knn.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")
train_accuracy = knn.score(X_train, y_train)
print(f"Train Accuracy: {train_accuracy}")

# Per-class breakdown on the test split; zero_division=0 reports 0 (instead
# of warning) for classes that receive no predictions.
y_pred = knn.predict(X_test)
report = classification_report(y_test, y_pred, zero_division=0)
print(report)

{'n_neighbors': 14}
Test Accuracy: 0.8514117975957506
Train Accuracy: 0.8603732699566615
              precision    recall  f1-score   support

           0       0.11      0.01      0.01       295
   CONVERTED       0.41      0.31      0.35       397
      FAILED       0.45      0.30      0.36       430
  FIELD_GOAL       0.85      0.94      0.89      1711
        PUNT       0.91      0.98      0.94      4321

    accuracy                           0.85      7154
   macro avg       0.55      0.51      0.51      7154
weighted avg       0.81      0.85      0.82      7154



# Attempt

# Default Params

In [14]:
# Baseline for the attempt task: KNN with scikit-learn's default
# hyperparameters (no arguments are passed to the constructor).
# NOTE(review): attempt_data_split() presumably returns train/test features
# and labels in this order — confirm against helpers.py.
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

# fit() returns the estimator itself, so construction and training chain.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Persist the fitted baseline model.
model_path = '../models/knn/attempt.joblib'
dump(knn, model_path)

# Mean accuracy on the held-out split, then on the training split.
test_accuracy = knn.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")
train_accuracy = knn.score(X_train, y_train)
print(f"Train Accuracy: {train_accuracy}")

Test Accuracy: 0.8704221414593235
Train Accuracy: 0.8967216552495456


# Cross Validation

In [15]:
# Tune n_neighbors for the attempt task with a cross-validated grid search.
# NOTE(review): attempt_data_split() presumably returns train/test features
# and labels in this order — confirm against helpers.py.
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

# Candidate k values 1..19 (range's upper bound is exclusive).
parameters = {'n_neighbors': range(1, 20)}

# GridSearchCV is left on its default cross-validation and scoring settings.
knn = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=parameters)
knn.fit(X_train, y_train)

# Only the winning estimator is saved, not the whole search object.
model_path = '../models/knn/attempt_cv.joblib'
dump(knn.best_estimator_, model_path)

print(knn.best_params_)

# score()/predict() on the search object are evaluated on the test and
# training splits respectively.
test_accuracy = knn.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")
train_accuracy = knn.score(X_train, y_train)
print(f"Train Accuracy: {train_accuracy}")

# Per-class breakdown on the test split; zero_division=0 reports 0 (instead
# of warning) for classes that receive no predictions.
y_pred = knn.predict(X_test)
report = classification_report(y_test, y_pred, zero_division=0)
print(report)

{'n_neighbors': 16}
Test Accuracy: 0.8793681856304165
Train Accuracy: 0.8846288270655669
              precision    recall  f1-score   support

           0       0.14      0.00      0.01       295
   ATTEMPTED       0.70      0.61      0.65       827
  FIELD_GOAL       0.87      0.92      0.90      1711
        PUNT       0.91      0.97      0.94      4321

    accuracy                           0.88      7154
   macro avg       0.66      0.63      0.62      7154
weighted avg       0.85      0.88      0.86      7154

