# Imports

In [4]:
import os
scripts_path = os.path.abspath('../scripts')
import importlib.util
spec = importlib.util.spec_from_file_location("helpers", os.path.join(scripts_path, "helpers.py"))
helpers = importlib.util.module_from_spec(spec)
spec.loader.exec_module(helpers)

import math
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
from joblib import dump

# Tree

## Convert 4th down

### Default params

In [5]:
X_train, X_test, y_train, y_test = helpers.convert_data_split()

clf = RandomForestClassifier(random_state=0)

clf.fit(X_train, y_train)

dump(clf, '../models/random_forest/convert.joblib')

print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

Test Accuracy: 0.8542074363992173
Train Accuracy: 0.9998252481476304
              precision    recall  f1-score   support

           0       0.06      0.01      0.02       295
   CONVERTED       0.39      0.30      0.34       397
      FAILED       0.46      0.42      0.43       430
  FIELD_GOAL       0.88      0.93      0.91      1711
        PUNT       0.92      0.97      0.95      4321

    accuracy                           0.85      7154
   macro avg       0.54      0.53      0.53      7154
weighted avg       0.82      0.85      0.83      7154



### Cross Validation

In [6]:
X_train, X_test, y_train, y_test = helpers.convert_data_split()

parameters = {
    'max_features': range(1, math.floor(math.sqrt(len(X_train[0])))), 
    'max_depth': range(1, 100),
    'n_estimators': range(10, 200),
    'min_samples_split': range(2, 20)
}
clf = RandomizedSearchCV(RandomForestClassifier(random_state=0), parameters, random_state=0, n_iter=200)

clf.fit(X_train, y_train)

dump(clf.best_estimator_, '../models/random_forest/convert_cv.joblib')

print(clf.best_params_)
print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

{'n_estimators': 124, 'min_samples_split': 19, 'max_features': 1, 'max_depth': 43}
Test Accuracy: 0.8634330444506569
Train Accuracy: 0.8889626730043338
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
   CONVERTED       0.43      0.27      0.33       397
      FAILED       0.52      0.44      0.48       430
  FIELD_GOAL       0.87      0.95      0.91      1711
        PUNT       0.91      0.98      0.95      4321

    accuracy                           0.86      7154
   macro avg       0.55      0.53      0.53      7154
weighted avg       0.81      0.86      0.84      7154



## Attempt 4th down

### Default Params

In [7]:
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)

dump(clf, '../models/random_forest/attempt.joblib')

print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

Test Accuracy: 0.8827229521945764
Train Accuracy: 0.9998252481476304
              precision    recall  f1-score   support

           0       0.07      0.01      0.02       295
   ATTEMPTED       0.70      0.66      0.68       827
  FIELD_GOAL       0.88      0.92      0.90      1711
        PUNT       0.92      0.97      0.95      4321

    accuracy                           0.88      7154
   macro avg       0.65      0.64      0.64      7154
weighted avg       0.85      0.88      0.87      7154



### Cross Validation

In [8]:
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

parameters = {
    'max_features': range(1, math.floor(math.sqrt(len(X_train[0])))), 
    'max_depth': range(1, 100),
    'n_estimators': range(10, 200),
    'min_samples_split': range(2, 20)
}
clf = RandomizedSearchCV(RandomForestClassifier(random_state=0), parameters, random_state=0, n_iter=200)

clf.fit(X_train, y_train)

dump(clf.best_estimator_, '../models/random_forest/attempt_cv.joblib')

print(clf.best_params_)
print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

{'n_estimators': 41, 'min_samples_split': 12, 'max_features': 1, 'max_depth': 15}
Test Accuracy: 0.8916689963656695
Train Accuracy: 0.9109814064029079
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
   ATTEMPTED       0.73      0.68      0.70       827
  FIELD_GOAL       0.89      0.93      0.91      1711
        PUNT       0.92      0.98      0.95      4321

    accuracy                           0.89      7154
   macro avg       0.63      0.65      0.64      7154
weighted avg       0.85      0.89      0.87      7154

