# Imports

In [9]:
import os
scripts_path = os.path.abspath('../scripts')
import importlib.util
spec = importlib.util.spec_from_file_location("helpers", os.path.join(scripts_path, "helpers.py"))
helpers = importlib.util.module_from_spec(spec)
spec.loader.exec_module(helpers)

import math
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
from joblib import dump

# Tree

## Convert 4th down

### Default params

In [11]:
X_train, X_test, y_train, y_test = helpers.convert_data_split()

clf = RandomForestClassifier(random_state=0)

clf.fit(X_train, y_train)

dump(clf, '../models/random_forest/convert.joblib')

print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

Test Accuracy: 0.8567764001710133
Train Accuracy: 1.0
              precision    recall  f1-score   support

           0       0.24      0.07      0.11        85
   CONVERTED       0.47      0.38      0.42       160
      FAILED       0.47      0.40      0.43       154
  FIELD_GOAL       0.88      0.94      0.91       542
        PUNT       0.92      0.98      0.95      1398

    accuracy                           0.86      2339
   macro avg       0.60      0.55      0.56      2339
weighted avg       0.83      0.86      0.84      2339



### Cross Validation

In [12]:
X_train, X_test, y_train, y_test = helpers.convert_data_split()

parameters = {
    'max_features': range(1, math.floor(math.sqrt(len(X_train.iloc[0])))), 
    'max_depth': range(1, 100),
    'n_estimators': range(10, 200),
    'min_samples_split': range(2, 20)
}
clf = RandomizedSearchCV(RandomForestClassifier(random_state=0), parameters, random_state=0, n_iter=200)

clf.fit(X_train, y_train)

dump(clf.best_estimator_, '../models/random_forest/convert_cv.joblib')

print(clf.best_params_)
print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

{'n_estimators': 123, 'min_samples_split': 8, 'max_features': 1, 'max_depth': 36}
Test Accuracy: 0.8623343309106456
Train Accuracy: 0.9214239897370109
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        85
   CONVERTED       0.51      0.40      0.45       160
      FAILED       0.49      0.38      0.43       154
  FIELD_GOAL       0.88      0.95      0.91       542
        PUNT       0.92      0.99      0.95      1398

    accuracy                           0.86      2339
   macro avg       0.56      0.54      0.55      2339
weighted avg       0.82      0.86      0.84      2339



## Attempt 4th down

### Default Params

In [13]:
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)

dump(clf, '../models/random_forest/attempt.joblib')

print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

Test Accuracy: 0.8956819153484396
Train Accuracy: 1.0
              precision    recall  f1-score   support

           0       0.28      0.06      0.10        85
   ATTEMPTED       0.76      0.71      0.74       314
  FIELD_GOAL       0.90      0.93      0.91       542
        PUNT       0.93      0.98      0.95      1398

    accuracy                           0.90      2339
   macro avg       0.72      0.67      0.67      2339
weighted avg       0.88      0.90      0.88      2339



### Cross Validation

In [14]:
X_train, X_test, y_train, y_test = helpers.attempt_data_split()

parameters = {
    'max_features': range(1, math.floor(math.sqrt(len(X_train.iloc[0])))), 
    'max_depth': range(1, 100),
    'n_estimators': range(10, 200),
    'min_samples_split': range(2, 20)
}
clf = RandomizedSearchCV(RandomForestClassifier(random_state=0), parameters, random_state=0, n_iter=200)

clf.fit(X_train, y_train)

dump(clf.best_estimator_, '../models/random_forest/attempt_cv.joblib')

print(clf.best_params_)
print(f"Test Accuracy: {clf.score(X_test, y_test)}")
print(f"Train Accuracy: {clf.score(X_train, y_train)}")

report = classification_report(y_test, clf.predict(X_test), zero_division=0)
print(report)

{'n_estimators': 80, 'min_samples_split': 9, 'max_features': 1, 'max_depth': 49}
Test Accuracy: 0.8961094484822574
Train Accuracy: 0.9222792388283088
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        85
   ATTEMPTED       0.77      0.72      0.75       314
  FIELD_GOAL       0.89      0.92      0.91       542
        PUNT       0.92      0.98      0.95      1398

    accuracy                           0.90      2339
   macro avg       0.65      0.66      0.65      2339
weighted avg       0.86      0.90      0.88      2339

