In [None]:
! pip install category_encoders

In [None]:
! pip install eli5

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import model_preprocessing as mp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import category_encoders
from category_encoders.one_hot import OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
import eli5

# Let wide and long frames render in full: raise both display limits to 500.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, 500)

In [None]:
# Load the raw data from 'verlander.csv' (path is relative to the notebook's
# working directory) and preview the first rows.
df = pd.read_csv('verlander.csv')
df.head()

# Clean

In [None]:
# Hand the raw frame to the Preprocess helper from the local
# model_preprocessing module (imported as mp).
processor = mp.Preprocess(dataframe = df)

In [None]:
# Run the preprocessing on the processor's own `df` attribute (presumably the
# frame passed to the constructor -- confirm in model_preprocessing).
# NOTE: the result is bound to `df`, so the raw frame is no longer reachable
# under that name after this cell.
df = processor.process(processor.df)

In [None]:
# Preview the first rows of the processed frame.
df.head()

# Create Feature Matrix and Target Vector

In [None]:
# Separate the label column ('next_pitch') from the predictor columns.
target_vector = df['next_pitch']
feature_matrix = df.drop('next_pitch', axis = 'columns')

# Quick shape check of both pieces.
(feature_matrix.shape, target_vector.shape)

# Train Test Split

In [None]:
# Hold out a test set (train_test_split's default split proportions are used).
# random_state is fixed so the same rows land in each partition on every run;
# without it the split, and every score computed from it, changes on each
# Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    feature_matrix, target_vector, random_state = 42)

# Shape check of the four partitions.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

# Instantiate Model

In [None]:
# Scale the features with RobustScaler, then classify with a linear SVM.
# LinearSVC's solver draws random numbers while fitting, so random_state is
# pinned to keep the fitted weights and scores repeatable between runs.
clf = Pipeline(steps = [('scaler', RobustScaler()),
                         ('svm', LinearSVC(random_state = 42))])


# Hyper-parameter grid; keys use Pipeline's '<step name>__<parameter>' form.
params = {
    'svm__max_iter': [20, 200]
}


# 10-fold cross-validated search scored on accuracy. refit = True retrains the
# best configuration on all the data passed to fit; n_jobs = -1 uses every
# available CPU core; verbose = 10 prints progress for each fit.
grid = GridSearchCV(
    estimator = clf,
    scoring = 'accuracy',
    param_grid = params,
    refit = True,
    cv = 10,
    verbose = 10,
    n_jobs = -1)

# Train Model

In [None]:
# Run the cross-validated grid search on the training partition only.
grid.fit(x_train, y_train)

In [None]:
# Keep the pipeline that GridSearchCV refit with the best parameters found,
# and display it.
model = grid.best_estimator_
model

In [None]:
# Cross-validation results with the best-scoring parameter settings first
# (the default ascending sort would list the worst ones at the top).
pd.DataFrame(grid.cv_results_).sort_values(by = 'mean_test_score', ascending = False).head()

# Evaluate Model

In [None]:
# Accuracy on the training partition. scikit-learn metrics take
# (y_true, y_pred), so the labels go first and the predictions second.
accuracy_score(y_train, model.predict(x_train))

In [None]:
# Relative frequency of each 'next_pitch' value in the processed frame.
# The largest share is the accuracy that always predicting the most common
# class would reach on this frame, which gives the scores a reference point.
df['next_pitch'].value_counts(normalize = True)

In [None]:
# Render eli5's weight table for the fitted pipeline, labelling the features
# with the feature-matrix column names.
eli5.show_weights(model, feature_names = feature_matrix.columns)

In [None]:
# The same weights as a pandas DataFrame. explain_weights_df is not imported
# as a bare name in this notebook (that raised NameError), so it is called
# through the eli5 module, which is imported.
eli5.explain_weights_df(model, feature_names = feature_matrix.columns)

In [None]:
# Accuracy on the held-out test partition. scikit-learn metrics take
# (y_true, y_pred), so the labels go first and the predictions second.
accuracy_score(y_test, model.predict(x_test))