## Purpose: Try different models-- Part3.
### Grid search with upsampling and scaling.

In [1]:
# import dependencies.
import pandas as pd
import numpy as np

from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

#### STEP1: Read in dataset.  Remove data from 2016-2019.
- data from 2016-2018 will be used to bs test the model.
- data from 2019 will be used to predict the winners of the 2019 WS.

In [2]:
# read in the data.
team_data = pd.read_csv("../../Resources/clean_data_1969.csv")
del team_data["Unnamed: 0"]
team_data.head()

Unnamed: 0,team,year,A,DP,E,G2,GS2,INN,PB,PO,...,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,winners
0,St. Louis Cardinals,2019,1033,114,43,104,936,8313.0,3,2771,...,4,895,33,42,3896,56,1.29,21,0.538,0
1,Arizona Diamondbacks,2019,1010,83,45,105,945,8538.0,2,2846,...,7,925,24,37,4001,53,1.28,35,0.505,0
2,Kansas City Royals,2019,990,105,45,106,954,8421.0,6,2807,...,5,816,24,41,4125,39,1.46,34,0.368,0
3,Houston Astros,2019,875,54,50,106,954,8589.0,6,2863,...,7,1074,27,42,3929,67,1.14,31,0.632,0
4,Tampa Bay Rays,2019,975,92,53,107,963,8760.0,11,2920,...,6,1037,26,43,3985,59,1.16,40,0.551,0


In [3]:
# remove data from 2016 through 2019.
team_data_new = team_data.loc[team_data["year"] < 2016]
team_data_new.head()

Unnamed: 0,team,year,A,DP,E,G2,GS2,INN,PB,PO,...,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,winners
120,San Francisco Giants,2015,1639,136,72,162,1458,13143.0,6,4381,...,11,1309,43,72,6048,87,1.21,40,0.537,0
121,Washington Nationals,2015,1425,142,73,162,1458,13137.0,17,4379,...,12,1476,46,60,6036,95,1.19,47,0.586,0
122,Houston Astros,2015,1599,135,77,162,1458,13212.0,18,4404,...,8,1396,44,64,6180,84,1.29,98,0.519,0
123,Detroit Tigers,2015,1537,148,75,161,1449,12852.0,5,4284,...,8,1232,47,66,6048,86,1.32,44,0.534,0
124,Boston Red Sox,2015,1427,139,75,162,1458,12957.0,37,4319,...,5,1362,43,61,6073,93,1.27,52,0.574,0


In [4]:
target = team_data_new["winners"]
features = team_data_new.drop({"team", "year", "winners"}, axis=1)
feature_columns = list(features.columns)
print (target.shape)
print (features.shape)
print (feature_columns)

(1266,)
(1266, 59)
['A', 'DP', 'E', 'G2', 'GS2', 'INN', 'PB', 'PO', 'TC', '2B', '3B', 'AB', 'AO', 'BB', 'CS', 'G', 'GDP', 'H', 'HBP', 'HR', 'IBB', 'NP_x', 'OBP', 'OPS_x', 'PA', 'R', 'RBI', 'SAC', 'SB', 'SF', 'SLG', 'SO', 'TB', 'XBH', 'BB1', 'BK', 'CG', 'ER', 'ERA', 'G1', 'GF', 'GS', 'H1', 'HB', 'HR1', 'IBB1', 'IP', 'L', 'OBP1', 'R1', 'SHO', 'SO1', 'SV', 'SVO', 'TBF', 'W', 'WHIP', 'WP', 'WPCT']


#### STEP2: Upsample and scale data.

In [5]:
# reset the index.
team_data_new = team_data_new.reset_index().drop({"index"}, axis=1)
team_data_new.head()

Unnamed: 0,team,year,A,DP,E,G2,GS2,INN,PB,PO,...,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,winners
0,San Francisco Giants,2015,1639,136,72,162,1458,13143.0,6,4381,...,11,1309,43,72,6048,87,1.21,40,0.537,0
1,Washington Nationals,2015,1425,142,73,162,1458,13137.0,17,4379,...,12,1476,46,60,6036,95,1.19,47,0.586,0
2,Houston Astros,2015,1599,135,77,162,1458,13212.0,18,4404,...,8,1396,44,64,6180,84,1.29,98,0.519,0
3,Detroit Tigers,2015,1537,148,75,161,1449,12852.0,5,4284,...,8,1232,47,66,6048,86,1.32,44,0.534,0
4,Boston Red Sox,2015,1427,139,75,162,1458,12957.0,37,4319,...,5,1362,43,61,6073,93,1.27,52,0.574,0


In [6]:
# remove team and year.
feature_columns_new = feature_columns + ["winners"]
team_data_new = team_data[feature_columns_new]
team_data_new.head()

Unnamed: 0,A,DP,E,G2,GS2,INN,PB,PO,TC,2B,...,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,winners
0,1033,114,43,104,936,8313.0,3,2771,3847,157,...,4,895,33,42,3896,56,1.29,21,0.538,0
1,1010,83,45,105,945,8538.0,2,2846,3901,203,...,7,925,24,37,4001,53,1.28,35,0.505,0
2,990,105,45,106,954,8421.0,6,2807,3842,185,...,5,816,24,41,4125,39,1.46,34,0.368,0
3,875,54,50,106,954,8589.0,6,2863,3788,200,...,7,1074,27,42,3929,67,1.14,31,0.632,0
4,975,92,53,107,963,8760.0,11,2920,3948,195,...,6,1037,26,43,3985,59,1.16,40,0.551,0


In [7]:
# upsample for a more balanced dataset.
def upsample(dataset, no_samples):
    '''
    INPUT: 
    -dataset = dataset without team names and year.
    -n_samples = number of minority_unsampled.
    
    OUTPUT:
    -X_train_scaled = scaled X train data.
    -X_test_scaled = scaled X test data.
    -y_train = y train data
    -y_test = y test data
    
    DESCRIPTION:
    -dataset is taken in and split into minority and majority classes.
    -dataset is then upsampled for the mainority class
    -split the data into features and targets
    -split data into train and test sets
    -train and test sets were are scaled.
    '''
    
    # separate majority and minority classes.
    df_majority = dataset.loc[dataset["winners"] == 0]
    df_minority = dataset.loc[dataset["winners"] == 1]

    # upsample minority class.
    df_minority_unsampled = resample(df_minority,
                                    replace=True,
                                    n_samples=no_samples,
                                    random_state=123)

    # combine majority class with upsampled minority class.
    df_upsampled = pd.concat([df_majority, df_minority_unsampled])

    # separate features and target.
    y = df_upsampled["winners"]
    X = df_upsampled[feature_columns]
    
    # split into train and test sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # scale X_train and X_test.
    scaler = StandardScaler()

    # transform the training and testing data.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)
    
    return X_train_scaled, X_test_scaled, y_train, y_test

In [8]:
# Do three different upsamplings.
X_train_100, X_test_100, y_train_100, y_test_100 = upsample(team_data_new, 2234)
X_train_50, X_test_50, y_train_50, y_test_50 = upsample(team_data_new, 1117)
X_train_25, X_test_25, y_train_25, y_test_25 = upsample(team_data_new, 559)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


#### STEP3: Grid Search Model--Logistic Regression.

In [9]:
def grid_search_logistic(X_train, X_test, y_train, y_test):
    '''
    INPUT: 
    -X_train = scaled X train data.
    -X_test = scaled X test data.
    -y_train = y train data.
    -y_test = y test data.
    
    OUTPUT:
    -classification report (has F1 score, precision and recall).
    -grid = saved model for prediction. 
    
    DESCRIPTION:
    -the scaled and split data is put through a grid search with logistic.
    -the model is trained.
    -a prediction is made.
    -print out the classification report and give the model.
    '''
    
    # fit the model.
    model = LogisticRegression(solver="lbfgs", max_iter= 2000)
    
    # create gridsearch estimator.
    param_grid = {"C": [0.001, 0.01, 0.1, 1, 10, 100]}
    grid = GridSearchCV(model, param_grid, verbose=3)

    # fit the model.
    grid.fit(X_train, y_train)

    # predict.
    prediction = grid.predict(X_test)
    
    # print out the basic information about the grid search.
    print (grid.best_params_)
    print (grid.best_score_)
    print (grid.best_estimator_)
    
    grid = grid.best_estimator_
    predictions = grid.predict(X_test)
    print (classification_report(y_test, prediction, target_names=["0", "1"]))
    
    return grid

In [10]:
# for 1 part 0 to 1 part 1
model_100 = grid_search_logistic(X_train_100, X_test_100, y_train_100, y_test_100)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.7189249720044792, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.7155655095184771, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.7399103139013453, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7491601343784995, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7502799552071668, total=   0.0s
[CV] C=0.01 ..........................................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ................. C=0.01, score=0.7612107623318386, total=   0.0s
[CV] C=0.1 ...........................................................
[CV] .................. C=0.1, score=0.7838745800671892, total=   0.0s
[CV] C=0.1 ...........................................................
[CV] .................. C=0.1, score=0.7793952967525196, total=   0.0s
[CV] C=0.1 ...........................................................
[CV] .................. C=0.1, score=0.7881165919282511, total=   0.0s
[CV] C=1 .............................................................
[CV] .................... C=1, score=0.7917133258678611, total=   0.1s
[CV] C=1 .............................................................
[CV] .................... C=1, score=0.7917133258678611, total=   0.1s
[CV] C=1 .............................................................
[CV] .................... C=1, score=0.7556053811659192, total=   0.1s
[CV] C=10 ............................................................
[CV] .

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    1.6s finished


{'C': 100}
0.7912621359223301
LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=2000, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)
              precision    recall  f1-score   support

           0       0.77      0.47      0.59       356
           1       0.72      0.91      0.80       537

   micro avg       0.73      0.73      0.73       893
   macro avg       0.75      0.69      0.70       893
weighted avg       0.74      0.73      0.72       893



In [11]:
# for 1 part 0 to 0.5 part 1
model_50 = grid_search_logistic(X_train_50, X_test_50, y_train_50, y_test_50)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.6758957654723127, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.6786296900489397, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.6623164763458401, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.6775244299674267, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7096247960848288, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7128874388254486, total=   0.0s
[CV] C=0.1 ...........................................................
[CV] ............

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .................... C=1, score=0.7911908646003263, total=   0.0s
[CV] C=10 ............................................................
[CV] .................... C=10, score=0.758957654723127, total=   0.1s
[CV] C=10 ............................................................
[CV] .................... C=10, score=0.765089722675367, total=   0.1s
[CV] C=10 ............................................................
[CV] .................... C=10, score=0.797716150081566, total=   0.1s
[CV] C=100 ...........................................................
[CV] ................... C=100, score=0.745928338762215, total=   0.2s
[CV] C=100 ...........................................................
[CV] .................. C=100, score=0.7699836867862969, total=   0.2s
[CV] C=100 ...........................................................
[CV] .................. C=100, score=0.7846655791190864, total=   0.2s
{'C': 10}
0.7739130434782608
LogisticRegression(C=10, class_weight=None, dual

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    1.1s finished


In [12]:
# for 1 part 0 to 0.25 part 1
model_25 = grid_search_logistic(X_train_25, X_test_25, y_train_25, y_test_25)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.6983122362869199, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.7067510548523207, total=   0.0s
[CV] C=0.001 .........................................................
[CV] ................ C=0.001, score=0.7088607594936709, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7088607594936709, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7320675105485233, total=   0.0s
[CV] C=0.01 ..........................................................
[CV] .................. C=0.01, score=0.740506329113924, total=   0.0s
[CV] C=0.1 ...........................................................
[CV] ............

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .................. C=0.1, score=0.7320675105485233, total=   0.0s
[CV] C=1 .............................................................
[CV] .................... C=1, score=0.7489451476793249, total=   0.0s
[CV] C=1 .............................................................
[CV] .................... C=1, score=0.7616033755274262, total=   0.0s
[CV] C=1 .............................................................
[CV] .................... C=1, score=0.7130801687763713, total=   0.0s
[CV] C=10 ............................................................
[CV] ................... C=10, score=0.7616033755274262, total=   0.1s
[CV] C=10 ............................................................
[CV] ................... C=10, score=0.7531645569620253, total=   0.1s
[CV] C=10 ............................................................
[CV] ................... C=10, score=0.7320675105485233, total=   0.1s
[CV] C=100 ...........................................................
[CV] .

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.9s finished


{'C': 100}
0.7679324894514767
LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=2000, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)
              precision    recall  f1-score   support

           0       0.78      0.88      0.82       341
           1       0.53      0.35      0.42       133

   micro avg       0.73      0.73      0.73       474
   macro avg       0.65      0.62      0.62       474
weighted avg       0.71      0.73      0.71       474



This is not significantly better than the straight logistic regression.

In [13]:
def predict_the_winner(model, year, team_data, X_train):
    '''
    INPUT: 
    -X_train = scaled X train data.
    -model = the saved model.
    -team_data = complete dataframe with all data.
    -year = the year want to look at.
    
    OUTPUT:
    -printed prediction.
    
    DESCRIPTION:
    -data from year of interest is isolated.
    -the data are scaled.
    -the prediction is made.
    -print out the resulting probability and the name of the team.
    '''
    
    # grab the data.
    team_data = team_data.loc[team_data["year"] == year].reset_index()

    # set features (no team, year, winners).
    # set target (winners).
    features = team_data[feature_columns]
    
    # scale X_train and X_test.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    features = scaler.fit_transform(features)
    
    # fit the model.
    probabilities = model.predict_proba(features)

    # convert predictions to datafram.e
    WS_predictions = pd.DataFrame(probabilities[:,1])

    # Sort the DataFrame (descending)
    WS_predictions = WS_predictions.sort_values(0, ascending=False)

    WS_predictions['Probability'] = WS_predictions[0]

    # Print 50 highest probability HoF inductees from still eligible players
    for i, row in WS_predictions.head(50).iterrows():
       prob = ' '.join(('WS Probability =', str(row['Probability'])))
       print('')
       print(prob)
       print(team_data.iloc[i,1:27]["team"])

In [14]:
# predict for 2018.
predict_the_winner(model_100, 2018, team_data, X_train_100)


WS Probability = 0.9999999796151287
Detroit Tigers

WS Probability = 0.9999999732957945
Chicago White Sox

WS Probability = 0.9999998859544824
Miami Marlins

WS Probability = 0.9999998014401282
Cincinnati Reds

WS Probability = 0.9999877805103611
Minnesota Twins

WS Probability = 0.9999833878578422
Cleveland Indians

WS Probability = 0.9994345267302207
Pittsburgh Pirates

WS Probability = 0.997472521091535
St. Louis Cardinals

WS Probability = 0.9972722844171125
Baltimore Orioles

WS Probability = 0.9972473198536111
Washington Nationals

WS Probability = 0.993050604401324
New York Mets

WS Probability = 0.9929440901662251
Arizona Diamondbacks

WS Probability = 0.9513661559025627
Los Angeles Angels

WS Probability = 0.7434452425771586
Houston Astros

WS Probability = 0.6195732575227896
Los Angeles Dodgers

WS Probability = 0.4464770163316731
New York Yankees

WS Probability = 0.43546171304255155
Boston Red Sox

WS Probability = 0.13511797137116857
Philadelphia Phillies

WS Probability 

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [15]:
# predict for 2017.
predict_the_winner(model_100, 2017, team_data, X_train_100)


WS Probability = 0.9999999999994349
Cleveland Indians

WS Probability = 0.9999999999910565
Boston Red Sox

WS Probability = 0.9999999999904439
Miami Marlins

WS Probability = 0.9999999973425007
Atlanta Braves

WS Probability = 0.9999999877697857
Pittsburgh Pirates

WS Probability = 0.9999994318227372
Detroit Tigers

WS Probability = 0.9999929132010011
Washington Nationals

WS Probability = 0.9999902701727772
Oakland Athletics

WS Probability = 0.9982717478389151
Minnesota Twins

WS Probability = 0.9967912011061278
Seattle Mariners

WS Probability = 0.9964095422155289
Los Angeles Angels

WS Probability = 0.9810973541159649
Houston Astros

WS Probability = 0.9513902952056933
Chicago White Sox

WS Probability = 0.9492792432519985
New York Yankees

WS Probability = 0.9355512306819711
San Francisco Giants

WS Probability = 0.8116889119107771
Kansas City Royals

WS Probability = 0.3927564960958477
Cincinnati Reds

WS Probability = 0.22322911914494856
Baltimore Orioles

WS Probability = 0.08

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [16]:
# predict for 2016.
predict_the_winner(model_100, 2016, team_data, X_train_100)


WS Probability = 0.9999999995746522
Cleveland Indians

WS Probability = 0.999999998387364
Houston Astros

WS Probability = 0.9999999903364186
New York Yankees

WS Probability = 0.9999992662311983
Washington Nationals

WS Probability = 0.9999988633545694
Detroit Tigers

WS Probability = 0.9999983307911819
Baltimore Orioles

WS Probability = 0.9999889154144862
Boston Red Sox

WS Probability = 0.9999657737694823
Seattle Mariners

WS Probability = 0.9997885625821962
Minnesota Twins

WS Probability = 0.9972718569229063
Kansas City Royals

WS Probability = 0.9917879084322806
Chicago White Sox

WS Probability = 0.988943226452035
Arizona Diamondbacks

WS Probability = 0.988459596354887
Atlanta Braves

WS Probability = 0.9393819808750354
Miami Marlins

WS Probability = 0.536921226562839
Tampa Bay Rays

WS Probability = 0.24311358413646056
Philadelphia Phillies

WS Probability = 0.15244036533032884
Los Angeles Dodgers

WS Probability = 0.12291803468857221
San Francisco Giants

WS Probability = 

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


#### STEP4: Grid Search Model--SVC.

In [17]:
def grid_search_svc(X_train, X_test, y_train, y_test):
    '''
    INPUT: 
    -X_train = scaled X train data.
    -X_test = scaled X test data.
    -y_train = y train data.
    -y_test = y test data.
    
    OUTPUT:
    -classification report (has F1 score, precision and recall).
    -grid = saved model for prediction. 
    
    DESCRIPTION:
    -the scaled and split data is put through a grid search with svc.
    -the model is trained.
    -a prediction is made.
    -print out the classification report and give the model.
    '''
    
    # set up svc model.
    model = SVC(kernel="rbf", probability=True)

    # create gridsearch estimator.
    param_grid = {"C": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                 "gamma": [0.0001, 0.001, 0.01, 0.1]}
    grid = GridSearchCV(model, param_grid, verbose=3)

    # fit the model.
    grid.fit(X_train, y_train)

    # predict.
    prediction = grid.predict(X_test)
    
    # print out the basic information about the grid search.
    print (grid.best_params_)
    print (grid.best_score_)
    print (grid.best_estimator_)
    
    grid = grid.best_estimator_
    predictions = grid.predict(X_test)
    print (classification_report(y_test, prediction, target_names=["0", "1"]))
    
    return grid

In [18]:
# for 1 part 0 to 1 part 1
model_100 = grid_search_svc(X_train_100, X_test_100, y_train_100, y_test_100)

Fitting 3 folds for each of 28 candidates, totalling 84 fits
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] . C=0.0001, gamma=0.0001, score=0.6338185890257558, total=   1.0s
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.2s remaining:    0.0s


[CV] . C=0.0001, gamma=0.0001, score=0.6338185890257558, total=   1.0s
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.4s remaining:    0.0s


[CV] . C=0.0001, gamma=0.0001, score=0.6334080717488789, total=   1.0s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .. C=0.0001, gamma=0.001, score=0.6338185890257558, total=   1.0s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .. C=0.0001, gamma=0.001, score=0.6338185890257558, total=   1.0s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .. C=0.0001, gamma=0.001, score=0.6334080717488789, total=   1.0s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ... C=0.0001, gamma=0.01, score=0.6338185890257558, total=   1.0s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ... C=0.0001, gamma=0.01, score=0.6338185890257558, total=   1.0s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ... C=0.0001, gamma=0.01, score=0.6334080717488789, total=   1.0s
[CV] C=0.0001, gamma=0.1 .............................................
[CV] .

[CV] ..... C=10, gamma=0.0001, score=0.7681970884658454, total=   1.0s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ...... C=10, gamma=0.0001, score=0.734602463605823, total=   1.0s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.7455156950672646, total=   1.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.7950727883538634, total=   0.9s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.8163493840985442, total=   0.9s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.8307174887892377, total=   0.9s
[CV] C=10, gamma=0.01 ................................................
[CV] ....... C=10, gamma=0.01, score=0.9686450167973124, total=   0.4s
[CV] C=10, gamma=0.01 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:  1.5min finished


{'C': 1, 'gamma': 0.1}
0.9992531740104555
SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       356
           1       1.00      1.00      1.00       537

   micro avg       1.00      1.00      1.00       893
   macro avg       1.00      1.00      1.00       893
weighted avg       1.00      1.00      1.00       893



In [19]:
# for 1 part 0 to 0.5 part 1
model_50 = grid_search_svc(X_train_50, X_test_50, y_train_50, y_test_50)

Fitting 3 folds for each of 28 candidates, totalling 84 fits
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] . C=0.0001, gamma=0.0001, score=0.5439739413680782, total=   0.6s
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV] . C=0.0001, gamma=0.0001, score=0.5432300163132137, total=   0.6s
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.4s remaining:    0.0s


[CV] . C=0.0001, gamma=0.0001, score=0.5432300163132137, total=   0.6s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .. C=0.0001, gamma=0.001, score=0.5439739413680782, total=   0.6s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .. C=0.0001, gamma=0.001, score=0.5432300163132137, total=   0.6s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .. C=0.0001, gamma=0.001, score=0.5432300163132137, total=   0.6s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ... C=0.0001, gamma=0.01, score=0.5439739413680782, total=   0.6s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ... C=0.0001, gamma=0.01, score=0.5432300163132137, total=   0.6s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ... C=0.0001, gamma=0.01, score=0.5432300163132137, total=   0.6s
[CV] C=0.0001, gamma=0.1 .............................................
[CV] .

[CV] ..... C=10, gamma=0.0001, score=0.6824104234527687, total=   0.5s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.6851549755301795, total=   0.5s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..... C=10, gamma=0.0001, score=0.7014681892332789, total=   0.5s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.7785016286644951, total=   0.5s
[CV] C=10, gamma=0.001 ...............................................
[CV] ....... C=10, gamma=0.001, score=0.763458401305057, total=   0.5s
[CV] C=10, gamma=0.001 ...............................................
[CV] ....... C=10, gamma=0.001, score=0.797716150081566, total=   0.5s
[CV] C=10, gamma=0.01 ................................................
[CV] ....... C=10, gamma=0.01, score=0.9462540716612378, total=   0.3s
[CV] C=10, gamma=0.01 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:   51.5s finished


{'C': 1, 'gamma': 0.1}
0.9983695652173913
SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       337
           1       1.00      1.00      1.00       277

   micro avg       1.00      1.00      1.00       614
   macro avg       1.00      1.00      1.00       614
weighted avg       1.00      1.00      1.00       614



In [20]:
# for 1 part 0 to 0.25 part 1
model_25 = grid_search_svc(X_train_25, X_test_25, y_train_25, y_test_25)

Fitting 3 folds for each of 28 candidates, totalling 84 fits
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ... C=0.0001, gamma=0.0001, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV] ... C=0.0001, gamma=0.0001, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.0001 ..........................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.5s remaining:    0.0s


[CV] ... C=0.0001, gamma=0.0001, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .... C=0.0001, gamma=0.001, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .... C=0.0001, gamma=0.001, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.001 ...........................................
[CV] .... C=0.0001, gamma=0.001, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ..... C=0.0001, gamma=0.01, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ..... C=0.0001, gamma=0.01, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.01 ............................................
[CV] ..... C=0.0001, gamma=0.01, score=0.70042194092827, total=   0.2s
[CV] C=0.0001, gamma=0.1 .............................................
[CV] .

[CV] ....... C=10, gamma=0.0001, score=0.70042194092827, total=   0.3s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ....... C=10, gamma=0.0001, score=0.70042194092827, total=   0.3s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ....... C=10, gamma=0.0001, score=0.70042194092827, total=   0.3s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.7510548523206751, total=   0.3s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.7784810126582279, total=   0.3s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...... C=10, gamma=0.001, score=0.7320675105485233, total=   0.3s
[CV] C=10, gamma=0.01 ................................................
[CV] ........ C=10, gamma=0.01, score=0.930379746835443, total=   0.2s
[CV] C=10, gamma=0.01 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:   25.3s finished


{'C': 1, 'gamma': 0.1}
0.9929676511954993
SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       341
           1       1.00      1.00      1.00       133

   micro avg       1.00      1.00      1.00       474
   macro avg       1.00      1.00      1.00       474
weighted avg       1.00      1.00      1.00       474



Huh.  That's pretty good.  Try out model_100 and model_50 with the 2016-2018 stuff.

#### STEP5: Predict 2016-2018 winners with SVC Grid Search.

In [21]:
# predict for 2018.
predict_the_winner(model_100, 2018, team_data, X_train_100)


WS Probability = 0.0006656973612725744
Atlanta Braves

WS Probability = 0.0004539294524904119
Washington Nationals

WS Probability = 0.0004437481695900674
Los Angeles Angels

WS Probability = 0.00042906066121102353
Arizona Diamondbacks

WS Probability = 0.00040086544094272334
Cleveland Indians

WS Probability = 0.0003970627246123335
Chicago Cubs

WS Probability = 0.0003922913819813293
Boston Red Sox

WS Probability = 0.0003860681209900918
New York Yankees

WS Probability = 0.0003553660040414194
Texas Rangers

WS Probability = 0.0003482283550229903
Minnesota Twins

WS Probability = 0.00034481190294273104
Oakland Athletics

WS Probability = 0.00032924616933615957
Houston Astros

WS Probability = 0.0003258598014238309
St. Louis Cardinals

WS Probability = 0.000324353982789271
Tampa Bay Rays

WS Probability = 0.0003145214764987078
Los Angeles Dodgers

WS Probability = 0.0003115962074101451
Detroit Tigers

WS Probability = 0.00031158144388175536
Cincinnati Reds

WS Probability = 0.00030883

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [22]:
# predict for 2017.
predict_the_winner(model_100, 2017, team_data, X_train_100)


WS Probability = 0.0005126552364382324
Washington Nationals

WS Probability = 0.00046884542727815997
Tampa Bay Rays

WS Probability = 0.0004290790118414186
Atlanta Braves

WS Probability = 0.0004234012300753116
Seattle Mariners

WS Probability = 0.00037530520918015344
Los Angeles Angels

WS Probability = 0.0003408763586649337
Colorado Rockies

WS Probability = 0.0003380141699354849
Boston Red Sox

WS Probability = 0.00033581009447122487
Milwaukee Brewers

WS Probability = 0.00033545587431374973
Houston Astros

WS Probability = 0.0003332256635911475
Philadelphia Phillies

WS Probability = 0.00032895820214669377
Minnesota Twins

WS Probability = 0.0003264195588750959
Chicago White Sox

WS Probability = 0.00032512763226039815
Los Angeles Dodgers

WS Probability = 0.00031973709098426743
New York Yankees

WS Probability = 0.0003160726688806332
Baltimore Orioles

WS Probability = 0.0003138540200045984
Cleveland Indians

WS Probability = 0.0003110495060042827
New York Mets

WS Probability = 

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [23]:
# predict for 2016
predict_the_winner(model_100, 2016, team_data, X_train_100)


WS Probability = 0.0008164672513890737
Seattle Mariners

WS Probability = 0.0006049129427353949
Chicago Cubs

WS Probability = 0.0006001074149606276
Washington Nationals

WS Probability = 0.0005312740495923533
Arizona Diamondbacks

WS Probability = 0.0004048566496174574
St. Louis Cardinals

WS Probability = 0.0004004400543325232
Minnesota Twins

WS Probability = 0.00039002043644821577
Philadelphia Phillies

WS Probability = 0.000380243352460748
Tampa Bay Rays

WS Probability = 0.0003636797359190933
Houston Astros

WS Probability = 0.0003462601489015429
New York Yankees

WS Probability = 0.00034554431549241316
Los Angeles Angels

WS Probability = 0.00033500044603511245
Milwaukee Brewers

WS Probability = 0.0003343169928296628
Colorado Rockies

WS Probability = 0.00032227741693887876
Cincinnati Reds

WS Probability = 0.00031450737262955894
Los Angeles Dodgers

WS Probability = 0.00031290349260387984
Boston Red Sox

WS Probability = 0.00031199419364210323
Cleveland Indians

WS Probabilit

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [24]:
# predict for 2018
predict_the_winner(model_50, 2018, team_data, X_train_50)


WS Probability = 0.0004971510088878972
Atlanta Braves

WS Probability = 0.0003551640221267637
Cleveland Indians

WS Probability = 0.0003460326636683743
Boston Red Sox

WS Probability = 0.00034465138210575856
Los Angeles Angels

WS Probability = 0.00033243005152387646
New York Yankees

WS Probability = 0.0003250582396460149
Minnesota Twins

WS Probability = 0.00031852500485931375
Arizona Diamondbacks

WS Probability = 0.00031392237967062817
Washington Nationals

WS Probability = 0.0002951948926613665
Chicago Cubs

WS Probability = 0.0002830416648474986
Houston Astros

WS Probability = 0.0002760236433107835
Tampa Bay Rays

WS Probability = 0.0002736265118174676
Texas Rangers

WS Probability = 0.0002656684966822556
Los Angeles Dodgers

WS Probability = 0.00026389516470641766
Oakland Athletics

WS Probability = 0.0002635149958974508
Baltimore Orioles

WS Probability = 0.0002629980921827445
Detroit Tigers

WS Probability = 0.00026217164520587485
Cincinnati Reds

WS Probability = 0.00026162

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [28]:
# predict for 2017
predict_the_winner(model_50, 2017, team_data, X_train_50)


WS Probability = 0.0004224316448471586
Tampa Bay Rays

WS Probability = 0.0003898933905903332
Washington Nationals

WS Probability = 0.0003442135439656621
Seattle Mariners

WS Probability = 0.00034205224108193493
Atlanta Braves

WS Probability = 0.00030552292583755427
Los Angeles Angels

WS Probability = 0.00028866887960417683
Boston Red Sox

WS Probability = 0.00028740669941666643
Chicago White Sox

WS Probability = 0.0002832513570653311
Houston Astros

WS Probability = 0.00028089515102259604
Colorado Rockies

WS Probability = 0.00028085681881461687
Milwaukee Brewers

WS Probability = 0.00027677244084653695
Philadelphia Phillies

WS Probability = 0.00027588864983305755
Los Angeles Dodgers

WS Probability = 0.00027158293954191245
New York Yankees

WS Probability = 0.0002705201242209517
Baltimore Orioles

WS Probability = 0.000268868808915833
Minnesota Twins

WS Probability = 0.00026674143614084656
Cleveland Indians

WS Probability = 0.0002616966061593246
Miami Marlins

WS Probability 

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [27]:
# predict for 2016
predict_the_winner(model_50, 2016, team_data, X_train_50)


WS Probability = 0.0005880361702190412
Seattle Mariners

WS Probability = 0.000551829718297707
Washington Nationals

WS Probability = 0.00045936361190765965
Arizona Diamondbacks

WS Probability = 0.0004437252026443765
Chicago Cubs

WS Probability = 0.0003223559938734053
Houston Astros

WS Probability = 0.0003191804192746555
Minnesota Twins

WS Probability = 0.0003177349658629535
Tampa Bay Rays

WS Probability = 0.00029884769861645736
New York Yankees

WS Probability = 0.00029789801165561885
Philadelphia Phillies

WS Probability = 0.00028458029010616635
St. Louis Cardinals

WS Probability = 0.0002782313856715414
Milwaukee Brewers

WS Probability = 0.00027135475814194686
Los Angeles Angels

WS Probability = 0.0002668688398981177
Los Angeles Dodgers

WS Probability = 0.0002636998920959852
Boston Red Sox

WS Probability = 0.00026341713390838915
Cleveland Indians

WS Probability = 0.0002607971875192644
Cincinnati Reds

WS Probability = 0.00026008738808614833
San Diego Padres

WS Probabilit

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


This is not better.  The logistic regression without grid and upsampling works better than either upsampled thing.