## Purpose: Try different models-- Part6.
### forest

In [1]:
# import dependencies.
import pandas as pd
import numpy as np

from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

#### STEP1: Read in dataset.  Remove data from 2016-2019.
- data from 2016-2018 will be used to bs test the model.
- data from 2019 will be used to predict the winners of the 2019 WS.

In [2]:
# read in the data.
team_data = pd.read_csv("../../Resources/clean_data_1969.csv")
del team_data["Unnamed: 0"]
team_data.head()

Unnamed: 0,team,year,A,DP,E,G2,GS2,INN,PB,PO,...,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,winners
0,St. Louis Cardinals,2019,1033,114,43,104,936,8313.0,3,2771,...,4,895,33,42,3896,56,1.29,21,0.538,0
1,Arizona Diamondbacks,2019,1010,83,45,105,945,8538.0,2,2846,...,7,925,24,37,4001,53,1.28,35,0.505,0
2,Kansas City Royals,2019,990,105,45,106,954,8421.0,6,2807,...,5,816,24,41,4125,39,1.46,34,0.368,0
3,Houston Astros,2019,875,54,50,106,954,8589.0,6,2863,...,7,1074,27,42,3929,67,1.14,31,0.632,0
4,Tampa Bay Rays,2019,975,92,53,107,963,8760.0,11,2920,...,6,1037,26,43,3985,59,1.16,40,0.551,0


In [3]:
# remove data from 2016 through 2019.
team_data_new = team_data.loc[team_data["year"] < 2016]
team_data_new.head()

Unnamed: 0,team,year,A,DP,E,G2,GS2,INN,PB,PO,...,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,winners
120,San Francisco Giants,2015,1639,136,72,162,1458,13143.0,6,4381,...,11,1309,43,72,6048,87,1.21,40,0.537,0
121,Washington Nationals,2015,1425,142,73,162,1458,13137.0,17,4379,...,12,1476,46,60,6036,95,1.19,47,0.586,0
122,Houston Astros,2015,1599,135,77,162,1458,13212.0,18,4404,...,8,1396,44,64,6180,84,1.29,98,0.519,0
123,Detroit Tigers,2015,1537,148,75,161,1449,12852.0,5,4284,...,8,1232,47,66,6048,86,1.32,44,0.534,0
124,Boston Red Sox,2015,1427,139,75,162,1458,12957.0,37,4319,...,5,1362,43,61,6073,93,1.27,52,0.574,0


In [4]:
target = team_data_new["winners"]
features = team_data_new.drop({"team", "year", "winners"}, axis=1)
feature_columns = list(features.columns)
print (target.shape)
print (features.shape)
print (feature_columns)

(1266,)
(1266, 59)
['A', 'DP', 'E', 'G2', 'GS2', 'INN', 'PB', 'PO', 'TC', '2B', '3B', 'AB', 'AO', 'BB', 'CS', 'G', 'GDP', 'H', 'HBP', 'HR', 'IBB', 'NP_x', 'OBP', 'OPS_x', 'PA', 'R', 'RBI', 'SAC', 'SB', 'SF', 'SLG', 'SO', 'TB', 'XBH', 'BB1', 'BK', 'CG', 'ER', 'ERA', 'G1', 'GF', 'GS', 'H1', 'HB', 'HR1', 'IBB1', 'IP', 'L', 'OBP1', 'R1', 'SHO', 'SO1', 'SV', 'SVO', 'TBF', 'W', 'WHIP', 'WP', 'WPCT']


#### STEP2: Split data. Train and test the model.

In [5]:
def tree(features, target):
    '''
    INPUT: 
    -features = dataset without team names and year.
    -target = labels.
    
    OUTPUT:
    -model = model output
    
    DESCRIPTION:
    -dataset is taken in and split into test and train sets.
    -model is fit
    -model is tested
    '''
    
    # split data.
    X_train, X_test, y_train, y_test = train_test_split(features, target)

    # train model.
    model = RandomForestClassifier(n_jobs=-1,
                                   n_estimators=100)
    model.fit(X_train, y_train)

    # scale data.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)
    
    # predict.
    prediction = model.predict(X_test)

    print (classification_report(y_test, prediction, target_names=["0", "1"]))
    return model

In [6]:
model = tree(features, target)

              precision    recall  f1-score   support

           0       0.95      1.00      0.98       302
           1       0.00      0.00      0.00        15

   micro avg       0.95      0.95      0.95       317
   macro avg       0.48      0.50      0.49       317
weighted avg       0.91      0.95      0.93       317



  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  'precision', 'predicted', average, warn_for)


This is terrible.  Let's try upsampling.

#### STEP3: Upsampling

In [7]:
# upsample for a more balanced dataset.
def upsample(dataset, no_samples):
    '''
    INPUT: 
    -dataset = dataset without team names and year.
    -n_samples = number of minority_unsampled.
    
    OUTPUT:
    -X_train_scaled = scaled X train data.
    -X_test_scaled = scaled X test data.
    -y_train = y train data
    -y_test = y test data
    
    DESCRIPTION:
    -dataset is taken in and split into minority and majority classes.
    -dataset is then upsampled for the mainority class
    -split the data into features and targets
    -split data into train and test sets
    -train and test sets were are scaled.
    '''
    
    # separate majority and minority classes.
    df_majority = dataset.loc[dataset["winners"] == 0]
    df_minority = dataset.loc[dataset["winners"] == 1]

    # upsample minority class.
    df_minority_unsampled = resample(df_minority,
                                    replace=True,
                                    n_samples=no_samples,
                                    random_state=123)

    # combine majority class with upsampled minority class.
    df_upsampled = pd.concat([df_majority, df_minority_unsampled])

    # separate features and target.
    y = df_upsampled["winners"]
    X = df_upsampled[feature_columns]
    
    model = tree(X, y)
    return model

In [8]:
model_100 = upsample(team_data_new, 2234)
print ("_______________________________")
model_50 = upsample(team_data_new, 1117)
print ("_______________________________")
model_25 = upsample(team_data_new, 559)
print ("_______________________________")
model_12 = upsample(team_data_new, 250)
print ("_______________________________")
model_6 = upsample(team_data_new, 200)
print ("_______________________________")
model_3 = upsample(team_data_new, 150)


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


              precision    recall  f1-score   support

           0       1.00      0.99      1.00       314
           1       0.99      1.00      1.00       550

   micro avg       1.00      1.00      1.00       864
   macro avg       1.00      1.00      1.00       864
weighted avg       1.00      1.00      1.00       864

_______________________________


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       305
           1       1.00      1.00      1.00       280

   micro avg       1.00      1.00      1.00       585
   macro avg       1.00      1.00      1.00       585
weighted avg       1.00      1.00      1.00       585

_______________________________


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       303
           1       0.99      1.00      1.00       142

   micro avg       1.00      1.00      1.00       445
   macro avg       1.00      1.00      1.00       445
weighted avg       1.00      1.00      1.00       445

_______________________________


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       308
           1       1.00      1.00      1.00        60

   micro avg       1.00      1.00      1.00       368
   macro avg       1.00      1.00      1.00       368
weighted avg       1.00      1.00      1.00       368

_______________________________


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       309
           1       1.00      0.98      0.99        46

   micro avg       1.00      1.00      1.00       355
   macro avg       1.00      0.99      0.99       355
weighted avg       1.00      1.00      1.00       355

_______________________________
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       304
           1       1.00      0.90      0.95        39

   micro avg       0.99      0.99      0.99       343
   macro avg       0.99      0.95      0.97       343
weighted avg       0.99      0.99      0.99       343



  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Probably overfit.

In [9]:
def predict_the_winner(model, year, team_data):
    '''
    INPUT: 
    -X_train = scaled X train data.
    -model = the saved model.
    -team_data = complete dataframe with all data.
    -year = the year want to look at.
    
    OUTPUT:
    -printed prediction.
    
    DESCRIPTION:
    -data from year of interest is isolated.
    -the data are scaled.
    -the prediction is made.
    -print out the resulting probability and the name of the team.
    '''
    
    # grab the data.
    team_data = team_data.loc[team_data["year"] == year].reset_index()

    # set features (no team, year, winners).
    # set target (winners).
    features = team_data[feature_columns]
    
    # fit the model.
    probabilities = model.predict_proba(features)

    # convert predictions to datafram.e
    WS_predictions = pd.DataFrame(probabilities[:,1])

    # Sort the DataFrame (descending)
    WS_predictions = WS_predictions.sort_values(0, ascending=False)

    WS_predictions['Probability'] = WS_predictions[0]

    # Print 50 highest probability HoF inductees from still eligible players
    for i, row in WS_predictions.head(50).iterrows():
       prob = ' '.join(('WS Probability =', str(row['Probability'])))
       print('')
       print(prob)
       print(team_data.iloc[i,1:27]["team"])

In [10]:
predict_the_winner(model_100, 2018, team_data)


WS Probability = 0.11
Texas Rangers

WS Probability = 0.1
Pittsburgh Pirates

WS Probability = 0.09
Milwaukee Brewers

WS Probability = 0.08
Colorado Rockies

WS Probability = 0.08
Philadelphia Phillies

WS Probability = 0.08
Los Angeles Angels

WS Probability = 0.07
Atlanta Braves

WS Probability = 0.07
Baltimore Orioles

WS Probability = 0.07
Detroit Tigers

WS Probability = 0.07
Kansas City Royals

WS Probability = 0.07
San Francisco Giants

WS Probability = 0.06
Oakland Athletics

WS Probability = 0.06
Chicago White Sox

WS Probability = 0.06
Toronto Blue Jays

WS Probability = 0.06
Washington Nationals

WS Probability = 0.06
Arizona Diamondbacks

WS Probability = 0.06
Boston Red Sox

WS Probability = 0.05
San Diego Padres

WS Probability = 0.05
New York Mets

WS Probability = 0.05
St. Louis Cardinals

WS Probability = 0.05
Chicago Cubs

WS Probability = 0.05
Tampa Bay Rays

WS Probability = 0.05
Houston Astros

WS Probability = 0.04
New York Yankees

WS Probability = 0.04
Seattle

In [11]:
predict_the_winner(model_50, 2018, team_data)


WS Probability = 0.25
Texas Rangers

WS Probability = 0.18
Pittsburgh Pirates

WS Probability = 0.17
Boston Red Sox

WS Probability = 0.17
Kansas City Royals

WS Probability = 0.16
Detroit Tigers

WS Probability = 0.16
Milwaukee Brewers

WS Probability = 0.15
Baltimore Orioles

WS Probability = 0.14
Los Angeles Angels

WS Probability = 0.13
Toronto Blue Jays

WS Probability = 0.11
Philadelphia Phillies

WS Probability = 0.11
San Francisco Giants

WS Probability = 0.1
New York Yankees

WS Probability = 0.1
Chicago Cubs

WS Probability = 0.1
Seattle Mariners

WS Probability = 0.1
Atlanta Braves

WS Probability = 0.1
Oakland Athletics

WS Probability = 0.09
Arizona Diamondbacks

WS Probability = 0.09
Colorado Rockies

WS Probability = 0.09
Chicago White Sox

WS Probability = 0.08
Washington Nationals

WS Probability = 0.08
New York Mets

WS Probability = 0.08
San Diego Padres

WS Probability = 0.07
Miami Marlins

WS Probability = 0.07
Tampa Bay Rays

WS Probability = 0.06
Los Angeles Dod

In [12]:
predict_the_winner(model_25, 2018, team_data)


WS Probability = 0.29
Texas Rangers

WS Probability = 0.27
Los Angeles Angels

WS Probability = 0.26
Atlanta Braves

WS Probability = 0.25
Toronto Blue Jays

WS Probability = 0.24
Kansas City Royals

WS Probability = 0.23
Chicago White Sox

WS Probability = 0.23
Baltimore Orioles

WS Probability = 0.22
Philadelphia Phillies

WS Probability = 0.22
Pittsburgh Pirates

WS Probability = 0.22
Boston Red Sox

WS Probability = 0.22
Detroit Tigers

WS Probability = 0.2
Milwaukee Brewers

WS Probability = 0.2
New York Yankees

WS Probability = 0.19
Washington Nationals

WS Probability = 0.18
Colorado Rockies

WS Probability = 0.18
Arizona Diamondbacks

WS Probability = 0.17
Minnesota Twins

WS Probability = 0.17
Seattle Mariners

WS Probability = 0.16
San Francisco Giants

WS Probability = 0.15
New York Mets

WS Probability = 0.15
Oakland Athletics

WS Probability = 0.14
Cincinnati Reds

WS Probability = 0.14
San Diego Padres

WS Probability = 0.13
Cleveland Indians

WS Probability = 0.13
Miam

In [13]:
predict_the_winner(model_12, 2018, team_data)


WS Probability = 0.33
Los Angeles Angels

WS Probability = 0.3
Texas Rangers

WS Probability = 0.27
Arizona Diamondbacks

WS Probability = 0.25
Milwaukee Brewers

WS Probability = 0.25
Toronto Blue Jays

WS Probability = 0.24
Pittsburgh Pirates

WS Probability = 0.24
New York Yankees

WS Probability = 0.24
Baltimore Orioles

WS Probability = 0.23
Minnesota Twins

WS Probability = 0.23
Cleveland Indians

WS Probability = 0.22
Oakland Athletics

WS Probability = 0.22
Atlanta Braves

WS Probability = 0.21
Houston Astros

WS Probability = 0.21
Los Angeles Dodgers

WS Probability = 0.2
St. Louis Cardinals

WS Probability = 0.2
New York Mets

WS Probability = 0.19
Washington Nationals

WS Probability = 0.19
Colorado Rockies

WS Probability = 0.19
Chicago White Sox

WS Probability = 0.19
Tampa Bay Rays

WS Probability = 0.19
Kansas City Royals

WS Probability = 0.19
Cincinnati Reds

WS Probability = 0.18
Philadelphia Phillies

WS Probability = 0.17
Detroit Tigers

WS Probability = 0.17
Chica

In [14]:
predict_the_winner(model_6, 2018, team_data)


WS Probability = 0.36
Los Angeles Angels

WS Probability = 0.27
Baltimore Orioles

WS Probability = 0.27
Texas Rangers

WS Probability = 0.27
Toronto Blue Jays

WS Probability = 0.26
Boston Red Sox

WS Probability = 0.26
Chicago White Sox

WS Probability = 0.25
Arizona Diamondbacks

WS Probability = 0.25
Detroit Tigers

WS Probability = 0.25
Philadelphia Phillies

WS Probability = 0.25
Atlanta Braves

WS Probability = 0.24
Milwaukee Brewers

WS Probability = 0.24
Pittsburgh Pirates

WS Probability = 0.22
Cleveland Indians

WS Probability = 0.22
Oakland Athletics

WS Probability = 0.22
Tampa Bay Rays

WS Probability = 0.22
San Francisco Giants

WS Probability = 0.21
New York Yankees

WS Probability = 0.21
San Diego Padres

WS Probability = 0.21
Kansas City Royals

WS Probability = 0.21
Los Angeles Dodgers

WS Probability = 0.21
New York Mets

WS Probability = 0.2
Houston Astros

WS Probability = 0.19
St. Louis Cardinals

WS Probability = 0.19
Miami Marlins

WS Probability = 0.18
Minnes

In [15]:
predict_the_winner(model_3, 2018, team_data)


WS Probability = 0.35
Los Angeles Angels

WS Probability = 0.33
Arizona Diamondbacks

WS Probability = 0.32
Boston Red Sox

WS Probability = 0.31
Toronto Blue Jays

WS Probability = 0.3
Cincinnati Reds

WS Probability = 0.29
Texas Rangers

WS Probability = 0.29
Oakland Athletics

WS Probability = 0.29
Atlanta Braves

WS Probability = 0.28
Detroit Tigers

WS Probability = 0.28
San Francisco Giants

WS Probability = 0.28
Los Angeles Dodgers

WS Probability = 0.28
Minnesota Twins

WS Probability = 0.28
Milwaukee Brewers

WS Probability = 0.28
Baltimore Orioles

WS Probability = 0.27
New York Yankees

WS Probability = 0.26
Washington Nationals

WS Probability = 0.26
Houston Astros

WS Probability = 0.26
Chicago White Sox

WS Probability = 0.26
Pittsburgh Pirates

WS Probability = 0.25
Tampa Bay Rays

WS Probability = 0.24
New York Mets

WS Probability = 0.24
St. Louis Cardinals

WS Probability = 0.24
Seattle Mariners

WS Probability = 0.23
Miami Marlins

WS Probability = 0.23
Kansas City 

In [16]:
predict_the_winner(model_100, 2017, team_data)


WS Probability = 0.11
Pittsburgh Pirates

WS Probability = 0.09
Baltimore Orioles

WS Probability = 0.08
New York Mets

WS Probability = 0.08
San Francisco Giants

WS Probability = 0.05
Cleveland Indians

WS Probability = 0.05
Washington Nationals

WS Probability = 0.04
Boston Red Sox

WS Probability = 0.04
Atlanta Braves

WS Probability = 0.04
Chicago Cubs

WS Probability = 0.04
Seattle Mariners

WS Probability = 0.04
Toronto Blue Jays

WS Probability = 0.04
Detroit Tigers

WS Probability = 0.03
Los Angeles Angels

WS Probability = 0.03
Chicago White Sox

WS Probability = 0.03
Colorado Rockies

WS Probability = 0.02
Milwaukee Brewers

WS Probability = 0.02
Oakland Athletics

WS Probability = 0.02
Minnesota Twins

WS Probability = 0.01
Philadelphia Phillies

WS Probability = 0.01
Los Angeles Dodgers

WS Probability = 0.01
Houston Astros

WS Probability = 0.01
Cincinnati Reds

WS Probability = 0.01
Miami Marlins

WS Probability = 0.01
Kansas City Royals

WS Probability = 0.01
Arizona D

In [17]:
predict_the_winner(model_50, 2017, team_data)


WS Probability = 0.11
Pittsburgh Pirates

WS Probability = 0.09
San Francisco Giants

WS Probability = 0.09
Cleveland Indians

WS Probability = 0.08
Washington Nationals

WS Probability = 0.08
Boston Red Sox

WS Probability = 0.07
Baltimore Orioles

WS Probability = 0.06
Chicago Cubs

WS Probability = 0.06
Toronto Blue Jays

WS Probability = 0.06
Los Angeles Angels

WS Probability = 0.06
Arizona Diamondbacks

WS Probability = 0.05
Tampa Bay Rays

WS Probability = 0.05
Philadelphia Phillies

WS Probability = 0.05
New York Mets

WS Probability = 0.05
Detroit Tigers

WS Probability = 0.05
Colorado Rockies

WS Probability = 0.04
Miami Marlins

WS Probability = 0.04
Seattle Mariners

WS Probability = 0.04
Texas Rangers

WS Probability = 0.04
Milwaukee Brewers

WS Probability = 0.03
Atlanta Braves

WS Probability = 0.03
New York Yankees

WS Probability = 0.03
Chicago White Sox

WS Probability = 0.03
Oakland Athletics

WS Probability = 0.02
St. Louis Cardinals

WS Probability = 0.02
Los Ange

In [18]:
predict_the_winner(model_25, 2017, team_data)


WS Probability = 0.2
Boston Red Sox

WS Probability = 0.16
Washington Nationals

WS Probability = 0.16
Cleveland Indians

WS Probability = 0.14
Tampa Bay Rays

WS Probability = 0.14
Los Angeles Dodgers

WS Probability = 0.11
Chicago Cubs

WS Probability = 0.11
Atlanta Braves

WS Probability = 0.1
Baltimore Orioles

WS Probability = 0.1
New York Mets

WS Probability = 0.1
Oakland Athletics

WS Probability = 0.1
New York Yankees

WS Probability = 0.09
Detroit Tigers

WS Probability = 0.09
Pittsburgh Pirates

WS Probability = 0.09
Los Angeles Angels

WS Probability = 0.09
Arizona Diamondbacks

WS Probability = 0.09
Houston Astros

WS Probability = 0.08
Milwaukee Brewers

WS Probability = 0.08
San Francisco Giants

WS Probability = 0.07
Toronto Blue Jays

WS Probability = 0.06
Minnesota Twins

WS Probability = 0.06
Seattle Mariners

WS Probability = 0.05
Colorado Rockies

WS Probability = 0.03
Miami Marlins

WS Probability = 0.03
Chicago White Sox

WS Probability = 0.02
San Diego Padres



In [19]:
predict_the_winner(model_12, 2017, team_data)


WS Probability = 0.2
Washington Nationals

WS Probability = 0.18
Los Angeles Dodgers

WS Probability = 0.18
Boston Red Sox

WS Probability = 0.18
Cleveland Indians

WS Probability = 0.17
New York Yankees

WS Probability = 0.16
Houston Astros

WS Probability = 0.16
Tampa Bay Rays

WS Probability = 0.12
Pittsburgh Pirates

WS Probability = 0.12
Chicago Cubs

WS Probability = 0.12
Oakland Athletics

WS Probability = 0.11
Minnesota Twins

WS Probability = 0.09
Toronto Blue Jays

WS Probability = 0.09
Los Angeles Angels

WS Probability = 0.09
New York Mets

WS Probability = 0.08
St. Louis Cardinals

WS Probability = 0.08
Atlanta Braves

WS Probability = 0.07
Seattle Mariners

WS Probability = 0.07
Detroit Tigers

WS Probability = 0.06
Arizona Diamondbacks

WS Probability = 0.05
Chicago White Sox

WS Probability = 0.04
Colorado Rockies

WS Probability = 0.04
Milwaukee Brewers

WS Probability = 0.03
Baltimore Orioles

WS Probability = 0.03
Texas Rangers

WS Probability = 0.03
Philadelphia Ph

In [20]:
predict_the_winner(model_6, 2017, team_data)


WS Probability = 0.27
Washington Nationals

WS Probability = 0.25
Los Angeles Dodgers

WS Probability = 0.2
Oakland Athletics

WS Probability = 0.19
Boston Red Sox

WS Probability = 0.18
Cleveland Indians

WS Probability = 0.17
New York Yankees

WS Probability = 0.16
Houston Astros

WS Probability = 0.14
Atlanta Braves

WS Probability = 0.13
Los Angeles Angels

WS Probability = 0.12
Pittsburgh Pirates

WS Probability = 0.12
Arizona Diamondbacks

WS Probability = 0.12
Minnesota Twins

WS Probability = 0.11
Baltimore Orioles

WS Probability = 0.11
New York Mets

WS Probability = 0.1
Chicago Cubs

WS Probability = 0.1
Toronto Blue Jays

WS Probability = 0.1
Tampa Bay Rays

WS Probability = 0.09
Detroit Tigers

WS Probability = 0.08
Milwaukee Brewers

WS Probability = 0.08
Philadelphia Phillies

WS Probability = 0.08
St. Louis Cardinals

WS Probability = 0.08
Seattle Mariners

WS Probability = 0.07
Miami Marlins

WS Probability = 0.07
Chicago White Sox

WS Probability = 0.06
San Francisco

In [21]:
predict_the_winner(model_3, 2017, team_data)


WS Probability = 0.28
Boston Red Sox

WS Probability = 0.25
New York Yankees

WS Probability = 0.24
Los Angeles Dodgers

WS Probability = 0.19
Cleveland Indians

WS Probability = 0.19
Washington Nationals

WS Probability = 0.16
Oakland Athletics

WS Probability = 0.15
Chicago Cubs

WS Probability = 0.14
Minnesota Twins

WS Probability = 0.14
Tampa Bay Rays

WS Probability = 0.13
Houston Astros

WS Probability = 0.12
Milwaukee Brewers

WS Probability = 0.12
Los Angeles Angels

WS Probability = 0.1
Detroit Tigers

WS Probability = 0.09
Atlanta Braves

WS Probability = 0.08
Seattle Mariners

WS Probability = 0.08
New York Mets

WS Probability = 0.08
Arizona Diamondbacks

WS Probability = 0.07
Toronto Blue Jays

WS Probability = 0.06
Baltimore Orioles

WS Probability = 0.06
Chicago White Sox

WS Probability = 0.06
St. Louis Cardinals

WS Probability = 0.05
Miami Marlins

WS Probability = 0.05
Pittsburgh Pirates

WS Probability = 0.05
Philadelphia Phillies

WS Probability = 0.04
San Diego 

In [22]:
predict_the_winner(model_100, 2016, team_data)


WS Probability = 0.09
Cleveland Indians

WS Probability = 0.08
Minnesota Twins

WS Probability = 0.08
Colorado Rockies

WS Probability = 0.07
St. Louis Cardinals

WS Probability = 0.07
Los Angeles Angels

WS Probability = 0.06
Baltimore Orioles

WS Probability = 0.05
Kansas City Royals

WS Probability = 0.05
Washington Nationals

WS Probability = 0.04
Boston Red Sox

WS Probability = 0.04
Pittsburgh Pirates

WS Probability = 0.03
Seattle Mariners

WS Probability = 0.03
Houston Astros

WS Probability = 0.03
Oakland Athletics

WS Probability = 0.03
San Francisco Giants

WS Probability = 0.03
Philadelphia Phillies

WS Probability = 0.02
Tampa Bay Rays

WS Probability = 0.02
Chicago White Sox

WS Probability = 0.02
San Diego Padres

WS Probability = 0.02
Arizona Diamondbacks

WS Probability = 0.02
Chicago Cubs

WS Probability = 0.02
Miami Marlins

WS Probability = 0.02
Detroit Tigers

WS Probability = 0.02
New York Yankees

WS Probability = 0.01
Texas Rangers

WS Probability = 0.01
Los An

In [23]:
predict_the_winner(model_50, 2016, team_data)


WS Probability = 0.11
Kansas City Royals

WS Probability = 0.1
Boston Red Sox

WS Probability = 0.09
Washington Nationals

WS Probability = 0.09
Milwaukee Brewers

WS Probability = 0.09
Minnesota Twins

WS Probability = 0.08
Chicago Cubs

WS Probability = 0.07
Baltimore Orioles

WS Probability = 0.07
Los Angeles Angels

WS Probability = 0.06
Cleveland Indians

WS Probability = 0.06
Colorado Rockies

WS Probability = 0.06
Tampa Bay Rays

WS Probability = 0.05
Houston Astros

WS Probability = 0.05
St. Louis Cardinals

WS Probability = 0.05
Pittsburgh Pirates

WS Probability = 0.04
Arizona Diamondbacks

WS Probability = 0.04
Seattle Mariners

WS Probability = 0.04
New York Yankees

WS Probability = 0.03
Chicago White Sox

WS Probability = 0.03
Toronto Blue Jays

WS Probability = 0.02
Texas Rangers

WS Probability = 0.02
Miami Marlins

WS Probability = 0.02
Los Angeles Dodgers

WS Probability = 0.02
Oakland Athletics

WS Probability = 0.01
Atlanta Braves

WS Probability = 0.01
Detroit Tig

In [24]:
predict_the_winner(model_25, 2016, team_data)


WS Probability = 0.18
Boston Red Sox

WS Probability = 0.17
Los Angeles Angels

WS Probability = 0.17
Houston Astros

WS Probability = 0.16
Washington Nationals

WS Probability = 0.16
Chicago Cubs

WS Probability = 0.15
Los Angeles Dodgers

WS Probability = 0.15
Tampa Bay Rays

WS Probability = 0.15
New York Yankees

WS Probability = 0.12
Baltimore Orioles

WS Probability = 0.11
Cleveland Indians

WS Probability = 0.09
Kansas City Royals

WS Probability = 0.09
Seattle Mariners

WS Probability = 0.07
Oakland Athletics

WS Probability = 0.06
Colorado Rockies

WS Probability = 0.06
Texas Rangers

WS Probability = 0.05
Detroit Tigers

WS Probability = 0.05
San Diego Padres

WS Probability = 0.05
Pittsburgh Pirates

WS Probability = 0.05
Minnesota Twins

WS Probability = 0.05
Toronto Blue Jays

WS Probability = 0.04
St. Louis Cardinals

WS Probability = 0.04
Arizona Diamondbacks

WS Probability = 0.04
Milwaukee Brewers

WS Probability = 0.04
Miami Marlins

WS Probability = 0.03
Cincinnati 

In [25]:
predict_the_winner(model_12, 2016, team_data)


WS Probability = 0.19
Washington Nationals

WS Probability = 0.18
Los Angeles Dodgers

WS Probability = 0.17
New York Yankees

WS Probability = 0.17
Cleveland Indians

WS Probability = 0.15
Houston Astros

WS Probability = 0.15
Seattle Mariners

WS Probability = 0.14
Chicago Cubs

WS Probability = 0.12
Minnesota Twins

WS Probability = 0.11
Toronto Blue Jays

WS Probability = 0.1
Kansas City Royals

WS Probability = 0.09
Tampa Bay Rays

WS Probability = 0.09
Boston Red Sox

WS Probability = 0.07
Milwaukee Brewers

WS Probability = 0.07
Colorado Rockies

WS Probability = 0.06
Arizona Diamondbacks

WS Probability = 0.06
San Diego Padres

WS Probability = 0.05
Texas Rangers

WS Probability = 0.05
Baltimore Orioles

WS Probability = 0.05
Oakland Athletics

WS Probability = 0.05
St. Louis Cardinals

WS Probability = 0.05
Detroit Tigers

WS Probability = 0.03
Philadelphia Phillies

WS Probability = 0.03
Los Angeles Angels

WS Probability = 0.03
Miami Marlins

WS Probability = 0.02
New York 

In [26]:
predict_the_winner(model_6, 2016, team_data)


WS Probability = 0.21
Los Angeles Dodgers

WS Probability = 0.21
Houston Astros

WS Probability = 0.2
Washington Nationals

WS Probability = 0.19
New York Yankees

WS Probability = 0.18
Cleveland Indians

WS Probability = 0.18
Seattle Mariners

WS Probability = 0.17
Minnesota Twins

WS Probability = 0.15
Boston Red Sox

WS Probability = 0.12
Chicago Cubs

WS Probability = 0.11
Kansas City Royals

WS Probability = 0.1
Tampa Bay Rays

WS Probability = 0.1
Detroit Tigers

WS Probability = 0.1
Toronto Blue Jays

WS Probability = 0.08
Baltimore Orioles

WS Probability = 0.08
Arizona Diamondbacks

WS Probability = 0.07
Miami Marlins

WS Probability = 0.07
Philadelphia Phillies

WS Probability = 0.07
Los Angeles Angels

WS Probability = 0.07
Oakland Athletics

WS Probability = 0.06
Colorado Rockies

WS Probability = 0.06
San Francisco Giants

WS Probability = 0.06
Texas Rangers

WS Probability = 0.05
Atlanta Braves

WS Probability = 0.03
New York Mets

WS Probability = 0.03
Milwaukee Brewers

In [27]:
predict_the_winner(model_3, 2016, team_data)


WS Probability = 0.33
New York Yankees

WS Probability = 0.28
Houston Astros

WS Probability = 0.25
Cleveland Indians

WS Probability = 0.25
Los Angeles Dodgers

WS Probability = 0.19
Tampa Bay Rays

WS Probability = 0.15
Boston Red Sox

WS Probability = 0.13
Chicago Cubs

WS Probability = 0.12
Seattle Mariners

WS Probability = 0.11
Los Angeles Angels

WS Probability = 0.1
Arizona Diamondbacks

WS Probability = 0.09
Kansas City Royals

WS Probability = 0.09
Washington Nationals

WS Probability = 0.08
Baltimore Orioles

WS Probability = 0.08
Minnesota Twins

WS Probability = 0.07
Oakland Athletics

WS Probability = 0.07
Toronto Blue Jays

WS Probability = 0.06
Milwaukee Brewers

WS Probability = 0.05
St. Louis Cardinals

WS Probability = 0.05
Pittsburgh Pirates

WS Probability = 0.05
San Francisco Giants

WS Probability = 0.04
Texas Rangers

WS Probability = 0.04
San Diego Padres

WS Probability = 0.03
New York Mets

WS Probability = 0.03
Philadelphia Phillies

WS Probability = 0.03
M

not sure.  move on.