## Purpose: Pull 2019 data, merge, predict

In [1]:
# import dependencies.
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pickle
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

In [2]:
# Launch a visible (non-headless) Chrome session driven by the local chromedriver.
executable_path = dict(executable_path="/usr/local/bin/chromedriver")
browser = Browser("chrome", headless=False, **executable_path)

#### PITCHING DATA.

In [3]:
# Scrape the sortable team-pitching table into a list of per-team dicts.

# Output column name -> CSS class of the <td> holding that stat. Several names
# carry a "1" suffix (G1, H1, ...); presumably this keeps them distinct from the
# same-named hitting stats once the tables are merged -- confirm if renaming.
pitching_css_classes = {
    "W": "dg-w",
    "L": "dg-l",
    "ERA": "dg-era",
    "G1": "dg-g",
    "GS": "dg-gs",
    "SV": "dg-sv",
    "SVO": "dg-svo",
    "IP": "dg-ip",
    "H1": "dg-h",
    "R1": "dg-r",
    "ER": "dg-er",
    "HR1": "dg-hr",
    "BB1": "dg-bb",
    "SO1": "dg-so",
    "WHIP": "dg-whip",
    "CG": "dg-cg",
    "SHO": "dg-sho",
    "HB": "dg-hb",
    "IBB1": "dg-ibb",
    "GF": "dg-gf",
    "HLD": "dg-hld",
    "GIDP": "dg-gidp",
    "GO1": "dg-go",
    "AO1": "dg-ao",
    "WP": "dg-wp",
    "BK": "dg-bk",
    "SB1": "dg-sb",
    "CS1": "dg-cs",
    "PK": "dg-pk",
    "TBF": "dg-tbf",
    "NP": "dg-np",
    "WPCT": "dg-wpct",
    "GO_AO1": "dg-go_ao",
    "OBP1": "dg-obp",
    "SLG1": "dg-slg",
    "OPS": "dg-ops",
}

pitching_data = []

# range(2019, 2018, -1) yields only 2019; widen the range to pull more seasons.
for season in range(2019, 2018, -1):
    year = str(season)
    url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Team+pitching&game_type='R'&season="+year+"&season_type=ANY&league_code='MLB'&sectionType=st&statType=pitching&page=1&ts=1564260727128&playerType=QUALIFIER&sportCode='mlb'&split=&team_id=&active_sw=&position='1'&page_type=SortablePlayer&sortOrder='desc'&sortColumn=avg&results=&perPage=50&timeframe=&last_x_days=&extended=0"

    browser.visit(url)
    html = browser.html
    soup = bs(html, "html.parser")

    tbody = soup.find("tbody")
    if tbody is None:
        # Fail with a readable message instead of "NoneType is not iterable".
        raise RuntimeError("No <tbody> found on the pitching stats page for season " + year)

    # Only direct <tr> children: iterating the tbody itself would also yield
    # whitespace text nodes, which do not support tag lookups.
    for tr in tbody.find_all("tr", recursive=False):
        team_data = {
            "year": year,
            "team": tr.find("td", class_="dg-team_full").text,
        }
        for column, css_class in pitching_css_classes.items():
            team_data[column] = tr.find("td", class_=css_class).text
        pitching_data.append(team_data)

In [4]:
# Turn the scraped list of per-team dicts into a DataFrame and preview it.
pitching_data = pd.DataFrame(data=pitching_data)
pitching_data.head()

Unnamed: 0,AO1,BB1,BK,CG,CS1,ER,ERA,G1,GF,GIDP,...,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT,team,year
0,874,383,4,1,18,603,5.41,112,111,96,...,902,20,36,4427,52,1.46,46,0.464,Colorado Rockies,2019
1,984,403,3,1,16,560,5.06,113,112,97,...,866,25,42,4397,40,1.46,37,0.354,Kansas City Royals,2019
2,963,349,5,0,27,544,5.11,108,107,67,...,886,22,40,4213,32,1.43,40,0.299,Detroit Tigers,2019
3,1057,350,4,2,16,583,5.2,114,112,94,...,869,23,41,4437,47,1.42,51,0.412,Seattle Mariners,2019
4,1005,389,2,3,13,545,4.96,111,108,99,...,952,24,36,4351,57,1.44,48,0.514,Texas Rangers,2019


In [5]:
# Move the identifier columns (team, year) to the front.
# Selecting them by name rather than by position keeps this correct whatever
# column order pandas produced, and makes the cell safe to re-run.
id_columns = ["team", "year"]
cols = id_columns + [col for col in pitching_data.columns.tolist() if col not in id_columns]
pitching_data = pitching_data[cols]
pitching_data.head()

Unnamed: 0,team,year,AO1,BB1,BK,CG,CS1,ER,ERA,G1,...,SHO,SLG1,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT
0,Colorado Rockies,2019,874,383,4,1,18,603,5.41,112,...,4,0.477,902,20,36,4427,52,1.46,46,0.464
1,Kansas City Royals,2019,984,403,3,1,16,560,5.06,113,...,5,0.451,866,25,42,4397,40,1.46,37,0.354
2,Detroit Tigers,2019,963,349,5,0,27,544,5.11,108,...,2,0.472,886,22,40,4213,32,1.43,40,0.299
3,Seattle Mariners,2019,1057,350,4,2,16,583,5.2,114,...,2,0.483,869,23,41,4437,47,1.42,51,0.412
4,Texas Rangers,2019,1005,389,2,3,13,545,4.96,111,...,4,0.463,952,24,36,4351,57,1.44,48,0.514


#### HITTING DATA

In [7]:
# Scrape the sortable team-hitting table into a list of per-team dicts.

# Output column name -> CSS class of the <td> holding that stat.
hitting_css_classes = {
    "G": "dg-g",
    "AB": "dg-ab",
    "R": "dg-r",
    "H": "dg-h",
    "2B": "dg-d",
    "3B": "dg-t",
    "HR": "dg-hr",
    "RBI": "dg-rbi",
    "BB": "dg-bb",
    "SO": "dg-so",
    "SB": "dg-sb",
    "CS": "dg-cs",
    "OBP": "dg-obp",
    "SLG": "dg-slg",
    "OPS": "dg-ops",
    "IBB": "dg-ibb",
    "HBP": "dg-hbp",
    "SAC": "dg-sac",
    "SF": "dg-sf",
    "TB": "dg-tb",
    "XBH": "dg-xbh",
    "GDP": "dg-gidp",
    "GO": "dg-go",
    "AO": "dg-ao",
    "GO_AO": "dg-go_ao",
    "NP": "dg-np",
    "PA": "dg-tpa",
}

hitting_data = []

# range(2019, 2018, -1) yields only 2019; widen the range to pull more seasons.
for season in range(2019, 2018, -1):
    year = str(season)
    url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Team+hitting&game_type='R'&season="+year+"&season_type=ANY&league_code='MLB'&sectionType=st&statType=hitting&page=1&ts=1564257552743&playerType=QUALIFIER&sportCode='mlb'&split=&team_id=&active_sw=&position=&page_type=SortablePlayer&sortOrder='desc'&sortColumn=avg&results=&perPage=50&timeframe=&last_x_days=&extended=0"

    browser.visit(url)
    html = browser.html
    soup = bs(html, "html.parser")

    tbody = soup.find("tbody")
    if tbody is None:
        # Fail with a readable message instead of "NoneType is not iterable".
        raise RuntimeError("No <tbody> found on the hitting stats page for season " + year)

    # Only direct <tr> children: iterating the tbody itself would also yield
    # whitespace text nodes, which do not support tag lookups.
    for tr in tbody.find_all("tr", recursive=False):
        team_data = {
            "year": year,
            "team": tr.find("td", class_="dg-team_full").text,
        }
        for column, css_class in hitting_css_classes.items():
            team_data[column] = tr.find("td", class_=css_class).text
        hitting_data.append(team_data)

In [8]:
# Turn the scraped list of per-team dicts into a DataFrame and preview it.
hitting_data = pd.DataFrame(data=hitting_data)
hitting_data.head()

Unnamed: 0,2B,3B,AB,AO,BB,CS,G,GDP,GO,GO_AO,...,RBI,SAC,SB,SF,SLG,SO,TB,XBH,team,year
0,245,18,4068,1048,422,18,114,91,986,0.94,...,621,12,55,31,0.472,963,1920,438,Boston Red Sox,2019
1,218,17,3877,1085,430,21,113,104,986,0.91,...,582,8,49,47,0.48,810,1861,421,Houston Astros,2019
2,193,11,3798,933,411,17,111,81,895,0.96,...,614,10,40,26,0.475,979,1803,391,New York Yankees,2019
3,227,14,3929,1107,352,18,111,71,903,0.82,...,619,7,25,31,0.501,896,1967,458,Minnesota Twins,2019
4,241,35,3925,921,332,26,112,68,977,1.06,...,575,35,39,28,0.457,1041,1792,420,Colorado Rockies,2019


In [9]:
# Move the identifier columns (team, year) to the front.
# Selecting them by name rather than by position keeps this correct whatever
# column order pandas produced, and makes the cell safe to re-run.
id_columns = ["team", "year"]
cols = id_columns + [col for col in hitting_data.columns.tolist() if col not in id_columns]
hitting_data = hitting_data[cols]
hitting_data.head()

Unnamed: 0,team,year,2B,3B,AB,AO,BB,CS,G,GDP,...,PA,R,RBI,SAC,SB,SF,SLG,SO,TB,XBH
0,Boston Red Sox,2019,245,18,4068,1048,422,18,114,91,...,4567,652,621,12,55,31,0.472,963,1920,438
1,Houston Astros,2019,218,17,3877,1085,430,21,113,104,...,4408,595,582,8,49,47,0.48,810,1861,421
2,New York Yankees,2019,193,11,3798,933,411,17,111,81,...,4279,645,614,10,40,26,0.475,979,1803,391
3,Minnesota Twins,2019,227,14,3929,1107,352,18,111,71,...,4381,642,619,7,25,31,0.501,896,1967,458
4,Colorado Rockies,2019,241,35,3925,921,332,26,112,68,...,4348,598,575,35,39,28,0.457,1041,1792,420


#### FIELDING DATA.

In [11]:
# Scrape the sortable team-fielding table into a list of per-team dicts.

# Output column name -> CSS class of the <td> holding that stat. Several names
# carry a "2" suffix (G2, SB2, ...); presumably this keeps them distinct from
# the same-named hitting stats once the tables are merged -- confirm if renaming.
fielding_css_classes = {
    "G2": "dg-g",
    "GS2": "dg-gs",
    "INN": "dg-inn",
    "TC": "dg-tc",
    "PO": "dg-po",
    "A": "dg-a",
    "E": "dg-e",
    "DP": "dg-dp",
    "SB2": "dg-sb",
    "CS2": "dg-cs",
    "SBPCT": "dg-sbpct",
    "PB": "dg-pb",
    "C_WP": "dg-c_wp",
    "DER": "dg-der",
}

fielding_data = []

# range(2019, 2018, -1) yields only 2019; widen the range to pull more seasons.
for season in range(2019, 2018, -1):
    year = str(season)
    url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Team+fielding&game_type='R'&season="+year+"&season_type=ANY&league_code='MLB'&sectionType=st&statType=fielding&page=1&ts=1564284558293&playerType=QUALIFIER&sportCode='mlb'&split=&team_id=&active_sw=&position='1'&page_type=SortablePlayer&sortOrder='desc'&sortColumn=fpct&results=&perPage=50&timeframe=&last_x_days=&extended=2"

    browser.visit(url)
    html = browser.html
    soup = bs(html, "html.parser")

    tbody = soup.find("tbody")
    if tbody is None:
        # Fail with a readable message instead of "NoneType is not iterable".
        raise RuntimeError("No <tbody> found on the fielding stats page for season " + year)

    # Only direct <tr> children: iterating the tbody itself would also yield
    # whitespace text nodes, which do not support tag lookups.
    for tr in tbody.find_all("tr", recursive=False):
        team_data = {
            "year": year,
            "team": tr.find("td", class_="dg-team_full").text,
        }
        for column, css_class in fielding_css_classes.items():
            team_data[column] = tr.find("td", class_=css_class).text
        fielding_data.append(team_data)

In [12]:
# Turn the scraped list of per-team dicts into a DataFrame and preview it.
fielding_data = pd.DataFrame(data=fielding_data)
fielding_data.head()

Unnamed: 0,A,CS2,C_WP,DER,DP,E,G2,GS2,INN,PB,PO,SB2,SBPCT,TC,team,year
0,1083,17,23,0.704,120,47,110,990,8781.0,4,2927,21,0.553,4057,St. Louis Cardinals,2019
1,1069,21,35,0.7,87,50,112,1008,9078.0,4,3026,27,0.563,4145,Arizona Diamondbacks,2019
2,1044,16,37,0.68,107,50,113,1017,8961.0,6,2987,40,0.714,4081,Kansas City Royals,2019
3,931,13,32,0.73,56,51,113,1017,9147.0,7,3049,58,0.817,4031,Houston Astros,2019
4,1023,24,41,0.701,97,56,113,1017,9246.0,12,3082,48,0.667,4161,Tampa Bay Rays,2019


In [13]:
# move the columns around.
cols = fielding_data.columns.tolist()
cols = cols[-2:] + cols[:-2]
fielding_data = fielding_data[cols]
fielding_data.head()

Unnamed: 0,team,year,A,CS2,C_WP,DER,DP,E,G2,GS2,INN,PB,PO,SB2,SBPCT,TC
0,St. Louis Cardinals,2019,1083,17,23,0.704,120,47,110,990,8781.0,4,2927,21,0.553,4057
1,Arizona Diamondbacks,2019,1069,21,35,0.7,87,50,112,1008,9078.0,4,3026,27,0.563,4145
2,Kansas City Royals,2019,1044,16,37,0.68,107,50,113,1017,8961.0,6,2987,40,0.714,4081
3,Houston Astros,2019,931,13,32,0.73,56,51,113,1017,9147.0,7,3049,58,0.817,4031
4,Tampa Bay Rays,2019,1023,24,41,0.701,97,56,113,1017,9246.0,12,3082,48,0.667,4161


#### MERGE DATASETS.

In [14]:
# Left-join the hitting stats onto the fielding stats, matching rows by team and season.
merge_keys = ["team", "year"]
fielding_hitting_merge = fielding_data.merge(hitting_data, how="left", on=merge_keys)
fielding_hitting_merge.head()

Unnamed: 0,team,year,A,CS2,C_WP,DER,DP,E,G2,GS2,...,PA,R,RBI,SAC,SB,SF,SLG,SO,TB,XBH
0,St. Louis Cardinals,2019,1083,17,23,0.704,120,47,110,990,...,4158,496,467,27,74,25,0.408,943,1509,316
1,Arizona Diamondbacks,2019,1069,21,35,0.7,87,50,112,1008,...,4400,583,558,19,62,32,0.446,939,1752,398
2,Kansas City Royals,2019,1044,16,37,0.68,107,50,113,1017,...,4249,483,457,18,92,28,0.398,962,1529,337
3,Houston Astros,2019,931,13,32,0.73,56,51,113,1017,...,4408,595,582,8,49,47,0.48,810,1861,421
4,Tampa Bay Rays,2019,1023,24,41,0.701,97,56,113,1017,...,4405,537,510,6,67,20,0.436,1065,1717,383


In [15]:
# Left-join the pitching stats onto the fielding+hitting frame, again by team and season.
team_data = fielding_hitting_merge.merge(
    pitching_data,
    how="left",
    on=["team", "year"],
)
team_data.head()

Unnamed: 0,team,year,A,CS2,C_WP,DER,DP,E,G2,GS2,...,SHO,SLG1,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT
0,St. Louis Cardinals,2019,1083,17,23,0.704,120,47,110,990,...,5,0.412,955,34,43,4126,58,1.3,23,0.527
1,Arizona Diamondbacks,2019,1069,21,35,0.7,87,50,112,1008,...,7,0.426,983,26,42,4260,56,1.28,35,0.5
2,Kansas City Royals,2019,1044,16,37,0.68,107,50,113,1017,...,5,0.451,866,25,42,4397,40,1.46,37,0.354
3,Houston Astros,2019,931,13,32,0.73,56,51,113,1017,...,9,0.397,1143,29,44,4173,73,1.12,32,0.646
4,Tampa Bay Rays,2019,1023,24,41,0.701,97,56,113,1017,...,6,0.382,1093,30,48,4224,65,1.17,41,0.575


#### GRAB COLUMNS OF INTEREST.

In [16]:
# Columns kept from the merged frame. The "_x" names are the suffixes pandas
# added to columns that appeared on both sides of a merge.
feature_columns = [
    'A', 'DP', 'E', 'G2', 'GS2', 'INN', 'PB', 'PO', 'TC',
    '2B', '3B', 'AB', 'AO', 'BB', 'CS', 'G', 'GDP', 'H', 'HBP', 'HR', 'IBB',
    'NP_x', 'OBP', 'OPS_x', 'PA', 'R', 'RBI', 'SAC', 'SB', 'SF', 'SLG', 'SO',
    'TB', 'XBH',
    'BB1', 'BK', 'CG', 'ER', 'ERA', 'G1', 'GF', 'GS', 'H1', 'HB', 'HR1',
    'IBB1', 'IP', 'L', 'OBP1', 'R1', 'SHO', 'SO1', 'SV', 'SVO', 'TBF', 'W',
    'WHIP', 'WP', 'WPCT',
]

# Same selection as the feature list, with the season column in front.
features = team_data[["year"] + feature_columns]
features.head()

Unnamed: 0,year,A,DP,E,G2,GS2,INN,PB,PO,TC,...,R1,SHO,SO1,SV,SVO,TBF,W,WHIP,WP,WPCT
0,2019,1083,120,47,110,990,8781.0,4,2927,4057,...,477,5,955,34,43,4126,58,1.3,23,0.527
1,2019,1069,87,50,112,1008,9078.0,4,3026,4145,...,512,7,983,26,42,4260,56,1.28,35,0.5
2,2019,1044,107,50,113,1017,8961.0,6,2987,4081,...,594,5,866,25,42,4397,40,1.46,37,0.354
3,2019,931,56,51,113,1017,9147.0,7,3049,4031,...,448,9,1143,29,44,4173,73,1.12,32,0.646
4,2019,1023,97,56,113,1017,9246.0,12,3082,4161,...,440,6,1093,30,48,4224,65,1.17,41,0.575


#### VERIFICATION.

In [17]:
# read in the data.
# NOTE(review): this rebinds `team_data` (and, below, `features` and
# `feature_columns`), replacing the scraped/merged frame built earlier in this
# notebook. The predictions further down therefore run on the CSV rows, not on
# the freshly scraped data -- confirm that this is intended.
team_data = pd.read_csv("../Resources/clean_data_1969.csv")
team_data = team_data.drop(columns=["Unnamed: 0"])

# remove data from 2016 through 2019 (held out for verification).
team_data_new = team_data.loc[team_data["year"] < 2016]
target = team_data_new["winners"]
features = team_data_new.drop(columns=["team", "year", "winners"])
feature_columns = list(features.columns)


# UPSAMPLE THE DATA.
# reset the index.
team_data_new = team_data_new.reset_index(drop=True)

# remove team and year.
# Select from the filtered frame: selecting from `team_data` here would silently
# bring the held-out 2016-2019 seasons back into the training data.
feature_columns_new = feature_columns + ["winners"]
team_data_new = team_data_new[feature_columns_new]

# upsample for a more balanced dataset.
def upsample(dataset, no_samples):
    """
    Balance the classes by upsampling winners, then split and scale.

    INPUT:
    -dataset = dataframe holding a binary "winners" column (0/1) plus every
     column named in the notebook-level `feature_columns` list.
    -no_samples = number of minority-class ("winners" == 1) rows to draw,
     with replacement.

    OUTPUT:
    -X_train_scaled, X_test_scaled = standardized feature arrays.
    -y_train, y_test = the matching "winners" targets.

    NOTE(review): the minority rows are duplicated before the train/test
    split, so copies of one row can land on both sides of the split; test
    scores from these splits are likely optimistic.
    """
    # separate majority and minority classes.
    df_majority = dataset.loc[dataset["winners"] == 0]
    df_minority = dataset.loc[dataset["winners"] == 1]

    # upsample minority class (fixed seed so the draw is reproducible).
    df_minority_upsampled = resample(df_minority,
                                     replace=True,
                                     n_samples=no_samples,
                                     random_state=123)

    # combine majority class with upsampled minority class.
    df_upsampled = pd.concat([df_majority, df_minority_upsampled])

    # separate features and target.
    y = df_upsampled["winners"]
    X = df_upsampled[feature_columns]

    # split into train and test sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Learn the scaling statistics from the training split only and reuse them
    # on the test split; refitting on the test data would scale the two splits
    # differently and leak test statistics into the evaluation.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test

# Build three scaled train/test splits, one per minority-class sample count.
(X_train_100, X_test_100,
 y_train_100, y_test_100) = upsample(dataset=team_data_new, no_samples=2234)
(X_train_50, X_test_50,
 y_train_50, y_test_50) = upsample(dataset=team_data_new, no_samples=1117)
(X_train_25, X_test_25,
 y_train_25, y_test_25) = upsample(dataset=team_data_new, no_samples=559)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [18]:
# load pickles.
# SECURITY: pickle.load can execute arbitrary code; only load model files that
# were produced by this project.
# Each file is opened in a `with` block so its handle is closed after loading.
with open("../notebooks_for_models/1969/svc_grid_100.pkl", "rb") as model_file:
    model_100 = pickle.load(model_file)
with open("../notebooks_for_models/1969/svc_grid_50.pkl", "rb") as model_file:
    model_50 = pickle.load(model_file)
with open("../notebooks_for_models/1969/svc_grid_25.pkl", "rb") as model_file:
    model_25 = pickle.load(model_file)

In [19]:
# RUN THE VERIFICATION.
def predict_the_winner(model, year, team_data, X_train):
    '''
    Print each team's predicted World Series probability for one season.

    INPUT:
    -model = the saved, already fitted model; must provide predict_proba.
    -year = the year want to look at (compared against team_data["year"]).
    -team_data = complete dataframe with all data; needs "year", "team" and
     every column named in the notebook-level feature_columns list.
    -X_train = scaled X train data. Accepted for backward compatibility only;
     it is not used (a scaler used to be fit on it and the result discarded).

    OUTPUT:
    -printed prediction (the function returns None).

    RAISES:
    -ValueError if team_data has no rows for the requested year.

    DESCRIPTION:
    -data from year of interest is isolated.
    -the data are scaled.
    -the prediction is made.
    -print out the resulting probability and the name of the team, highest
     probability first (at most 50 teams).
    '''

    # grab the data; a fresh 0..n-1 index lines rows up with the prediction rows.
    season_data = team_data.loc[team_data["year"] == year].reset_index(drop=True)
    if season_data.empty:
        raise ValueError("No rows found in team_data for year " + str(year))

    # set features (no team, year, winners).
    features = season_data[feature_columns]

    # scale the season's features.
    # NOTE(review): the scaler is fit on this season's rows alone, not on the
    # statistics the model was trained with. That behaviour is kept as-is:
    # X_train arrives already scaled, so it cannot supply the original
    # training statistics. Persisting the training scaler next to the model
    # would be the robust fix.
    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    # predict class probabilities; column 1 is taken as the "winner" class.
    probabilities = model.predict_proba(features)

    # convert predictions to a dataframe, sorted descending.
    WS_predictions = pd.DataFrame({"Probability": probabilities[:, 1]})
    WS_predictions = WS_predictions.sort_values("Probability", ascending=False)

    # Print the (up to) 50 highest World Series probabilities with team names.
    for i, row in WS_predictions.head(50).iterrows():
        prob = ' '.join(('WS Probability =', str(row['Probability'])))
        print(prob)
        # look the team up by column name instead of by column position.
        print(season_data.loc[i, "team"])
        print('')


In [21]:
# Rank the 2019 teams with the model saved as svc_grid_100.
predict_the_winner(model=model_100, year=2019, team_data=team_data, X_train=X_train_100)

WS Probability = 0.9956239611867604
Boston Red Sox

WS Probability = 0.9590764825898606
Cleveland Indians

WS Probability = 0.9546190573819126
Detroit Tigers

WS Probability = 0.6172689312091171
Minnesota Twins

WS Probability = 0.21864411205472745
Los Angeles Angels

WS Probability = 0.2149358895643747
Tampa Bay Rays

WS Probability = 0.16610583702922316
St. Louis Cardinals

WS Probability = 0.0844524235043789
Arizona Diamondbacks

WS Probability = 0.07134292035712121
New York Yankees

WS Probability = 0.04929907469852173
Atlanta Braves

WS Probability = 0.04383248506579274
Pittsburgh Pirates

WS Probability = 0.04309544109443426
Washington Nationals

WS Probability = 0.03130203132052158
San Diego Padres

WS Probability = 0.03014107457827023
Houston Astros

WS Probability = 0.029299982826124223
Chicago Cubs

WS Probability = 0.026485685753982177
San Francisco Giants

WS Probability = 0.018974727881140477
New York Mets

WS Probability = 0.015327532806735234
Cincinnati Reds

WS Probabil

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [23]:
# Rank the 2019 teams with the model saved as svc_grid_50.
predict_the_winner(model=model_50, year=2019, team_data=team_data, X_train=X_train_50)

WS Probability = 0.9864225268542672
Boston Red Sox

WS Probability = 0.6260946935018998
Houston Astros

WS Probability = 0.5759645768371823
Cleveland Indians

WS Probability = 0.5342340242751272
Los Angeles Angels

WS Probability = 0.3788515985418775
Tampa Bay Rays

WS Probability = 0.23537217685958542
St. Louis Cardinals

WS Probability = 0.2332680515746027
Atlanta Braves

WS Probability = 0.1788807847785954
Arizona Diamondbacks

WS Probability = 0.16479271329049727
Chicago Cubs

WS Probability = 0.15988655514727404
Pittsburgh Pirates

WS Probability = 0.15119653566691998
San Francisco Giants

WS Probability = 0.14656532349122325
New York Yankees

WS Probability = 0.1434206464645377
Washington Nationals

WS Probability = 0.13938732818605779
San Diego Padres

WS Probability = 0.1349339391990964
New York Mets

WS Probability = 0.12125681447687245
Detroit Tigers

WS Probability = 0.09014307639011586
Oakland Athletics

WS Probability = 0.08958689884133877
Philadelphia Phillies

WS Probabi

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [24]:
# Rank the 2019 teams with the model saved as svc_grid_25.
predict_the_winner(model=model_25, year=2019, team_data=team_data, X_train=X_train_25)

WS Probability = 0.8934392592134591
Boston Red Sox

WS Probability = 0.4299158242955364
Los Angeles Angels

WS Probability = 0.2248668974001906
Kansas City Royals

WS Probability = 0.22443688956429875
Atlanta Braves

WS Probability = 0.2096197853517553
San Diego Padres

WS Probability = 0.19269271845833236
Houston Astros

WS Probability = 0.18163750218246563
Oakland Athletics

WS Probability = 0.17695267533612566
Arizona Diamondbacks

WS Probability = 0.1651740955613368
Chicago Cubs

WS Probability = 0.16456729315928073
St. Louis Cardinals

WS Probability = 0.15626188330465815
Pittsburgh Pirates

WS Probability = 0.14731215598634617
New York Mets

WS Probability = 0.14679783279159522
San Francisco Giants

WS Probability = 0.14313921981576072
Washington Nationals

WS Probability = 0.138657742319001
Milwaukee Brewers

WS Probability = 0.13193994262840308
Toronto Blue Jays

WS Probability = 0.12432133311409375
Philadelphia Phillies

WS Probability = 0.11245076757477078
Baltimore Orioles



  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
