In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Gets all the HTML from the given URL
page = requests.get("https://www.baseball-reference.com/leagues/MLB/2019-standard-batting.shtml")
# Parses the HTML string and turns it into a analyzable Beautiful Soup object
soup = BeautifulSoup(page.content, 'html.parser')
# Getting the first table from the page
my_table = soup.find('table')
# Getting the column header cells, which contains column labels
my_head = my_table.find('thead')
# Getting the inner text from each cell - like .innerText in JS
my_head = [cell.text for cell in my_head.find_all('th')]
# All the rows containing team batting totals - AB, R, H, etc.
my_table = [row for row in my_table.find_all('tr')]
# Getting the inner text from each row cell, i.e. the numbers, and converting them from strings to floats
my_table = [[float(cell.text) for cell in row.find_all('td')] for row in my_table]
# Filters out the empty cells my_table = [cell for cell in my_cells if cell]

In [26]:
huskies = pd.read_csv("../data/huskies_batters.csv")

In [27]:
huskies.columns

Index(['player', 'height', 'weight', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B',
       'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'OPS+',
       'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB', 'Pos', 'Rbat', 'Rbaser', 'Rdp',
       'Rfield', 'Rpos', 'RAA', 'WAA', 'Rrep', 'RAR', 'WAR', 'waaWL%',
       '162WL%', 'oWAR', 'dWAR', 'oRAR'],
      dtype='object')

In [28]:
# Paring our stats DataFrame from 28 columns to 10
df = huskies[['PA', 'H', '2B', '3B', 'HR', 'BB', 'IBB', 'GDP', 'HBP', 'SH', 'SF']]
# Calculating the number of singles (i.e. hits - non-single hits)
df['1B'] = df['H'] - df[['2B', '3B', 'HR']].sum(1)
# Adding together total walks: 4-ball walks + hit by pitch walks
df['WALK'] = df[['BB', 'HBP']].sum(1)
df['OUT'] = df['PA'] - (df['H'] + df['BB'] + df['HBP'] + df['IBB'])
# Outs = All plate appearancesdf['OUT'] = df['PA'] - df[['H', 'WALK']].sum(1)
# The needed columns for our 6 outcomes
df_probs = df[['1B', '2B', '3B', 'HR', 'WALK', 'OUT']]
# Dividing the rows by total number of plate appearances, to  get probabilities that add up to 1 for each row
df_probs = df_probs.div(df_probs.sum(1), axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['1B'] = df['H'] - df[['2B', '3B', 'HR']].sum(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['WALK'] = df[['BB', 'HBP']].sum(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['OUT'] = df['PA'] - (df['H'] + df['BB'] + df['HBP'] + df['IBB'])


In [29]:
df_probs

Unnamed: 0,1B,2B,3B,HR,WALK,OUT
0,0.167883,0.04562,0.0,0.040146,0.120438,0.625912
1,0.178499,0.048682,0.002028,0.01217,0.058824,0.699797
2,0.168654,0.052811,0.003407,0.030664,0.078365,0.666099
3,0.119705,0.055249,0.003683,0.029466,0.106814,0.685083
4,0.123016,0.055556,0.0,0.029762,0.05754,0.734127
5,0.192678,0.046243,0.003854,0.011561,0.071291,0.674374
6,0.175953,0.05132,0.002933,0.033724,0.073314,0.662757
7,0.178388,0.042882,0.003431,0.013722,0.090909,0.670669
8,0.149813,0.052434,0.001873,0.031835,0.076779,0.687266
9,0.157807,0.039867,0.003322,0.036545,0.061462,0.700997


In [30]:
team1 = df_probs.sample(6)
avails = [ix for ix in df_probs.index if ix not in team1.index]
team2 = df_probs.iloc[avails, :].sample(6)

In [31]:
class Player:
    def __init__(self, probs):
        self.probs = pd.Series(probs) # Player prob distribution
        self.stats = [] # Player at-bat results will be stored here
        
    # Randomly select number from 0 to 1; probability of outcomes will depend on individual player probs. Then, store in player stats
    def at_bat(self):
        outcome = np.random.choice(self.probs.index, p=self.probs.values)
        self.stats.append(outcome)
        return outcome
    # Calculate's player on-base percentage
    def OBP(self):
        nonouts = [ab for ab in self.stats if ab != 'OUT']
        return 1.0 * len(nonouts) / len(self.stats)
    
    # Calculates player batting average
    def AVE(self):
        apps = [ab for ab in self.stats if ab != 'WALK']
        hits = [ab for ab in apps if ab != 'OUT']
        return 1.0 * len(hits) / len(apps)
    
    # Records number of bases for each outcome (e.g. single = 1, double = 2)
    def bases(self, hit_type):
        if hit_type in ['WALK', '1B']:
            return 1
        elif hit_type == '2B':
            return 2
        elif hit_type == '3B':
            return 3
        elif hit_type == 'HR':
            return 4
        else:
            return 0
    
    # Slugging = average number of bases advanced per at-bat (counting walks as 1 base, slightly different from standard definition)  
    def slugging(self):
        return sum([self.bases(ab) for ab in self.stats]) / len(self.stats)

In [32]:
class Team:
    def __init__(self, players):
        self.players=players # 9x6 DataFrame
        self.record = [0, 0] # Initial 0-0 record, updated after each game
    # Adds one to win or loss column
    def update_record(self, boo):
        if boo:
            self.record[0] += 1
        else:
            self.record[1] += 1

In [33]:
class Game:
    def __init__(self,
                 teams,
                 inning=1,
                 outs=0,
                 away_or_home=0,
                 bases=[0,0,0],
                 score=[0,0],current_player=[0,0]):
        self.teams=teams
        self.inning=inning
        self.outs=outs
        self.away_or_home=away_or_home
        self.bases=bases
        self.score=score
        self.game_on=True
        self.current_player=current_player
def walker(self):
        self.bases.append(0)
        self.bases[0] += 1
        for i in range(3):
            if self.bases[i]==2:
                self.bases[i] -= 1
                self.bases[i+1] += 1
        runs = self.bases[-1]
        self.bases = self.bases[:3]
        self.score[self.away_or_home] += runs
def hitter(self, hit_type):
        if hit_type == '1B':
            self.bases = [1,0]+self.bases
        elif hit_type == '2B':
            self.bases = [0,1]+self.bases
        elif hit_type == '3B':
            self.bases = [0,0,1]+self.bases
        elif hit_type == 'HR':
            self.bases = [0,0,0,1]+self.bases
        runs = sum(self.bases[3:])
        self.bases = self.bases[:3]
        self.score[self.away_or_home] += runs
def handle_at_bat(self):
        player=self.teams[self.away_or_home].players[self.current_player[self.away_or_home]]
        result = player.at_bat()
        if result == 'OUT':
            self.outs += 1
        elif result == 'BB':
            self.walker()
        else:
            self.hitter(result)
        if (self.inning >= 9 and ((self.outs >= 3 and self.away_or_home == 0) or self.away_or_home == 1) and self.score[0] < self.score[1]) or (self.inning >= 9 and self.outs >= 3 and self.score[0] > self.score[1]):
            self.game_on = False
        if self.outs >= 3:
            if self.away_or_home == 1:
                self.inning += 1
            self.outs = 0
            self.current_player[self.away_or_home] = (self.current_player[self.away_or_home] + 1) % 9
            self.away_or_home = (self.away_or_home + 1) % 2
            self.bases = [0, 0, 0]
def play_game(self):
        while self.game_on:
            self.handle_at_bat()
        final_score = copy.copy(self.score)
        winner = 1 if (self.score[0] < self.score[1]) else 0
        self.teams[0].record[winner] += 1
        self.teams[1].record[(winner+1)%2] += 1
        self.inning = 1
        self.outs = 0
        self.away_or_home = 0
        self.bases = [0,0,0]
        self.score = [0,0]
        self.game_on = True
        return {
            "final_score": final_score,
            "winner": winner
        }

In [34]:
class Simulator:
    def __init__(self, teams, inning=1, away_or_home=0,bases=[0,0,0], outs=0, score=[0,0]):
        self.teams=teams
        self.inning=1
        self.outs=0
        self.away_or_home=away_or_home
        self.bases=[0,0,0]
        self.score=[0,0]
    
    def simulate(self, its=100):
        game_log = []
        wins = 0
        for i in range(its):
            game = Game([getattr(self, attr) for attr in dir(g) if "__" not in attr])
            result = game.play_game()
            wins += result.winner
            game_log.append(result)
        print(f"The home team won ${wins} out of ${its}, for a winning percentage of {wins / its * 100}%!")
        return game_log