In [83]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re

# Browser-like User-Agent; used as the default `headers` argument of the scraping helpers in this notebook.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}

def get_starting_pitchers(headers=HEADERS):
    """Scrape the probable-pitchers page on Baseball Savant.

    Parameters
    ----------
    headers : dict
        HTTP request headers sent with the page request.

    Returns
    -------
    dict
        Maps the text of every ``a.matchup-link`` anchor whose markup does not
        contain ``src=`` to ``None``. Insertion order follows the page.
    """
    # Pass the headers by keyword; the previous version accepted `headers`
    # but never sent them with the request.
    response = requests.get('https://baseballsavant.mlb.com/probable-pitchers', headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Anchors whose markup contains 'src=' are skipped (presumably image/logo
    # links -- confirm against the live page). Only the anchor text is needed,
    # so the href is no longer parsed; that also avoids a crash on an anchor
    # with no href attribute.
    links = soup.find_all("a", {"class": "matchup-link"})
    return {link.text: None for link in links if 'src=' not in str(link)}

# Keys of the scraped mapping (the matchup-link texts), used below as player names.
starting_pitchers = list(get_starting_pitchers())

def categorize_quality_start(row):
    """Flag a quality start: 1 when IP >= 6 and ER <= 3, otherwise 0.

    `row` is any mapping-like object whose 'IP' and 'ER' entries can be
    converted with float().
    """
    enough_innings = float(row['IP']) >= 6
    # 'ER' is only read once the innings threshold is met (short-circuit `and`).
    return int(enough_innings and float(row['ER']) <= 3)

def categorize_win_loss(row):
    """Return 1 when the row's 'DEC' entry is 'W'; any other decision maps to 0."""
    return 1 if row['DEC'] == 'W' else 0

def calculate_pitcher_fantasy_score(row):
    """Weighted fantasy score for one pitching game-log row.

    Each stat is converted with float(), multiplied by its weight and
    truncated to an int before the terms are summed:
    IP x 3, SO x 3, ER x -3, QS x 4, WIN x 6.
    """
    weights = (('IP', 3), ('SO', 3), ('ER', -3), ('QS', 4), ('WIN', 6))

    total = 0
    for stat, points in weights:
        # Truncate per term, not on the final sum, so fractional IP values
        # are cut before being added.
        total += int(float(row[stat]) * points)

    return total

def pitcher_fantasy_score(player, query, display_df =True, headers=HEADERS):
    """Average fantasy score over a pitcher's recent game logs scraped from StatMuse.

    Parameters
    ----------
    player : str
        Player name; joined with the query text to build the URL slug.
    query : int
        Number of recent games to ask for, and the number of rows averaged.
    display_df : bool
        When True, display the parsed game-log table with the derived columns.
    headers : dict
        HTTP request headers sent with the StatMuse request.

    Returns
    -------
    number
        Mean of the per-game 'FS' column over the first `query` rows,
        rounded to one decimal place.

    Raises
    ------
    AttributeError, IndexError
        When the page does not contain the markup the regular expressions
        below expect (a failed `re.search` returns None; an empty `findall`
        has no first element).
    """
    num_games = query
    slug = (player + '-' + f'last {query} game logs').lower().replace(' ', '-')

    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get is `params`, so the previous call put the User-Agent into
    # the query string and did not send it as a request header.
    response = requests.get(f'https://www.statmuse.com/mlb/ask/{slug}', headers=headers).text

    # The table payload is HTML-escaped (&quot;) inside the `answer` attribute
    # of the <visual-answer> element.
    string  = re.search(r'visual-answer answer="(.*?)/visual-answer>',response).group(1)

    # One fragment per column descriptor; the column key is taken from the
    # third ';...&' piece of each fragment.
    col_str = re.search(r'columns&quot;:(.*?)]', string).group(1)[1:].split('}')[:-1]
    columns = [re.findall(r';(.*?)&', col)[2] for col in col_str]

    # NOTE(review): the final fragment of the split is discarded -- confirm
    # this never drops a real game row.
    rows    = re.findall(r'rows&quot;:\[{&quot(.*?)}}"><', string)[0].split('}},')[:-1]

    records = []
    for row in rows:
        record = {}
        for col in row.split('},'):
            key = re.search(r';(.*?)&quot;', col).group(1)
            if key not in columns:
                continue

            val = col.split(';value&quot;:')[-1].replace('&quot;','')

            # These cells hold an object; keep only its `display` field.
            if key in ['DATE', 'TEAM', 'TM', 'OPP', 'NAME']:
                val = re.search(rf'{key}:{{display:(.*?),', val).group(1)

            record[key] = val

        records.append(record)

    df = pd.DataFrame(records).rename(columns={'ALIGNMENT':'H/A'})

    # Derived columns: quality-start flag, win flag and per-game fantasy score.
    df['QS']  = df.apply(categorize_quality_start, axis=1)
    df['WIN'] = df.apply(categorize_win_loss, axis=1)
    df['FS']  = df.apply(calculate_pitcher_fantasy_score, axis=1)

    if display_df:
        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            display(df)

    # Average only over the requested number of games.
    df  = df.iloc[:num_games,:]

    avg = round(df['FS'].mean(),1)

    return avg

# Average fantasy score over the last 5 game logs for every scraped starter.
storage = {}
for player in starting_pitchers:
    try:
        val = pitcher_fantasy_score(player, query=5, display_df=False)
        storage[player] = val
    except Exception as err:
        # Best-effort scrape: skip this player, but report the reason instead
        # of dropping them silently (same reporting as the hitter loop).
        print(err)
        print(player)

df = pd.DataFrame.from_dict(storage, orient='index', columns=['AVG FANTASY SCORE'])
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # show the full table
    display(df)
    

Unnamed: 0,AVG FANTASY SCORE
Tommy Nance,12.6
Mike Minor,16.6
Bryse Wilson,19.6
Bailey Falter,26.8
Reid Detmers,38.2
Chad Kuhl,9.2
Chris Bassitt,36.6
JT Chargois,6.4
Tyler Alexander,19.0
Glenn Otto,25.4


In [76]:
def calculate_hitter_fantasy_score(row):
    """Weighted fantasy score for one batting game-log row.

    Each stat is converted with float(), multiplied by its weight and
    truncated to an int before the terms are summed:
    1B x 3, 2B x 5, 3B x 8, HR x 10, R x 2, RBI x 2, BB x 2, HBP x 2, SB x 5.
    """
    weights = (
        ('1B', 3), ('2B', 5), ('3B', 8), ('HR', 10),
        ('R', 2), ('RBI', 2), ('BB', 2), ('HBP', 2), ('SB', 5),
    )

    total = 0
    for stat, points in weights:
        total += int(float(row[stat]) * points)

    return total

def calculate_singles(row):
    """Singles for one game-log row: hits minus doubles, triples and home runs."""
    hits = float(row['H'])
    extra_base_hits = sum(float(row[stat]) for stat in ('2B', '3B', 'HR'))
    return hits - extra_base_hits

def hitter_fantasy_score(player, query, display_df =True, headers=HEADERS):
    """Average fantasy score over a hitter's game logs scraped from StatMuse.

    Parameters
    ----------
    player : str
        Player name; joined with `query` to build the URL slug.
    query : str
        Free-text question suffix, e.g. 'last 15 game logs'.
    display_df : bool
        When True, display the parsed game-log table with the derived columns.
    headers : dict
        HTTP request headers sent with the StatMuse request.

    Returns
    -------
    number
        Mean of the per-game 'FS' column over every parsed row, rounded to
        one decimal place.

    Raises
    ------
    AttributeError, IndexError
        When the page does not contain the markup the regular expressions
        below expect.
    KeyError, ValueError
        When a stat needed for the score is missing from the parsed table or
        cannot be converted with float().
    """
    slug = (player + '-' + query).lower().replace(' ', '-')

    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get is `params`, so the previous call put the User-Agent into
    # the query string and did not send it as a request header.
    response = requests.get(f'https://www.statmuse.com/mlb/ask/{slug}', headers=headers).text

    # The table payload is HTML-escaped (&quot;) inside the `answer` attribute
    # of the <visual-answer> element.
    string  = re.search(r'visual-answer answer="(.*?)/visual-answer>',response).group(1)

    # One fragment per column descriptor; the column key is taken from the
    # third ';...&' piece of each fragment.
    col_str = re.search(r'columns&quot;:(.*?)]', string).group(1)[1:].split('}')[:-1]
    columns = [re.findall(r';(.*?)&', col)[2] for col in col_str]

    # NOTE(review): the final fragment of the split is discarded -- confirm
    # this never drops a real game row.
    rows    = re.findall(r'rows&quot;:\[{&quot(.*?)}}"><', string)[0].split('}},')[:-1]

    records = []
    for row in rows:
        record = {}
        for col in row.split('},'):
            key = re.search(r';(.*?)&quot;', col).group(1)
            if key not in columns:
                continue

            val = col.split(';value&quot;:')[-1].replace('&quot;','')

            # These cells hold an object; keep only its `display` field.
            if key in ['DATE', 'TM', 'OPP', 'NAME']:
                val = re.search(rf'{key}:{{display:(.*?),', val).group(1)

            # XBH may carry trailing text that float() rejects; fall back to
            # its first three characters in that case.
            if key == 'XBH':
                try:
                    val = float(val)
                except Exception:
                    val = val[:3]

            record[key] = val

        records.append(record)

    df = pd.DataFrame(records).rename(columns={'ALIGNMENT':'H/A'})

    # Derived columns: singles (not provided by the source table) and the
    # per-game fantasy score.
    # NOTE(review): the recorded run shows some players failing here with
    # "could not convert string to float: 'H:{display:'" -- for those rows
    # the 'H' cell is not a plain number.
    df['1B']  = df.apply(calculate_singles, axis=1)
    df['FS']  = df.apply(calculate_hitter_fantasy_score, axis=1)

    if display_df:
        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            display(df)

    avg = round(df['FS'].mean(),1)

    return avg

def get_lineups():
    """Return the `title` attribute of every `<a title="...">` tag on RotoWire's daily-lineups page for tomorrow.

    The page is fetched with the module-level HEADERS and matched with a
    regular expression; the titles are returned in page order, duplicates
    included.
    """
    url = 'https://www.rotowire.com/baseball/daily-lineups.php?date=tomorrow'
    page_html = requests.get(url, headers=HEADERS).text
    # Non-greedy capture of each title attribute value.
    return re.findall(r'<a title="(.*?)"', page_html)

# Score every player named in tomorrow's lineups over their last 15 game logs.
players = get_lineups()
storage = {}
for player in players:
    # Set to True for individual players to inspect their parsed game-log table.
    display_flag = False
    try:
        score = hitter_fantasy_score(player, query='last 15 game logs', display_df=display_flag)
    except Exception as err:
        # Best-effort scrape: report the failure and move on to the next player.
        print(err)
        print(player)
    else:
        storage[player] = score

df = pd.DataFrame.from_dict(storage, orient='index', columns=['AVG FANTASY SCORE'])
ranked = df.sort_values(by=['AVG FANTASY SCORE'], ascending=False)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # show the full table
    display(ranked)

could not convert string to float: 'H:{display:'
Will Smith
'NoneType' object has no attribute 'group'
Nathaniel Lowe
'2B'
Jose Barrero
could not convert string to float: 'H:{display:'
Nicky Lopez
could not convert string to float: 'H:{display:'
Pablo Reyes
could not convert string to float: 'H:{display:'
Victor Caratini
could not convert string to float: 'H:{display:'
Darin Ruf
could not convert string to float: 'H:{display:'
Yadier Molina


Unnamed: 0,AVG FANTASY SCORE
Aaron Judge,12.8
Paul Goldschmidt,12.2
Alex Bregman,11.8
J.T. Realmuto,10.8
Vaughn Grissom,10.4
Mookie Betts,10.2
Bryce Harper,9.8
Andres Gimenez,9.6
Christian Walker,9.5
Vladimir Guerrero,9.5
