In [1]:
# Import
import pandas as pd
import requests
import json
import inspect
from lxml import html
from pathlib import Path
from sportsreference.mlb.roster import Player
from sportsreference.mlb.roster import Roster
from bs4 import BeautifulSoup

In [2]:
# Variables
# Workbook holding the FFS rosters; it is read with one sheet per entry of
# ffs_teams_list.
file_name = "FFS MLB Sheet.xlsx"

# Team abbreviations used as sheet names in the FFS workbook.
ffs_teams_list = [
    'ARI', 'ATL', 'BAL', 'BOS', 'CHC', 'CWS',
    'CIN', 'CLE', 'COL', 'DET', 'HOU', 'KC',
    'LAA', 'LAD', 'MIA', 'MIL', 'MIN', 'NYM',
    'NYY', 'OAK', 'PHI', 'PIT', 'SD', 'SF',
    'SEA', 'STL', 'TB', 'TEX', 'TOR', 'WAS',
]

# Team abbreviations passed to Roster(). Same teams in the same order as
# ffs_teams_list; only these two abbreviations are spelled differently.
_ffs_to_mlb_abbr = {'CWS': 'CHW', 'WAS': 'WSN'}
mlb_teams_list = [_ffs_to_mlb_abbr.get(abbr, abbr) for abbr in ffs_teams_list]

In [3]:
# Build the player-id table from the team rosters.
# THIS TAKES A REALLY LONG TIME (one Roster lookup per team per season).
# NOTE(review): the original header mentioned MLB_PLAYER_{YEAR}.csv, but
# nothing in this cell writes a file.

# Seasons to collect, and the accumulated roster mapping
year_list = [2020]
player_dict = dict()

# Visit every (season, team) pair; dict.update means an id seen again on a
# later roster simply overwrites the earlier entry.
for year, team in [(season, club) for season in year_list for club in mlb_teams_list]:
    roster = Roster(team, year, slim=True)
    players = roster.players
    player_dict.update(players)

# One row per roster entry: key -> 'srid', value -> 'uf_name'
mlb_player_id_df = pd.DataFrame(
    list(player_dict.items()),
    columns=['srid', 'uf_name'],
)




In [4]:
# Look up birth date, position and a punctuation-free name for every player.
player_ids = mlb_player_id_df['srid']

# Parallel lists: entry i belongs to player_ids[i]
player_dobs, positions, names = [], [], []

for player_id in player_ids:
    # One Player object per id
    player_obj = Player(player_id)
    player_dobs.append(player_obj.birth_date)
    positions.append(player_obj.position)
    # Strip periods, commas and apostrophes from the name
    clean_name = player_obj.name
    for punctuation in ('.', ',', "'"):
        clean_name = clean_name.replace(punctuation, '')
    names.append(clean_name)

# Add the new columns to the existing frame, in the order dob, name, pos
for column, values in {'dob': player_dobs, 'name': names, 'pos': positions}.items():
    mlb_player_id_df[column] = values

mlb_player_id_df

# Export as csv
#p = Path('players')
#mlb_player_id_df.to_csv(Path(p, 'mlb_players.csv'), index=False)




Unnamed: 0,srid,uf_name,dob,name,pos
0,kellyca02,Carson Kelly,1994-07-14,Carson Kelly,C
1,walkech02,Christian Walker,1991-03-28,Christian Walker,1B
2,marteke01,Ketel Marte,1993-10-12,Ketel Marte,2B
3,ahmedni01,Nick Ahmed,1990-03-15,Nick Ahmed,SS
4,escobed01,Eduardo Escobar,1989-01-05,Eduardo Escobar,SS
...,...,...,...,...,...
1169,romerse01,Seth Romero,1996-04-19,Seth Romero,P
1170,feddeer01,Erick Fedde,1993-02-25,Erick Fedde,P
1171,vothau01,Austin Voth,1992-06-26,Austin Voth,P
1172,braymbe01,Ben Braymer,1994-04-28,Ben Braymer,P


In [None]:
# Display only the rows whose 'pos' value is '1B'
is_first_base = mlb_player_id_df['pos'].eq('1B')
mlb_player_id_df.loc[is_first_base]

In [5]:
# Record 2020 at-bats for non-pitchers and 2020 innings for pitchers, then
# attach both as 'ab' / 'ip' columns on the player table.
hitter_srids, hitter_abs = [], []
pitcher_srids, pitcher_ips = [], []

# Season keys requested from each Player, always in this order.
# NOTE(review): '9999' is presumably the key for the career totals row --
# confirm against the sportsreference Player documentation.
season_keys = ('9999', '2020', '2019')

for player_id in player_ids:
    # Create Player object
    player_obj = Player(player_id)
    pos = player_obj.position
    is_pitcher = pos == 'P'
    # Pitchers are measured by innings, everyone else by at-bats
    stat_name = 'innings_played' if is_pitcher else 'at_bats'
    career_stat, year_stat, prev_stat = [
        getattr(player_obj(season), stat_name) for season in season_keys
    ]
    # Keep the player only when the 2020 value differs from both the
    # '9999' value and the 2019 value
    if not (career_stat != year_stat and prev_stat != year_stat):
        continue
    if is_pitcher:
        pitcher_srids.append(player_id)
        pitcher_ips.append(year_stat)
    else:
        hitter_srids.append(player_id)
        hitter_abs.append(year_stat)

hitters_df = pd.DataFrame({'srid': hitter_srids, 'ab': hitter_abs})
pitchers_df = pd.DataFrame({'srid': pitcher_srids, 'ip': pitcher_ips})

# Outer joins on 'srid' keep every player, with NaN where no stat was recorded
merge1_df = mlb_player_id_df.merge(hitters_df, on='srid', how='outer')
mlb_players_df = merge1_df.merge(pitchers_df, on='srid', how='outer')
mlb_players_df

Unnamed: 0,srid,uf_name,dob,name,pos,ab,ip
0,kellyca02,Carson Kelly,1994-07-14,Carson Kelly,C,59.0,
1,walkech02,Christian Walker,1991-03-28,Christian Walker,1B,127.0,
2,marteke01,Ketel Marte,1993-10-12,Ketel Marte,2B,137.0,
3,ahmedni01,Nick Ahmed,1990-03-15,Nick Ahmed,SS,120.0,
4,escobed01,Eduardo Escobar,1989-01-05,Eduardo Escobar,SS,120.0,
...,...,...,...,...,...,...,...
1169,romerse01,Seth Romero,1996-04-19,Seth Romero,P,,
1170,feddeer01,Erick Fedde,1993-02-25,Erick Fedde,P,,22.2
1171,vothau01,Austin Voth,1992-06-26,Austin Voth,P,,23.2
1172,braymbe01,Ben Braymer,1994-04-28,Ben Braymer,P,,


In [6]:
# Import FFS MLB Sheet
# Read Player Names
name_cols = "G"
player_list = []

# Text removed from (or rewritten in) every name so that the result can be
# matched against the 'name' column of the MLB player table.
# NOTE(review): the accent keys below look like UTF-8 letters that were
# decoded as Latin-1; confirm they match the text pandas actually returns
# from the workbook.
name_replacements = (
    ('.', ''), (',', ''), ("'", ''),
    ('Ã¡', 'a'),  # previously mapped to 'd', which corrupted the name
    ('Ã©', 'e'), ('Ã­', 'i'), ('Ã±', 'n'), ('Ã³', 'o'), ('Ãº', 'u'),
)


def _clean_player_name(raw_name, team):
    """Return a sheet cell as a bare player name.

    Leading tags -- the owning team's abbreviation, 'RV' and 'R1' -- are
    stripped repeatedly, so they may appear in any order and combination
    (for example '<team> RV R1 <name>'). Only a tag at the very start of the
    name that is followed by a space is removed; matching text inside the
    name is left alone. The punctuation and accent rewrites in
    ``name_replacements`` are applied afterwards.

    Args:
        raw_name: Cell value from the 'Player' column; converted with str().
        team: Abbreviation of the sheet the cell came from.

    Returns:
        The cleaned name with surrounding whitespace removed. May be an
        empty string if the cell held nothing but tags or blanks.
    """
    name = str(raw_name).strip()
    tags = (f"{team} ", 'RV ', 'R1 ')
    found_tag = True
    # Re-scan after every removal: stripping one tag can expose another.
    while found_tag:
        found_tag = False
        for tag in tags:
            if name.startswith(tag):
                name = name[len(tag):].lstrip()
                found_tag = True
    for old, new in name_replacements:
        name = name.replace(old, new)
    return name.strip()


# Open the workbook once: a list of sheet names yields a dict of DataFrames
# keyed by sheet name.
sheets = pd.read_excel(file_name,
    sheet_name=ffs_teams_list,
    usecols=name_cols,
    skiprows=2)

# Loop through each tab to pull list of player names
for team in ffs_teams_list:
    sheet = sheets[team]
    # Remove empty rows (blank strings and missing cells)
    sheet_names = sheet['Player'].replace("", float("NaN")).dropna()
    for raw_name in sheet_names:
        name = _clean_player_name(raw_name, team)
        # Skip cells that contained nothing except tags or whitespace
        if name:
            player_list.append({"name": name, "owner": team})

# Explicit columns keep 'name' and 'owner' present even if no rows were read
ffs_player_df = pd.DataFrame(player_list, columns=['name', 'owner'])
ffs_player_df

Unnamed: 0,name,owner
0,Luis Rodriguez,ARI
1,Mike Zunino,ARI
2,Andy Young,ARI
3,Misael Urbina,ARI
4,Jose Martinez,ARI
...,...,...
2942,Adonis Medina,WAS
2943,Bren Spillane,WAS
2944,Jackson Goddard,WAS
2945,Tyler Anderson,WAS


In [7]:
# Combine the MLB player table with the FFS ownership table. The outer join
# on 'name' keeps players that appear in only one of the two sources.
all_players_df = pd.merge(mlb_players_df, ffs_player_df, on='name', how='outer')

# to_csv does not create missing folders, so make sure 'players/' exists
# before writing.
output_dir = Path('players')
output_dir.mkdir(parents=True, exist_ok=True)
all_players_df.to_csv(output_dir / 'all_players.csv')

KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'

In [None]:
# Player search URLs, kept for reference:
# https://www.fangraphs.com/search?q=
# https://www.baseball-reference.com/search/search.fcgi?hint=jose+altuve&search=jose+altuve












