In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
from datetime import datetime
import pandas as pd
import numpy as np
import re
from unicodedata import normalize
import requests
import pandas as pd
import numpy as np
import time
from IPython.core.display import HTML


In [2]:
# Players to track, keyed by the roster position they are listed under.
# A player who is eligible at several positions appears under each of them
# (e.g. 'Hunter Dozier' under both '1B' and '3B').
name_list = {
    'C': ['Willson Contreras', 'Jonathan Lucroy', 'Yadier Molina'],
    '1B': ['Josh Bell', 'Hunter Dozier', 'Eric Hosmer'],
    '2B': ['Jose Altuve', 'Cesar Hernandez', 'DJ LeMahieu'],
    '3B': ['Matt Chapman', 'Hunter Dozier', 'Manny Machado', 'Yadier Molina'],
    'SS': ['Javier Baez', 'Carlos Correa', 'Francisco Lindor', 'Manny Machado'],
    'OF': ['Lorenzo Cain', 'Bryce Harper', 'Jason Heyward',
           'Aaron Hicks', 'Adam Jones', 'Nick Markakis',
           'Jose Martinez', 'Eddie Rosario', 'Nick Senzel', 'Christian Yelich'],
}

# Every tracked player exactly once. The names are sorted so the list comes out
# in the same order on every kernel restart; iterating a bare set of strings
# does not guarantee that, because string hashing is randomised per process.
full_list = sorted({player for players in name_list.values() for player in players})

In [3]:
def url_maker(period, position):
    """
    Build the CBS Sports fantasy-baseball stats URL(s) for a period and position.

    period: int or str
        an int (e.g. 2017, 2018, 2019) selects the full-season page of that year;
        any other value is inserted as a window code into the 2019 page
        ('y': yesterday, '7d': last seven days, '14d', '21d', '28d').
        Note that the year 2019 is hard-coded for window codes.
    position: str
        a single position code such as 'C', '1B', '2B', 'SS', '3B', 'OF', 'U',
        'SP', 'RP', or 'all' to get one URL for each of
        'C', '1B', '2B', 'SS', '3B', 'OF'.
    return: str or list of str
        a single URL string for one position, a list of URLs for 'all'.
    """
    season_template = 'https://www.cbssports.com/fantasy/baseball/stats/{}/{}/season/stats/'
    window_template = 'https://www.cbssports.com/fantasy/baseball/stats/{}/2019/{}/stats/'

    def build(pos):
        # Exact type check on purpose: only a plain int counts as a season year.
        if type(period) is int:
            return season_template.format(pos, str(period))
        return window_template.format(pos, period)

    if position != 'all':
        return build(position)
    return [build(pos) for pos in ('C', '1B', '2B', 'SS', '3B', 'OF')]

def soup_maker(url):
    """
    Download a page and parse it into a soup object.

    url: str
        address of the page to fetch
    return:
    soup object built from the response body with the 'html.parser' backend

    The request carries a desktop-browser User-Agent header and a 5 second
    timeout. The HTTP status code is not inspected here.
    """
    browser_headers = {
        'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
    }
    response = requests.get(url, headers=browser_headers, timeout=5)
    return bs(response.content, 'html.parser')

def col_names(soup):
    """
    Read the column headers of the stats table found in a soup object.

    parameters:
    soup: soup object
    return: (long_names, short_names)
    long_names: list
        full stat names taken from each header's tooltip, whitespace-trimmed
        example: ['name', 'fpts', 'Hits', 'Home Runs', ...]
    short_names: list
        abbreviations taken from each header's link text
        example: ['name', 'fpts', 'h', 'hr', ...]

    Both lists start with 'name' and 'fpts', which are not read from the page.
    """
    header_cells = soup.findAll(
        'th', {'class': "TableBase-headTh TableBase-headTh--number has-tooltip "})

    long_names = ['name', 'fpts']
    short_names = ['name', 'fpts']
    for cell in header_cells:
        tooltip = cell.find('div', {'class':'Tablebase-tooltipInner'})
        long_names.append(tooltip.string.strip())
        link = cell.find('a')
        short_names.append(link.text)
    return long_names, short_names

def player_stats(soup, player_name):
    """
    Return one player's statistics from the stats table in a soup object.

    parameters:
    soup: soup object of a page on which players' stats are given
    player_name: str
        compared for exact equality with the text of the player's link
    return: list
        the player's name followed by the stripped text of every numeric cell
        in that player's row, all as strings; an empty list when no usable row
        for the player exists.
    example: soup, 'Josh Bell' --> ['Josh Bell', '79.5', '19.9', '2', ...]

    Only the first usable row for the player is returned, so the result always
    describes a single table row. Rows without a player link are skipped, and
    a row in which some cell has no plain string content is skipped as a whole
    rather than contributing a partial record.
    """
    table_rows = soup.findAll('tr', {'class':'TableBase-bodyTr '})
    for body in table_rows:
        name_span = body.find('span',{'class':'CellPlayerName--long'})
        a_tag = name_span.a if name_span is not None else None
        if a_tag is None or a_tag.text != player_name:
            continue

        cells = body.findAll('td', {'class':'TableBase-bodyTd TableBase-bodyTd--number '})
        try:
            # Build the row in one go so a bad cell cannot leave half a record.
            stats = [cell.string.strip() for cell in cells]
        except AttributeError:
            # cell.string is None when the cell does not hold one plain string.
            continue
        return [a_tag.string] + stats
    return []

def create_data_frame(soup,name_list):
    """
    Build a dataframe of stats for the given players from one page.

    parameters:
    soup: soup object
    name_list: list
        names of the players to look up
    return:
    dataframe with one row per player that player_stats found on the page
    (players with no stats are left out) and the abbreviations returned by
    col_names as column labels.
    """
    _, column_labels = col_names(soup)
    scraped = (player_stats(soup, player) for player in name_list)
    # An empty stats list means the player is not on this page.
    stat_rows = [np.array(stats) for stats in scraped if stats]
    return pd.DataFrame(stat_rows, columns= column_labels)

def _tidy_stats_frame(stats):
    """Deduplicate, cast, index by player name and drop 'fpts'/'cs' from a scraped frame."""
    tidy = stats.drop_duplicates().reset_index(drop = True)
    for col in tidy.columns.tolist():
        # Columns that cannot be cast to int32 are left unchanged (errors='ignore').
        tidy[col] = tidy[col].astype('int32', errors = 'ignore')
    # verify_integrity makes a player that is still listed twice an error
    # instead of a silently duplicated index entry.
    tidy = tidy.set_index(keys= 'name', verify_integrity= True)
    return tidy.drop(columns = ['fpts', 'cs'])


def final(period, position, name_list):
    """
    Scrape, combine and clean the stats of the given players.

    parameters:
    period: int or str
        passed through to url_maker (season year or window code such as '21d')
    position: str
        a single position code, or 'all' to scrape every URL that url_maker
        returns for 'all' and combine the results
    name_list: list
        names of the players to look up
    return:
    dataframe indexed by player name, without the 'fpts' and 'cs' columns,
    with duplicate rows removed and every castable column converted to int32.

    raises:
    ValueError if position is 'all' and no page could be scraped, or if a
    player name is still duplicated after identical rows were removed.

    With position 'all', a page that cannot be downloaded or parsed is reported
    with a printed message and skipped; the remaining pages are still used.
    For a single position, errors are not caught.
    """
    if position != 'all':
        soup = soup_maker(url_maker(period, position))
        return _tidy_stats_frame(create_data_frame(soup, name_list))

    frames = []
    for url in url_maker(period, position):
        try:
            frames.append(create_data_frame(soup_maker(url), name_list))
        except Exception:
            print('following url failed:', url)
    if not frames:
        raise ValueError('no page could be scraped for period {!r}'.format(period))

    # Clean the combined frame itself, so that the 'all' result has the same
    # layout as a single-position result.
    return _tidy_stats_frame(pd.concat(frames, ignore_index = True))
    


In [4]:
# Stats of the tracked outfielders over the '21d' (last 21 days) window.
df_of = final(period='21d', position='OF', name_list=name_list['OF'])

In [5]:
# Rank by fantasy points per game on the numeric value. The scraped cells are
# strings and final() only casts columns that fit int32, so decimal values such
# as '4.0' are presumably still text here; sorting the text directly would put
# '10.0' below '2.4'. Values that are not numbers sort last.
fppg_ranking = pd.to_numeric(df_of['fppg'], errors='coerce').sort_values(ascending=False)
display(HTML(df_of.loc[fppg_ranking.index].to_html()))

Unnamed: 0_level_0,fppg,gp,tpa,ab,r,h,1b,2b,3b,hr,rbi,avg,obp,slg,ops,bb,so,tb,sb,hbp,sh,sf,e,a,gdp
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Christian Yelich,4.0,15,65,53,13,15,8,3,—,4,6,0.283,0.415,0.566,0.981,10,13,30,3,2,—,—,—,2,4
Nick Senzel,3.4,15,71,63,10,15,10,1,1,3,7,0.238,0.314,0.429,0.743,7,17,27,4,—,—,—,1,—,1
Lorenzo Cain,3.2,18,89,81,11,21,12,8,—,1,10,0.259,0.315,0.395,0.71,6,11,32,2,1,—,1,—,—,3
Adam Jones,3.0,17,74,70,8,17,10,3,—,4,10,0.243,0.284,0.457,0.741,4,9,32,1,—,—,—,1,1,4
Nick Markakis,3.0,20,84,69,13,17,11,3,1,2,8,0.246,0.369,0.406,0.775,14,7,28,—,—,—,1,—,—,1
Jose Martinez,2.9,20,78,69,11,23,16,5,—,2,10,0.333,0.41,0.493,0.903,8,13,34,—,1,—,—,—,1,1
Eddie Rosario,2.6,19,79,74,11,20,17,1,—,2,11,0.27,0.291,0.365,0.656,3,10,27,1,—,—,2,—,—,2
Bryce Harper,2.4,18,76,63,11,12,5,5,—,2,9,0.19,0.329,0.365,0.694,12,26,23,—,1,—,—,1,—,—
