In [361]:
from bs4 import BeautifulSoup
from pybaseball import playerid_lookup
import json
import lxml
import pandas as pd
import re, os
import requests
from time import sleep

pd.set_option("display.max_columns", 1000)

# Scraping Each Player's Meanest Season
We're going to initialize a list of all of our players and then run each player through a scraper to pull a set of stats that describes the numbers we could expect out of them for a full, 162-game season.

For batters, this is easy. Here's the formula we can use for each stat to arrive at an expected value for that stat for a full 162-game season: 

```
Career games played / 162 = [[FACTOR]]
[[STAT]] / [[FACTOR]] = expectation
```

For pitchers, this is a little more complicated, because pitchers don't play every game. But our friends over at Baseball Reference **(BBREF)** have attempted to tackle this by treating each complete pitcher-season as:

```
games_pitched = X
games_started = X * 2
games_pitched + games_started = 68
```

So for a pitcher, either 34 starts or 68 relief appearances is worth one pitcher_season, and one pitcher_season is pegged to a value of 68. Their rationale is that, across history, full-time starters average to around 34 starts per year, and full-time relievers average about 68 relief appearances per year.

But some pitchers appear as both starters and relievers. Hence the need for normalization.

Thankfully, our friends at Baseball Reference have done all of these calculations for us, and they're available to view for free, with no login necessary.

Which mean we can scrape the hell out of it.

## Let's initialize a list of our players

In [362]:
pitchers = [
    'bartolo colon',
    'fernando rodney',
    'reyes moronta',
    'josé mijares',
    'tom healey',
    'jean machi',
    'fernando rodney',
    'angel castro',
    'edwar colina',
    'framber valdez',
    'andrew carignan',
    'keegan akin',
    'julio mateo',
    'enrique gonzalez',
]
batters = [
    'alejandro kirk',
    'brayan pena',
    'pablo sandoval',
    'tyler white',
    'josh phegley',
    'donovan solano',
    'alberto callaspo',
    'miguel tejada',
    'josh naylor',
    'dayán viciedo',
    'harold ramirez',
    'willians astudillo',
    'bob fothergill'
]


In order to scrape Baseball Reference, we'll need the unique ID that BBREF has given every player for whom they have a record.

Thankfully, the PyBaseball library has a module that'll dig out this ID for us if we feed it our player names:

```python

from pybaseball import playerid_lookup

playerid_lookup('last', 'first', fuzzy=True)['key_bbref'][0]

# e.g.:


>>> Bartolo Colon as ('colon', 'bartolo')

'colonba01'
```

But we can make ourselves a little loop that will take our list of player names, spin them around, run them through player_lookup, and return a cute little dataframe with everything we need.

And then we party.

In [363]:
names_and_ids = []
count = 0
for name in range(len(pitchers)):
    name = pitchers[count].split()
    name.append(playerid_lookup(name[1], name[0], fuzzy=True)["key_bbref"][0])
    names_and_ids.append(name)
    count += 1
    sleep(0.25)
huskies_p = pd.DataFrame(names_and_ids, columns=["first", "last", "key_bbref"])
huskies_p['pitcher'] = True

No identically matched names found! Returning the 5 most similar names.


In [364]:
names_and_ids = []
count = 0
for name in range(len(batters)):
    name = batters[count].split()
    name.append(playerid_lookup(name[1], name[0], fuzzy=True)["key_bbref"][0])
    names_and_ids.append(name)
    count += 1
    sleep(0.25)
huskies_b = pd.DataFrame(names_and_ids, columns=["first", "last", "key_bbref"])
huskies_b['pitcher'] = False

In [365]:
huskies = huskies_b.merge(huskies_p, how='outer')

## Now we run everything through our scraper

In [366]:
huskies

Unnamed: 0,first,last,key_bbref,pitcher
0,alejandro,kirk,kirkal01,False
1,brayan,pena,penabr01,False
2,pablo,sandoval,sandopa01,False
3,tyler,white,whitety01,False
4,josh,phegley,phegljo01,False
5,donovan,solano,solando01,False
6,alberto,callaspo,callaal01,False
7,miguel,tejada,tejadmi01,False
8,josh,naylor,naylojo01,False
9,dayán,viciedo,vicieda01,False


In [367]:
targets = []
count = 0
for player in range(len(huskies['key_bbref'])):
    id = huskies['key_bbref'][count]
    targets.append(id)
    count += 1

In [368]:
# Snippet from github user: BenKite
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py

def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.findAll('div', id = "content")
    divs = divs[0].findAll("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.findAll("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [369]:
findTables('https://www.baseball-reference.com/players/c/castran01.shtml')

['pitching_standard',
 'pitching_value',
 'pitching_advanced',
 'batting_standard',
 'batting_value',
 'standard_fielding',
 'appearances']

In [370]:
# Snippet from github user: BenKite
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py

def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.findAll('table', id = tableID)
    data_rows = tables[0].findAll('tr')
    data_header = tables[0].findAll('thead')
    data_header = data_header[0].findAll("tr")
    data_header = data_header[0].findAll("th")
    game_data = [[td.getText() for td in data_rows[i].findAll(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [371]:
test_a = pullTable("https://www.baseball-reference.com/players/c/castran01.shtml", "pitching_standard")
test_b = pullTable("https://www.baseball-reference.com/players/c/castran01.shtml", "pitching_value")


In [372]:
test_a

Unnamed: 0,Year,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards
0,2006,23,DET-min,FRk,4.0,1,0.8,1.39,9,9,0,1.0,0,0,58.1,34,11,9,1,10,0,71,1,0,3.0,216.0,,,0.754,5.2,0.2,1.5,11.0,7.1,TGR DOSL
1,2007,24,DET-min,"A,A+",11.0,6,0.647,3.39,27,25,0,1.0,0,0,151.1,137,63,57,16,49,0,78,8,0,6.0,624.0,,,1.229,8.1,1.0,2.9,4.6,1.59,"WMI,LAK MIDW,FLOR"
2,2008,25,DET-min,"AA,A+",4.0,3,0.571,3.08,47,0,20,0.0,0,7,79.0,70,31,27,4,30,2,52,4,1,5.0,330.0,,,1.266,8.0,0.5,3.4,5.9,1.73,"ERI,LAK EL,FLOR"
3,2009,26,PHI-TBR-min,"A+,AA",2.0,3,0.4,10.46,20,0,8,0.0,0,1,26.2,41,38,31,5,22,2,17,1,0,6.0,140.0,,,2.363,13.8,1.7,7.4,5.7,0.77,"REA,POE,MTG EL,FLOR,SOUL"
4,2013,30,LAD-min,AAA,8.0,5,0.615,3.48,25,19,1,0.0,0,0,116.1,123,50,45,7,37,2,91,4,0,5.0,501.0,,,1.375,9.5,0.5,2.9,7.0,2.46,ABQ PCL
5,2014,31,STL-OAK-min,AAA,9.0,9,0.5,4.66,30,18,5,0.0,0,1,114.0,130,71,59,14,32,1,73,3,0,2.0,491.0,,,1.421,10.3,1.1,2.5,5.8,2.28,"MEM,SAC PCL"
6,2015,32,OAK-min,AAA,2.0,1,0.667,3.13,38,2,23,0.0,0,8,60.1,54,23,21,6,19,0,45,3,0,0.0,249.0,,,1.21,8.1,0.9,2.8,6.7,2.37,NVL PCL
7,2015,32,OAK,AL,0.0,1,0.0,2.25,5,0,3,0.0,0,0,4.0,8,1,1,1,3,0,4,0,0,0.0,22.0,190.0,6.63,2.75,18.0,2.3,6.8,9.0,1.33,
8,2016,33,OAK-min,AAA,2.0,8,0.2,5.15,37,10,11,1.0,0,2,92.2,107,59,53,13,36,0,58,6,1,5.0,416.0,,,1.543,10.4,1.3,3.5,5.6,1.61,NVL PCL
9,1 Yr,0,1,.000,2.25,5,0.0,3.0,0,0,0,4.0,8,1,1.0,1,3,0,4,0,0,0,22,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,


In [373]:
test_a = test_a.drop(test_a.index[:-1])
test_b = test_b.drop(test_b.index[:-1])

In [374]:
test_a

Unnamed: 0,Year,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards
10,162 Game Avg.,0,14,0.0,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,


In [375]:
test_b

Unnamed: 0,Year,Age,Tm,Lg,IP,G,GS,R,RA9,RA9opp,RA9def,RA9role,PPFp,RA9avg,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary,Awards
2,162 Game Avg.,,54,68,0,14,2.25,4.27,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,,


In [376]:
test_c = pd.DataFrame()

In [377]:
test_c = test_a.append(test_b)

  test_c = test_a.append(test_b)


In [378]:
test_c = test_c.groupby('Year', as_index=False).first()  ### DING DING DING

In [379]:
test_c['name'] = pitchers[0]
test_c = test_c.drop(columns=['Year', 'Age', 'Tm', 'Lg'])
test_c.set_index('name')

Unnamed: 0_level_0,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards,RA9,RA9opp,RA9def,RA9role,PPFp,RA9avg,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
bartolo colon,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,


In [380]:
test_c

Unnamed: 0,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards,RA9,RA9opp,RA9def,RA9role,PPFp,RA9avg,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary,name
0,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,,bartolo colon


In [381]:
root = "https://www.baseball-reference.com/players/"
suffix = ".shtml"
target = targets[20]
prepend = target[0] + "/"
query = root + prepend + target + suffix
print(query)
response = requests.get(query)
doc = BeautifulSoup(response.text)
table = doc.select("#pitching_value")
table

https://www.baseball-reference.com/players/c/castran01.shtml


[]

In [382]:
test_d = pd.DataFrame()

In [383]:
test_d.append(test_c)

  test_d.append(test_c)


Unnamed: 0,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards,RA9,RA9opp,RA9def,RA9role,PPFp,RA9avg,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary,name
0,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,,bartolo colon


In [384]:
test_d

In [385]:
test_c

Unnamed: 0,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards,RA9,RA9opp,RA9def,RA9role,PPFp,RA9avg,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary,name
0,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,,bartolo colon


In [395]:
test_c.append(huskies_x)

  test_c.append(huskies_x)


Unnamed: 0,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards,RA9,RA9opp,RA9def,RA9role,PPFp,RA9avg,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary,name
0,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,,bartolo colon
0,2.25,68,0,41,0,0,0,54,109,14,14,14,41,0,54,0,0,0,299,190,6.63,2.75,18.0,2.3,6.8,9.0,1.33,,,,,-0.12,-0.32,93.7,3.82,14,1.4,0.55,0.0,1.4,14,0.514,0.5,,,,bartolo colon


In [386]:
try:
    huskies_x
except:
    huskies_x = test_c

In [392]:
type(huskies_x)

pandas.core.frame.DataFrame

In [393]:
root = "https://www.baseball-reference.com/players/"
suffix = ".shtml"
count = 0
for target in range(len(targets)):
    target = targets[count]
    prepend = target[0] + "/"
    query = root + prepend + target + suffix
    
    if huskies.pitcher[count] == True:
        pitching_standard = pullTable(query, "pitching_standard")
        pitching_standard = pitching_standard.drop(pitching_standard.index[:-1])
        sleep(.2)
        pitching_value = pullTable(query, "pitching_value")
        pitching_value = pitching_value.drop(pitching_value.index[:-1])
        pitching_combined = pitching_standard.append(pitching_value)
        pitching_combined = pitching_combined.groupby('Year', as_index=False).first()
        pitching_combined['first'] = huskies['first'][count]
        pitching_combined['last'] = huskies['last'][count]
        pitching_combined['key_bbref'] = target
        try:
            huskies_p 
            huskies_p.append(pitching_combined)
        except:
            huskies_p = pitching_combined
    else:
        print('nah')
        
    # else:
    #     batting_standard = pullTable(query, "batting_standard")
    #     sleep(1)
    #     batting_value = pullTable(query, "batting_value")
    #     batting_combined = batting_standard.append(batting_value)
    #     batting_combined = batting_combined.groupby('Year', as_index=False).first()
    #     batting_combined['first'] = huskies['first'][count]
    #     batting_combined['last'] = huskies['last'][count]
    #     batting_combined['key_bbref'] = target
    #     try:
    #         huskies_b.append(batting_combined)
    #     except:
    #         huskies_b = batting_combined

    sleep(.2)
    count += 1


nah
nah
nah
nah
nah
nah
nah
nah
nah
nah
nah
nah
nah


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age     Tm    Lg     W     L  W-L%   ERA   G    GS    GF     CG  \
0  NL (5 yrs)  56     46  .549  4.16   128   125     1   5     2     0  768.2   
1  NYY (1 yr)      164.1    29  None  None  None  None  85  4.66  None   None   

    SHO    SV   IP     H     R    ER    HR    BB   IBB    SO   HBP    BK  \
0   842   386  355    91   145    14   531    13     0     4  3250    93   
1  None  None   26  None  4.54  None  None  None  None  None  None  None   

     WP     BF  ERA+   FIP  WHIP    H9   HR9   BB9   SO9  SO/W Awards   RA9  \
0  3.89  1.284   9.9   1.1   1.7   6.2  3.66        None  None   None  None   
1  None   None  None  None  None  None  None  None  None  None   None  0.22   

  RA9opp RA9def RA9role  PPFp RA9avg   RAA   WAA  gmLI WAAadj   WAR   RAR  \
0   None   None    None  None   None  None  None  None   None  None  None   
1   0.14  104.0    4.64    -1   -0.1   .68  -0.1   1.5     17  .497  .499   

     waaWL% 162WL% Salary    first   last  key_bbref  
0 

  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age    Tm    Lg     W     L  W-L%   ERA   G    GS    GF     CG  \
0  MIA (1 yr)      36.2    39  None  None  None  None  25  6.14  None   None   
1  NL (4 yrs)   9    11  .450  3.63   180     0   106   0     0    66  166.0   

    SHO    SV  IP     H     R    ER    HR    BB   IBB    SO   HBP    BK    WP  \
0  None  None   0  None  4.47  None  None  None  None  None  None  None  None   
1   131    76  67    12    83     7   189    10     1    17   704   119  3.51   

      BF  ERA+   FIP  WHIP    H9   HR9   BB9   SO9  SO/W Awards   RA9 RA9opp  \
0   None  None  None  None  None  None  None  None  None   None  0.03  -0.32   
1  1.289   7.1   0.7   4.5  10.2  2.28        None  None   None  None   None   

  RA9def RA9role  PPFp RA9avg   RAA   WAA  gmLI WAAadj   WAR   RAR  \
0   95.2    3.92    -9   -0.9  1.36  -0.2  -0.8     -6  .478  .495   
1   None    None  None   None  None  None  None   None  None  None   

       waaWL% 162WL% Salary     first    last  key_bbref  
0  $

  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age    Tm    Lg     W     L  W-L%   ERA   G    GS    GF     CG  \
0  MIA (1 yr)      36.2    39  None  None  None  None  25  6.14  None   None   
1  NL (4 yrs)   9    11  .450  3.63   180     0   106   0     0    66  166.0   

    SHO    SV  IP     H     R    ER    HR    BB   IBB    SO   HBP    BK    WP  \
0  None  None   0  None  4.47  None  None  None  None  None  None  None  None   
1   131    76  67    12    83     7   189    10     1    17   704   119  3.51   

      BF  ERA+   FIP  WHIP    H9   HR9   BB9   SO9  SO/W Awards   RA9 RA9opp  \
0   None  None  None  None  None  None  None  None  None   None  0.03  -0.32   
1  1.289   7.1   0.7   4.5  10.2  2.28        None  None   None  None   None   

  RA9def RA9role  PPFp RA9avg   RAA   WAA  gmLI WAAadj   WAR   RAR  \
0   95.2    3.92    -9   -0.9  1.36  -0.2  -0.8     -6  .478  .495   
1   None    None  None   None  None  None  None   None  None  None   

       waaWL% 162WL% Salary     first    last  key_bbref  
0  $

  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age Tm Lg     W   L W-L% ERA  G GS GF    CG SHO SV IP  H  R ER  \
0  LAD (1 yr)   0  0     3.68  13    0   4  0  0  0  14.2  13  6  6  3  4  0   

   HR BB IBB SO HBP   BK    WP     BF ERA+  FIP WHIP   H9   HR9 BB9   SO9  \
0  15  1   0  2  60  116  4.74  1.159  8.0  1.8  2.5  9.2  3.75      None   

   SO/W Awards   RA9 RA9opp RA9def RA9role RA9extras PPFp RA9avg  RAA   WAA  \
0  None   None  0.43  -0.29   0.00   103.3      3.82    0    0.0  .17  -0.1   

  gmLI WAAadj   WAR   RAR waaWL% 162WL% Salary  first     last  key_bbref  
0  0.1      2  .500  .500                 None  reyes  moronta  moronre01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age    Tm    Lg     W     L  W-L%   ERA   G    GS    GF    CG  \
0  KCR (1 yr)      38.2    51  None  None  None  None  13  3.03  None  None   
1  NL (2 yrs)   1     3  .250  3.78    87     0    17   0     0     0  66.2   

    SHO    SV  IP     H     R    ER    HR    BB   IBB    SO   HBP    BK    WP  \
0  None  None   0  None  4.64  None  None  None  None  None  None  None  None   
1    81    29  28     3    28     6    74     3     0     5   310    93  2.82   

      BF  ERA+   FIP  WHIP    H9   HR9   BB9   SO9  SO/W Awards   RA9 RA9opp  \
0   None  None  None  None  None  None  None  None  None   None  0.00  -0.35   
1  1.635  10.9   0.4   3.8  10.0  2.64        None  None   None  None   None   

  RA9def RA9role  PPFp RA9avg   RAA   WAA  gmLI WAAadj   WAR   RAR    waaWL%  \
0  100.8    4.33     6    0.6  1.04  -0.1   0.9     10  .512  .504  $925,000   
1   None    None  None   None  None  None  None   None  None  None      None   

  162WL% Salary first     last  key_

  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age Tm    Lg     W   L W-L% ERA  G GS GF    CG SHO  SV  IP  H  \
0  IND (1 yr)   6  4  .600  2.22  11   10   1  9  0  1  89.0  98  50  22  1   

    R ER  HR BB IBB SO  HBP  BK    WP     BF ERA+  FIP WHIP   H9   HR9 BB9  \
0  13     18      0  2  376  93  2.72  1.247  9.9  0.1  1.3  1.8  1.38       

    SO9  SO/W Awards   RA9 RA9opp RA9def RA9role PPFp RA9avg   RAA   WAA gmLI  \
0  None  None   None  0.03   0.00   89.0    4.67   -4   -0.4  1.00  -0.2  0.3   

  WAAadj   WAR   RAR waaWL% 162WL% Salary first    last  key_bbref  
0      5  .465  .498                 None   tom  healey  healeto01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age   Tm     Lg     W     L  W-L%   ERA  G    GS    GF    CG  \
0  AL (2 yrs)   2    0  1.000  4.11    31     0     9  0     0     4  30.2   
1  SEA (1 yr)      7.2      5  None  None  None  None  2  2.35  None  None   

    SHO    SV  IP     H     R    ER    HR    BB   IBB    SO   HBP    BK    WP  \
0    28    16  14     6    12     0    24     0     0     2   129   106  5.29   
1  None  None   0  None  4.43  None  None  None  None  None  None  None  None   

      BF  ERA+   FIP  WHIP    H9   HR9   BB9   SO9  SO/W Awards   RA9 RA9opp  \
0  1.304   8.2   1.8   3.5   7.0  2.00        None  None   None  None   None   
1   None  None  None  None  None  None  None  None  None   None  0.13  -0.31   

  RA9def RA9role  PPFp RA9avg   RAA   WAA  gmLI WAAadj   WAR   RAR    waaWL%  \
0   None    None  None   None  None  None  None   None  None  None      None   
1   96.8    3.86     1    0.1  1.11   0.0   0.2      2  .526  .501  $535,000   

  162WL% Salary first   last  key_bbref

  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


            Year Age  Tm    Lg     W   L W-L% ERA  G GS GF   CG  SHO  SV  IP  \
0  162 Game Avg.   0  14  .000  2.25  68    0  41  0  0  0  54   109  14  14   

    H   R ER  HR BB IBB SO  HBP   BK    WP     BF  ERA+  FIP WHIP   H9   HR9  \
0  14  41  0  54  0   0  0  299  190  6.63  2.750  18.0  2.3  6.8  9.0  1.33   

  BB9   SO9  SO/W Awards    RA9 RA9opp RA9def RA9role PPFp RA9avg  RAA  WAA  \
0      None  None   None  -0.12  -0.32   93.7    3.82   14    1.4  .55  0.0   

  gmLI WAAadj   WAR   RAR waaWL% 162WL% Salary  first    last  key_bbref  
0  1.4     14  .514  .500                 None  angel  castro  castran01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


            Year Age Tm Lg      W   L W-L% ERA  G GS GF   CG  SHO   SV   IP  \
0  162 Game Avg.   0  0     81.00  68    0   0  0  0  0  20   272  204  204   

    H    R ER HR BB IBB SO  HBP  BK     WP      BF   ERA+   FIP  WHIP   H9  \
0  68  136  0  0  0   0  0  476  11  60.19  18.000  108.0  27.0  54.0  0.0   

    HR9 BB9   SO9  SO/W Awards   RA9 RA9opp RA9def RA9role RA9extras  PPFp  \
0  0.00      None  None   None  1.84  -0.27   0.00    94.0      2.13  -204   

  RA9avg  RAA  WAA   gmLI WAAadj   WAR   RAR waaWL% 162WL% Salary  first  \
0  -13.6  .31  6.8  -13.6   -204  .273  .499                 None  edwar   

     last  key_bbref  
0  colina  colined01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


            Year Age Tm    Lg     W   L W-L% ERA  G GS GF    CG  SHO  SV  IP  \
0  162 Game Avg.  15  9  .615  3.48  39   29   3  1  0  0  196   163  84  76   

    H   R ER   HR  BB IBB  SO  HBP   BK    WP     BF ERA+  FIP WHIP   H9  \
0  17  85  0  187  14   2  12  828  122  3.86  1.268  7.5  0.8  3.9  8.6   

    HR9 BB9   SO9  SO/W Awards   RA9 RA9opp RA9def RA9role RA9extras PPFp  \
0  2.20      None  None   None  0.45   0.17   0.01    99.3      4.36    9   

  RA9avg  RAA   WAA gmLI WAAadj   WAR   RAR waaWL% 162WL% Salary    first  \
0    1.1  .56  -0.1  2.9     30  .529  .502                 None  framber   

     last  key_bbref  
0  valdez  valdefr01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


            Year Age Tm    Lg     W   L W-L% ERA  G GS GF   CG SHO  SV  IP  H  \
0  162 Game Avg.   4  4  .500  4.50  68    0  32  0  0  0  64   64  36  32  4   

    R ER  HR BB IBB SO  HBP  BK    WP     BF ERA+  FIP WHIP   H9   HR9 BB9  \
0  48  0  52  0   0  8  300  92  4.50  1.750  9.0  0.6  6.8  7.3  1.08       

    SO9  SO/W Awards    RA9 RA9opp RA9def RA9role PPFp RA9avg  RAA  WAA gmLI  \
0  None  None   None  -0.06  -0.34   96.5    4.07   -8   -0.8  .43  0.0  0.0   

  WAAadj   WAR   RAR waaWL% 162WL% Salary   first      last  key_bbref  
0      0  .489  .499                 None  andrew  carignan  carigan01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


            Year Age  Tm    Lg     W   L W-L% ERA  G GS GF    CG  SHO  SV  IP  \
0  162 Game Avg.   4  12  .235  5.09  47   21   5  0  0  1  152   149  89  86   

    H   R ER   HR BB IBB SO  HBP  BK    WP     BF ERA+  FIP WHIP   H9   HR9  \
0  24  56  1  140  3   2  5  654  86  4.55  1.356  8.9  1.4  3.3  8.3  2.48   

  BB9   SO9  SO/W Awards   RA9 RA9opp RA9def RA9role RA9extras PPFp RA9avg  \
0      None  None   None  0.00   0.03   0.00   104.7      5.26   -2   -0.1   

   RAA   WAA gmLI WAAadj   WAR   RAR waaWL% 162WL% Salary   first  last  \
0  .79  -0.2  1.2     13  .498  .499                 None  keegan  akin   

  key_bbref  
0  akinke01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


            Year Age Tm    Lg     W   L W-L% ERA  G GS GF   CG SHO  SV  IP  \
0  162 Game Avg.   6  4  .600  3.68  68    0  17  0  0  1  98   92  42  40   

    H   R ER  HR BB IBB SO  HBP   BK    WP     BF ERA+  FIP WHIP   H9   HR9  \
0  14  26  6  67  6   0  2  413  118  4.53  1.203  8.4  1.3  2.4  6.1  2.54   

  BB9   SO9  SO/W Awards   RA9 RA9opp RA9def RA9role PPFp RA9avg   RAA   WAA  \
0      None  None   None  0.12  -0.36   95.8    4.33    5    0.5  1.07  -0.3   

  gmLI WAAadj   WAR   RAR waaWL% 162WL% Salary  first   last  key_bbref  
0  1.3     16  .507  .502                 None  julio  mateo  mateoju01  


  pitching_combined = pitching_standard.append(pitching_value)
  huskies_p.append(pitching_combined)


         Year Age   Tm    Lg     W     L  W-L%   ERA  G     GS    GF     CG  \
0  NL (3 yrs)   4    7  .364  5.96    27    18     5  0      0     0  111.2   
1  SDP (1 yr)      3.1     4  None  None  None  None  4  10.80  None   None   

    SHO    SV  IP     H     R    ER    HR    BB   IBB    SO   HBP    BK    WP  \
0   122    79  74    14    37     0    67     4     0     1   488    79  4.68   
1  None  None   0  None  4.37  None  None  None  None  None  None  None  None   

      BF  ERA+   FIP  WHIP    H9   HR9   BB9   SO9  SO/W Awards    RA9 RA9opp  \
0  1.424   9.8   1.1   3.0   5.4  1.81        None  None   None   None   None   
1   None  None  None  None  None  None  None  None  None   None  -0.02  -0.32   

  RA9def RA9role  PPFp RA9avg   RAA   WAA  gmLI WAAadj   WAR   RAR    waaWL%  \
0   None    None  None   None  None  None  None   None  None  None      None   
1   90.8    3.70    -3   -0.3   .46   0.1  -0.2     -2  .437  .498  $390,000   

  162WL% Salary    first      las

In [394]:
huskies_p

Unnamed: 0,first,last,key_bbref,pitcher
0,bartolo,colon,colonba01,True
1,fernando,rodney,rodnefe01,True
2,reyes,moronta,moronre01,True
3,josé,mijares,mijarjo01,True
4,tom,healey,healeto01,True
5,jean,machi,machije01,True
6,fernando,rodney,rodnefe01,True
7,angel,castro,castran01,True
8,edwar,colina,colined01,True
9,framber,valdez,valdefr01,True


In [None]:
root = "https://www.baseball-reference.com/players/"
suffix = ".shtml"
count = 0
for target in range(len(targets)):
    target = targets[count]
    prepend = target[0] + "/"
    query = root + prepend + target + suffix
    print(query)
    response = requests.get(query)
    doc = BeautifulSoup(response.text)
    if huskies.pitcher[count] == True:
        table_a = doc.select("#pitching_standard > tfoot > tr:nth-child(2) .right")
        table_b = doc.select("#pitching_value > tfoot > tr:nth-child(2) .right")

        sleep(0.25)
        ##
        metrics_a = []
        metrics_b = []
        vals_a = []
        vals_b = []
        if table_a:
            for row in range(len(table_a)):
                metrics_a.append(table_a[row]['data-stat'])
                vals_a.append(table_a[row].text)

        else:
            print("couldn't find table a")
        sleep(1)
        if table_b:
            for row in range(len(table_b)):
                metrics_b.append(table_b[row]['data-stat'])
                vals_b.append(table_b[row].text)
            else:
                print("couldn't find table_b")
            # metrics.append(page_content[row]['data-stat'])
            # vals.append(page_content[row].text)
        # print(metrics)
        # print(vals)
        # for row in range(len(table_b)):
        #     metric = []
        #     val = []
        #     stat = []
        #     metrics_b.append(table_b[row]['data-stat'])
        #     vals_b.append(table_b[row].text)
        #     # metrics.append(page_content[row]['data-stat'])
        #     # vals.append(page_content[row].text)
        print("---")
        print(metrics_a)
        print(vals_a)
        print(metrics_b)
        print(vals_b)
    else:
        print('nah')


    # stats = pd.DataFrame({
    #     'metrics': metrics,
    #     'values': vals
    # })
    # sleep(.5)
    # count += 1
    # stats.set_index('metrics').T
    count += 1


https://www.baseball-reference.com/players/k/kirkal01.shtml
nah
https://www.baseball-reference.com/players/p/penabr01.shtml
nah
https://www.baseball-reference.com/players/s/sandopa01.shtml
nah
https://www.baseball-reference.com/players/w/whitety01.shtml
nah
https://www.baseball-reference.com/players/p/phegljo01.shtml
nah
https://www.baseball-reference.com/players/s/solando01.shtml
nah
https://www.baseball-reference.com/players/c/callaal01.shtml
nah
https://www.baseball-reference.com/players/t/tejadmi01.shtml
nah
https://www.baseball-reference.com/players/n/naylojo01.shtml
nah
https://www.baseball-reference.com/players/v/vicieda01.shtml
nah
https://www.baseball-reference.com/players/r/ramirha02.shtml
nah
https://www.baseball-reference.com/players/a/astudwi01.shtml
nah
https://www.baseball-reference.com/players/f/fothebo01.shtml
nah
https://www.baseball-reference.com/players/c/colonba01.shtml
---
['W', 'L', 'win_loss_perc', 'earned_run_avg', 'G', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H',

KeyboardInterrupt: 

In [None]:
stats

Unnamed: 0,metrics,values


In [None]:
metrics = []
vals = []
for row in range(len(page_content)):
    metric = []
    val = []
    stat = []
    metrics.append(page_content[row]['data-stat'])
    vals.append(page_content[row].text)

stats = pd.DataFrame({
    'metrics': metrics,
    'values': vals
})

stats.set_index('metrics').T

metrics,W,L,win_loss_perc,earned_run_avg,G,GS,GF,CG,SHO,SV,...,WP,batters_faced,earned_run_avg_plus,fip,whip,hits_per_nine,home_runs_per_nine,bases_on_balls_per_nine,strikeouts_per_nine,strikeouts_per_base_on_balls
values,15,11,0.568,4.12,34,34,0,2,1,0,...,3,892,106,4.15,1.312,9.3,1.1,2.5,6.6,2.67
