In [188]:
import numpy as np
import pandas as pd
import requests
import json
import bs4
from datetime import date
from bs4 import BeautifulSoup as bs

# Web Scraping From ESPN.com

The data we need is located on the ESPN website, but since there is no way to export it directly, we will use web scraping to compile it into a JSON file.

In [189]:
def save_to_json(name, year, url, pos, age, status, prev_team, new_team, years_signed, dollars):
    """Append one player record to ``players.json`` unless it is already stored.

    A record counts as a duplicate when an existing entry has the same
    ``url`` and the same ``year``. If ``players.json`` does not exist yet,
    it is created with this record as its first entry.

    Parameters
    ----------
    name, url, pos, status, prev_team, new_team, years_signed, dollars
        Values stored verbatim under the key of the same name.
    year
        Stored under ``'year'``; together with ``url`` it identifies a record.
    age
        Stored under ``'age'``.

    Returns
    -------
    None
        Prints ``Saved: <name>`` or ``Already Exists: <name>`` as a side effect.
    """
    # Load the current records and close the read handle before any rewrite,
    # so the file is never open for reading and writing at the same time.
    try:
        with open('players.json') as in_file:
            data = json.load(in_file)
    except FileNotFoundError:
        # First run: there is nothing stored yet.
        data = []

    already_exists = any(
        player['url'] == url and player['year'] == year
        for player in data
    )
    if already_exists:
        print("Already Exists: {} ".format(name))
        return

    data.append({
        'name': name,
        'year': year,
        'url': url,
        'pos': pos,
        'age': age,
        'status': status,
        'prev_team': prev_team,
        'new_team': new_team,
        'years_signed': years_signed,
        'dollars': dollars,
    })

    with open('players.json', 'w') as out_file:
        json.dump(data, out_file, indent=4)

    print("Saved: {}".format(name))

In [190]:
# First and last seasons to scrape; the scraping loop treats both as inclusive.
min_year, max_year = 2006, 2018

In [193]:
# Scrape one ESPN free-agent page per season and store each usable row
# through save_to_json.
for year in range(min_year, max_year + 1):
    print("PARSING YEAR", year)
    # Send a request to the ESPN page containing the data we need.
    # The timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(
        'http://www.espn.com/mlb/freeagents/_/year/' + str(year),
        timeout=30,
    )
    print("Connection Status Code:", response.status_code)
    if not response.ok:
        # An error response carries no player table; move on to the next season.
        continue

    # Parse the HTML to find the table rows
    parser = bs(response.content, 'html.parser')
    table_rows = parser.find_all('tr', class_=['oddrow', 'evenrow'])

    # Save the data in a JSON file
    for row in table_rows:
        cells = row.select('td')

        # Columns up to index 8 are read below; skip rows that are too short
        # instead of failing with an IndexError.
        if len(cells) < 9:
            continue

        # some players have no urls, so we will just skip them
        links = cells[0].select('a')
        if not links:
            continue
        url = links[0]['href']

        # if a player has no age listed, we will also skip them
        # NOTE(review): presumably the page lists the player's age as of today,
        # so the years elapsed since `year` are subtracted -- confirm.
        try:
            age = int(cells[2].text) - (date.today().year - year)
        except ValueError:
            continue

        name = cells[0].text
        pos = cells[1].text
        status = cells[3].text
        prev_team = cells[4].text
        new_team = cells[5].text
        years_signed = cells[6].text
        dollars = cells[8].text

        save_to_json(name, year, url, pos, age, status, prev_team, new_team, years_signed, dollars)

PARSING YEAR 2006
Connection Status Code: 200
Already Exists: Matt Albers 
Already Exists: Sandy Alomar Jr. 
Already Exists: Moises Alou 
Already Exists: Rick Ankiel 
Already Exists: Tony Armas 
Already Exists: Rich Aurilia 
Already Exists: Danys Baez 
Already Exists: Jeff Bagwell 
Already Exists: Paul Bako 
Already Exists: Rod Barajas 
Already Exists: Miguel Batista 
Already Exists: David Bell 
Already Exists: Ronnie Belliard 
Already Exists: Gary Bennett 
Already Exists: Kris Benson 
Already Exists: Henry Blanco 
Already Exists: Geoff Blum 
Already Exists: Barry Bonds 
Already Exists: Aaron Boone 
Already Exists: Joe Borowski 
Already Exists: Chad Bradford 
Already Exists: Doug Brocail 
Already Exists: Jason Bulger 
Already Exists: Jeromy Burnitz 
Already Exists: Marlon Byrd 
Already Exists: Miguel Cairo 
Already Exists: Jamey Carroll 
Already Exists: Sean Casey 
Already Exists: Vinny Castilla 
Already Exists: Frank Catalanotto 
Already Exists: Bruce Chen 
Already Exists: Jeff Cirill

Already Exists: Jose Guillen 
Already Exists: Jerry Hairston Jr. 
Already Exists: Willie Harris 
Already Exists: LaTroy Hawkins 
Already Exists: Mark Hendrickson 
Already Exists: Matt Herges 
Already Exists: Livan Hernandez 
Already Exists: Eric Hinske 
Already Exists: Torii Hunter 
Already Exists: Tadahito Iguchi 
Already Exists: Cesar Izturis 
Already Exists: Geoff Jenkins 
Already Exists: Jason Jennings 
Already Exists: D'Angelo Jimenez 
Already Exists: Ben Johnson 
Already Exists: Andruw Jones 
Already Exists: Jorge Julio 
Already Exists: Jason Kendall 
Already Exists: Ryan Ketchner 
Already Exists: Bobby Kielty 
Already Exists: Byung-Hyun Kim 
Already Exists: Ray King 
Already Exists: Ryan Klesko 
Already Exists: Masa Kobayashi 
Already Exists: Corey Koskie 
Already Exists: Mike Lamb 
Already Exists: Jason Lane 
Already Exists: Jason LaRue 
Already Exists: Jon Lieber 
Already Exists: Mike Lieberthal 
Already Exists: Scott Linebrink 
Already Exists: Paul Lo Duca 
Already Exists: Ke

Already Exists: Scott Podsednik 
Already Exists: Sidney Ponson 
Already Exists: Mark Prior 
Already Exists: Scott Proctor 
Already Exists: Nick Punto 
Already Exists: Horacio Ramirez 
Already Exists: Manny Ramirez 
Already Exists: Tim Redding 
Already Exists: Edgar Renteria 
Already Exists: Argenis Reyes 
Already Exists: Dennys Reyes 
Already Exists: Arthur Rhodes 
Already Exists: Juan Rincon 
Already Exists: Ricardo Rincon 
Already Exists: Luis Rivas 
Already Exists: Juan Rivera 
Already Exists: Francisco Rodriguez 
Already Exists: Iván Rodríguez 
Already Exists: Kenny Rogers 
Already Exists: David Ross 
Already Exists: Glendon Rusch 
Already Exists: CC Sabathia 
Already Exists: Takashi Saito 
Already Exists: Jeff Salazar 
Already Exists: Curt Schilling 
Already Exists: Rudy Seanez 
Already Exists: Ben Sheets 
Already Exists: Brian Shouse 
Already Exists: Jason Smith 
Already Exists: John Smoltz 
Already Exists: Russ Springer 
Already Exists: Mike Sweeney 
Already Exists: Julian Tavar

Saved: Kevin Correia
Saved: Craig Counsell
Saved: Jesse Crain
Saved: Carl Crawford
Saved: Jack Cust
Saved: Johnny Damon
Saved: Doug Davis
Saved: Jorge De La Rosa
Saved: Manny Delcarmen
Saved: Elmer Dessens
Saved: Argenis Diaz
Saved: Matt Diaz
Saved: Octavio Dotel
Saved: Scott Downs
Saved: Justin Duchscherer
Saved: Adam Dunn
Saved: Chad Durbin
Saved: David Eckstein
Saved: Jim Edmonds
Saved: Edwin Encarnacion
Saved: Kelvim Escobar
Saved: Kyle Farnsworth
Saved: Pedro Feliciano
Saved: Pedro Feliz
Saved: Josh Fields
Saved: Randy Flores
Saved: Jeff Francis
Saved: Frank Francisco
Saved: Kevin Frandsen
Saved: Jason Frasor
Saved: Brian Fuentes
Saved: Freddy Garcia
Saved: Jon Garland
Saved: Sammy Gervacio
Saved: Jason Giambi
Saved: Troy Glaus
Saved: Sean Green
Saved: Kevin Gregg
Saved: Vladimir Guerrero
Saved: Matt Guerrier
Saved: Jose Guillen
Saved: Cristian Guzman
Saved: Tony Gwynn
Saved: Scott Hairston
Saved: Jerry Hairston Jr.
Saved: Bill Hall
Saved: Mike Hampton
Saved: Aaron Harang
Saved: W

Saved: Mark DeRosa
Saved: Matt Diaz
Saved: Stephen Drew
Saved: Zach Duke
Saved: Chad Durbin
Saved: Kyle Farnsworth
Saved: Scott Feldman
Saved: Pedro Feliciano
Saved: Jeff Francis
Saved: Jason Frasor
Saved: Brian Fuentes
Saved: Kyuji Fujikawa
Saved: Freddy Garcia
Saved: Chad Gaudin
Saved: Jason Giambi
Saved: Jonny Gomes
Saved: Alex Gonzalez
Saved: Mike Gonzalez
Saved: Tom Gorzelanny
Saved: Zack Greinke
Saved: Jason Grilli
Saved: Jeremy Guthrie
Saved: Travis Hafner
Saved: Scott Hairston
Saved: Bill Hall
Saved: Josh Hamilton
Saved: Jack Hannahan
Saved: Dan Haren
Saved: LaTroy Hawkins
Saved: Roberto Hernandez
Saved: Eric Hinske
Saved: J.P. Howell
Saved: Orlando Hudson
Saved: Aubrey Huff
Saved: Torii Hunter
Saved: Raul Ibanez
Saved: Brandon Inge
Saved: Jason Isringhausen
Saved: Hisashi Iwakuma
Saved: Maicer Izturis
Saved: Edwin Jackson
Saved: Kelly Johnson
Saved: Nick Johnson
Saved: Reed Johnson
Saved: Andruw Jones
Saved: Chipper Jones
Saved: Jair Jurrjens
Saved: Jeff Karstens
Saved: Austin

Saved: Alberto Callaspo
Saved: Chris Capuano
Saved: Alexi Casilla
Saved: Joba Chamberlain
Saved: Endy Chavez
Saved: Phil Coke
Saved: Kevin Correia
Saved: Neal Cotts
Saved: Nelson Cruz
Saved: Michael Cuddyer
Saved: Ryan Dempster
Saved: Chris Denorfia
Saved: Daniel Descalso
Saved: Ryan Doumit
Saved: Scott Downs
Saved: Stephen Drew
Saved: Zach Duke
Saved: Mark Ellis
Saved: Gavin Floyd
Saved: Kevin Frandsen
Saved: Jason Frasor
Saved: Kyuji Fujikawa
Saved: Rafael Furcal
Saved: Jason Giambi
Saved: Jonny Gomes
Saved: Tom Gorzelanny
Saved: Luke Gregerson
Saved: Kevin Gregg
Saved: Jason Grilli
Saved: Scott Hairston
Saved: Jason Hammel
Saved: Jack Hannahan
Saved: Joel Hanrahan
Saved: Aaron Harang
Saved: Corey Hart
Saved: Chase Headley
Saved: Roberto Hernandez
Saved: Rich Hill
Saved: Luke Hochevar
Saved: Nick Hundley
Saved: Torii Hunter
Saved: Raul Ibanez
Saved: Casey Janssen
Saved: Derek Jeter
Saved: Jim Johnson
Saved: Josh Johnson
Saved: Kelly Johnson
Saved: Reed Johnson
Saved: Munenori Kawasak

Saved: Boone Logan
Saved: James Loney
Saved: Javier Lopez
Saved: Seth Maness
Saved: Jeff Manship
Saved: Jeff Mathis
Saved: Casey McGehee
Saved: Dustin McGowan
Saved: Kris Medlen
Saved: Mark Melancon
Saved: Thomas Milone
Saved: Kendrys Morales
Saved: Mitch Moreland
Saved: Justin Morneau
Saved: Logan Morrison
Saved: Brandon Morrow
Saved: Charlie Morton
Saved: Brandon Moss
Saved: Peter Moylan
Saved: Conor Mullee
Saved: Mike Napoli
Saved: Chris Narveson
Saved: Joe Nathan
Saved: Dioner Navarro
Saved: Jon Niese
Saved: Ivan Nova
Saved: Eric O'Flaherty
Saved: Sean O'Sullivan
Saved: Ross Ohlendorf
Saved: Logan Ondrusek
Saved: Angel Pagan
Saved: Jonathan Papelbon
Saved: Steve Pearce
Saved: Jake Peavy
Saved: Gregorio Petit
Saved: Yusmeiro Petit
Saved: A.J. Pierzynski
Saved: Jose Pirela
Saved: Trevor Plouffe
Saved: Ryan Raburn
Saved: Alexei Ramirez
Saved: Wilson Ramos
Saved: Colby Rasmus
Saved: Josh Reddick
Saved: Nolan Reimold
Saved: Ben Revere
Saved: Mark Reynolds
Saved: Clayton Richard
Saved: F

Saved: Billy Hamilton
Saved: Jason Hammel
Saved: J.A. Happ
Saved: Bryce Harper
Saved: Josh Harrison
Saved: Matt Harvey
Saved: Chase Headley
Saved: Adeiny Hechavarria
Saved: Jeremy Hellickson
Saved: Gorkys Hernandez
Saved: Kelvin Herrera
Saved: Chris Herrmann
Saved: Derek Holland
Saved: Greg Holland
Saved: Matt Holliday
Saved: J.J. Hoover
Saved: Daniel Hudson
Saved: Phil Hughes
Saved: Nick Hundley
Saved: Drew Hutchison
Saved: Jose Iglesias
Saved: Austin Jackson
Saved: Edwin Jackson
Saved: Jon Jay
Saved: Dan Jennings
Saved: Kevin Jepsen
Saved: Jim Johnson
Saved: Adam Jones
Saved: Caleb Joseph
Saved: Tommy Joseph
Saved: Matt Joyce
Saved: Jung Ho Kang
Saved: Nate Karns
Saved: Shawn Kelley
Saved: Joe Kelly
Saved: Dallas Keuchel
Saved: Yusei Kikuchi
Saved: Craig Kimbrel
Saved: Ian Kinsler
Saved: Patrick Kivlehan
Saved: George Kontos
Saved: Ian Krol
Saved: DJ LeMahieu
Saved: Adam Liberatore
Saved: Tim Lincecum
Saved: Francisco Liriano
Saved: Jose Lobaton
Saved: Boone Logan
Saved: Aaron Loup
S