In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import numpy as np
import pandas as pd
import sys

In [5]:
fpl_url = 'https://fantasy.premierleague.com/a/statistics/total_points'

In [6]:
def get_player_history(soup, fpl):
    '''
    get:
    gw,player,team,position,form,report,opp,pts,mins,goals,
    assists,clean,conceded,own_goals,pen_saved,pen_missed,
    yellows,red,saves,bonus,bonus_sys,influence,creativity,
    threat,ict_index,net_transfers,selected_by,value
    fillna:
    fixture_difficulty
    '''
    bodydata = ['gw','opp','pts','mins','goals','assists',
               'clean','conceded','own_goals','pen_saved',
               'pen_missed','yellows','red','saves','bonus',
               'bonus_sys','influence','creativity','threat',
               'ict_index','net_transfers','selected_by','value']
    try:
        unique = soup.find('div', attrs={'id':'ismr-element-history-this', 'class':'ism-eiw-detail__item'})
        table = unique.find('table', class_ = 'ism-table').find('tbody')

        for row in table.findChildren('tr'):
            fpl['fixture_difficulty'].append(np.nan)
            for col,value in zip(bodydata, row.findChildren('td')):
                fpl[col].append(value.text)
            #get player, team, position, form, report
            report = soup.find('p', class_ = 'ism-element-status-bar__content')
            try:
                fpl['report'].append(report.text)
            except:
                fpl['report'].append(np.nan)
            player = soup.find('div', class_ = 'ism-eiw-properties__body__primary').find('h2')
            fpl['player'].append(player.text)
            team = soup.find('div', class_ = 'ism-eiw-properties__body__primary').find('div')
            fpl['team'].append(team.text)
            position = soup.find('div', class_ = ['ism-eiw-properties__body__et', 'ism-el-type ism-el-type--2'])
            fpl['position'].append(position.text)
            form = soup.find('div', class_ = 'ism-horizontal-data-list--basic__value')
            fpl['form'].append(form.text)
    except:
        for row in range(35):
            for key in fpl.keys():
                if (key != 'short_name'):
                    fpl[key].append(np.nan)

In [7]:
def get_player_fixtures(soup, fpl):
    '''
    get:
    gw,player,team,position,form,report,opp,fixture_difficulty
    fillna:
    rest
    '''
    #body = soup.find('div',class_=['table', 'ism-scroll-table'])
    #table = body.find('table', class_='ism-table').find('tbody')
    try:
        unique = soup.find('div', attrs={'id':'ismr-element-fixtures', 'class':'ism-eiw-detail__item'})
        table = unique.find('table', class_ = 'ism-table').find('tbody')

        for row in table.findChildren('tr'):
            for col, value in zip(['gw','opp','fixture_difficulty'], row.findAll('td')[1:]):
                fpl[col].append(value.text)

            report = soup.find('p', class_ = 'ism-element-status-bar__content')
            try:
                fpl['report'].append(report.text)
            except:
                fpl['report'].append(np.nan)
            player = soup.find('div', class_ = 'ism-eiw-properties__body__primary').find('h2')
            fpl['player'].append(player.text)
            team = soup.find('div', class_ = 'ism-eiw-properties__body__primary').find('div')
            fpl['team'].append(team.text)
            position = soup.find('div', class_ = ['ism-eiw-properties__body__et', 'ism-el-type ism-el-type--2'])
            fpl['position'].append(position.text)
            form = soup.find('div', class_ = 'ism-horizontal-data-list--basic__value')
            fpl['form'].append(form.text)

            missing = ['pts','mins','goals','assists',
                       'clean','conceded','own_goals','pen_saved',
                       'pen_missed','yellows','red','saves','bonus',
                       'bonus_sys','influence','creativity','threat',
                       'ict_index','net_transfers','selected_by','value']

            for col in missing:
                fpl[col].append(np.nan)
    except:
        for row in range(35):
            for key in fpl.keys():
                if (key != 'short_name'):
                    fpl[key].append(np.nan)

In [8]:
def get_player_short_name(player,soup,fpl,length):
    player_info = soup.find('table', class_ = ['ism-table', 'ism-table--el']).find('tbody').findChildren('tr')[player-1]
    short_name = player_info.find('div',class_=['ism-media__body', 'ism-table--el__primary-text']).find('a')
    for row in range(length):
        fpl['short_name'].append(short_name.text)

In [9]:
def get_player_data(player, driver, fpl, wait=1, firstpage=False):
    exitpath = "//*[@id='ismr-element']/div/div[1]/a//*[local-name() = 'svg']"
    entrypath = f'//*[@id="ismr-main"]/div/div[3]/table/tbody/tr[{player}]/td[2]/div/div[2]/a'
    fixtpath = '//*[@id="ismr-element"]/div/div[2]/div[2]/div[2]/ul/li[2]/a'
    alt_fixtpath = '//*[@id="ismr-element"]/div/div[2]/div[1]/div[2]/ul/li[2]/a'
    old = len(fpl['player'])
    home_soup = BeautifulSoup(driver.page_source)
    card = driver.find_element_by_xpath(entrypath)
    card.click()
    time.sleep(wait)
    soup = driver.execute_script("return document.documentElement.innerHTML")
    soup = BeautifulSoup(soup)
    
    get_player_history(soup, fpl)
    get_player_fixtures(soup, fpl)
    new = len(fpl['player'])
    length = old - new
    get_player_short_name(player, home_soup, fpl, length)
    
    exit = driver.find_element_by_xpath(exitpath)
    exit.click()

In [10]:
def java_script_spider(url):
    fpl = {
        'gw':[],
        'player':[],
        'team':[],
        'position':[],
        'form':[],
        'report':[],
        'opp':[],
        'pts':[],
        'mins':[],
        'goals':[],
        'assists':[],
        'clean':[],
        'conceded':[],
        'own_goals':[],
        'pen_saved':[],
        'pen_missed':[],
        'yellows':[],
        'red':[],
        'saves':[],
        'bonus':[],
        'bonus_sys':[],
        'influence':[],
        'creativity':[],
        'threat':[],
        'ict_index':[],
        'net_transfers':[],
        'selected_by':[],
        'value':[],
        'fixture_difficulty':[],
        'short_name':[]
    }
    options = Options()
    options.add_argument('user=agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15')
    driver = webdriver.Chrome(options = options)
    driver.implicitly_wait(3)
    driver.get(fpl_url)
    #Page 1 is now open
    pagetwo = '//*[@id="ismr-main"]/div/div[4]/a[1]/div[1]'
    nextpage = '//*[@id="ismr-main"]/div/div[4]/a[3]/div[1]'
    for p in range(0,18): #iterate through pages
        if p == 17: #iterate through final page
            for player in range(1,19):
                get_player_data(player, driver, fpl)
        else:
            for player in range(1,31): #iterate through page
                get_player_data(player, driver, fpl)
        if p == 0:
            button = driver.find_element_by_xpath(pagetwo)
        elif p == 17:
            break
        else:
            button = driver.find_element_by_xpath(nextpage)
        button.click()
        time.sleep(0.2)
    return fpl

In [8]:
fpl = java_script_spider(fpl_url)

In [11]:
fpl.pop('short_name')

[]

In [12]:
fpl_df = pd.DataFrame(fpl)
fpl_df.to_csv('fpl_raw_scrape.csv')

In [33]:
fpl_df.to_csv('fpl_raw_scrape.csv')

In [None]:
def java_script_spider(url):
    fpl = {
        'gw':[],
        'player':[],
        'team':[],
        'position':[],
        'form':[],
        'report':[],
        'opp':[],
        'pts':[],
        'mins':[],
        'goals':[],
        'assists':[],
        'clean':[],
        'conceded':[],
        'own_goals':[],
        'pen_saved':[],
        'pen_missed':[],
        'yellows':[],
        'red':[],
        'saves':[],
        'bonus':[],
        'bonus_sys':[],
        'influence':[],
        'creativity':[],
        'threat':[],
        'ict_index':[],
        'net_transfers':[],
        'selected_by':[],
        'value':[],
        'fixture_difficulty':[],
        'short_name':[]
    }
    options = Options()
    options.add_argument('user=agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15')
    driver = webdriver.Chrome(options = options)
    driver.implicitly_wait(3)
    driver.get(fpl_url)
    #Page 1 is now open
    pagetwo = '//*[@id="ismr-main"]/div/div[4]/a[1]/div[1]'
    nextpage = '//*[@id="ismr-main"]/div/div[4]/a[3]/div[1]'
    for p in range(0,18): #iterate through pages
        if p == 17: #iterate through final page
            for player in range(1,19):
                get_player_data(player, driver, fpl)
        else:
            for player in range(1,31): #iterate through page
                get_player_data(player, driver, fpl)
        if p == 0:
            button = driver.find_element_by_xpath(pagetwo)
        elif p == 17:
            break
        else:
            button = driver.find_element_by_xpath(nextpage)
        button.click()
        time.sleep(0.2)
    return fpl

In [17]:
def get_luke_shaw():
    shaw = {
        'gw':[],
        'player':[],
        'team':[],
        'position':[],
        'form':[],
        'report':[],
        'opp':[],
        'pts':[],
        'mins':[],
        'goals':[],
        'assists':[],
        'clean':[],
        'conceded':[],
        'own_goals':[],
        'pen_saved':[],
        'pen_missed':[],
        'yellows':[],
        'red':[],
        'saves':[],
        'bonus':[],
        'bonus_sys':[],
        'influence':[],
        'creativity':[],
        'threat':[],
        'ict_index':[],
        'net_transfers':[],
        'selected_by':[],
        'value':[],
        'fixture_difficulty':[],
        'short_name':[]
    }
    options = Options()
    options.add_argument('user=agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15')
    driver = webdriver.Chrome(options = options)
    driver.implicitly_wait(3)
    driver.get(fpl_url)
    #Page 1 is now open
    pagetwo = '//*[@id="ismr-main"]/div/div[4]/a[1]/div[1]'
    nextpage = '//*[@id="ismr-main"]/div/div[4]/a[3]/div[1]'
    for p in range(5):
        if p == 4:
            get_player_data(14,driver,shaw)
        if p == 0:
            button = driver.find_element_by_xpath(pagetwo)
        elif p == 4:
            break
        else:
            button = driver.find_element_by_xpath(nextpage)
        button.click()
        time.sleep(0.2)
    
    return shaw
        

In [26]:
shaw = get_luke_shaw()
shaw.pop('short_name')

[]

In [29]:
shaw_pd = pd.DataFrame(shaw)
shaw_pd.to_csv('shaw.csv')