In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib as mpl
import warnings
import numpy as np
from math import pi
from googlesearch import search
import time

In [3]:
def linkGen(player):
    """Return the first Google hit for ``"<player> FBref"``.

    Parameters
    ----------
    player : str
        Player name; ``" FBref"`` is appended to form the search query.

    Returns
    -------
    The first item yielded by ``search`` for that query, or ``None`` when
    the search yields nothing.
    """
    hits = search(player + " FBref", tld="co.in", num=10, stop=10, pause=2)
    # Only the top hit is wanted; None mirrors "no results".
    return next(iter(hits), None)

In [4]:
# Manual check of the lookup: this runs a live Google search, so the URL
# displayed as the cell output may differ between runs.
linkGen("Harry Kane")

'https://fbref.com/en/players/21a66f6a/Harry-Kane'

In [5]:
def getPlayerData(x, timeout=30):
    """Scrape the first table of a player page into parallel metric lists.

    Parameters
    ----------
    x : str
        URL of the player page to download.
    timeout : float, optional
        Seconds handed to ``requests.get`` so a stalled connection cannot
        block the caller forever. Defaults to 30.

    Returns
    -------
    list
        ``[metric_names, metric_values, metric_percentiles, name, pos]`` on
        success. The three metric lists are index-aligned: entry ``i`` of
        each one comes from the same table row. ``metric_values`` holds
        floats (a trailing ``%`` is stripped first) and
        ``metric_percentiles`` holds ints.
        ``[]`` when the page cannot be fetched or does not have the expected
        structure.

    Notes
    -----
    Only ``Exception`` subclasses are turned into ``[]``;
    ``KeyboardInterrupt`` and ``SystemExit`` propagate so a long scraping
    loop can still be stopped.
    """
    # NOTE(review): this silences warnings for the whole process, not only
    # for the duration of this call. Kept as-is because later cells may rely
    # on it.
    warnings.filterwarnings("ignore")
    try:
        page = requests.get(x, timeout=timeout)
        soup = BeautifulSoup(page.content, 'html.parser')

        switcher = soup.find("div", {"class": "filter switcher"})
        # Drop the first four characters of the link text
        # (presumably a "vs. " prefix -- TODO confirm against a live page).
        pos = switcher.find("a").contents[0][4:]

        # The eighth <span> on the page is used as the name; this is a
        # positional lookup and will break if the page layout changes.
        name = [element.text for element in soup.find_all("span")][7]

        metric_names = []
        metric_values = []
        metric_percentiles = []
        for row in soup.find_all('table')[0].tbody.find_all('tr'):
            label = row.find_all('th')[0].contents
            cells = row.find_all('td')
            value = cells[0].contents
            percentile = cells[1].contents
            # Skip the row as a whole if any of its three cells is empty, so
            # the three output lists can never drift out of alignment.
            if not (label and value and percentile):
                continue

            text = value[0]
            if '%' in text:
                text = text[:-1]

            metric_names.append(label[0])
            metric_values.append(float(text))
            metric_percentiles.append(int(percentile[0].contents[0]))
        return [metric_names, metric_values, metric_percentiles, name, pos]
    except Exception:
        # Best effort: any fetch or parse failure is reported as "no data".
        return []

In [6]:
from tqdm import tqdm

In [7]:
# List of [name, link] pairs for all players from the top 5 leagues
def getPlayers(timeout=30):
    """Collect ``[name, link]`` pairs from the Big-5 leagues player table.

    Parameters
    ----------
    timeout : float, optional
        Seconds handed to ``requests.get`` so a stalled connection cannot
        block forever. Defaults to 30.

    Returns
    -------
    list of list
        One ``[name, href]`` entry per table row whose first ``<td>`` starts
        with an element exposing ``.contents`` and ``.get("href")``. Rows
        that do not fit that shape are skipped.

    Notes
    -----
    Errors raised by the download itself are not caught here. Inside the row
    loop only ``Exception`` subclasses are skipped, so ``KeyboardInterrupt``
    still stops the run.
    """
    # NOTE(review): this silences warnings process-wide, not just here.
    warnings.filterwarnings("ignore")
    url = "https://fbref.com/en/comps/Big5/stats/players/Big-5-European-Leagues-Stats"
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    Name_Link = []
    for row in soup.find_all('table')[0].tbody.find_all('tr'):
        try:
            anchor = row.find_all('td')[0].contents[0]
            entry = [anchor.contents[0], anchor.get("href")]
        except Exception:
            # Row without the expected cell/link structure: skip it.
            continue
        Name_Link.append(entry)
    return Name_Link

In [8]:
class FBREFDataSet():
    """Per-position tables of scraped FBref player metrics.

    The instance keeps one DataFrame per position group (``Forwards``,
    ``AtMid_Wingers``, ``Midfielders``, ``CenterBacks``, ``FullBacks``,
    ``GoalKeepers``), each with the columns ``Name``, ``Attribute Vector``
    and ``Percentiles``, plus ``Name_Link``, a list of ``[name, link]``
    pairs read from ``NAME_DB.csv``.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding ``NAME_DB.csv`` and the per-position CSV files.
        Defaults to the Google Drive folder the notebook was written for.
    """

    # Position label reported by ``getPlayerData`` (last element of its
    # result) -> attribute that stores the DataFrame for that position.
    # The order also fixes the order of the counters printed by load_online.
    _POSITION_ATTRS = {
        'Forwards': 'Forwards',
        'Att Mid / Wingers': 'AtMid_Wingers',
        'Midfielders': 'Midfielders',
        'Center Backs': 'CenterBacks',
        'Fullbacks': 'FullBacks',
        'Goalkeepers': 'GoalKeepers',
    }

    _BASE_URL = 'https://fbref.com'
    # Pause after every page request, in seconds.
    _REQUEST_DELAY_SECONDS = 3

    # Class-level default so path building also works on instances that
    # were created without running __init__.
    data_dir = "/content/drive/MyDrive/FBREF"

    def __init__(self, data_dir="/content/drive/MyDrive/FBREF"):
        self.data_dir = data_dir
        name_db = pd.read_csv(self._csv_path("NAME_DB"))
        # Columns 1 and 2 are taken as name and link; column 0 is ignored
        # (presumably the index written by an earlier to_csv -- TODO confirm).
        self.Name_Link = [[name_db.iloc[i, 1], name_db.iloc[i, 2]]
                          for i in range(name_db.shape[0])]
        for attr in self._POSITION_ATTRS.values():
            setattr(self, attr, self._empty_frame())

    @staticmethod
    def _empty_frame():
        """Return an empty frame with the three per-player columns."""
        return pd.DataFrame({'Name': pd.Series(dtype='str'),
                             'Attribute Vector': pd.Series(dtype='object'),
                             'Percentiles': pd.Series(dtype='object')})

    def _csv_path(self, stem):
        """Return the path of ``<stem>.csv`` inside ``data_dir``."""
        return f"{self.data_dir}/{stem}.csv"

    def _fetch(self, link):
        """Download one player page, pause, echo the result and return it."""
        data = getPlayerData(self._BASE_URL + link)
        time.sleep(self._REQUEST_DELAY_SECONDS)
        print(data)
        return data

    def load_online(self):
        """Scrape every player in ``Name_Link`` into the matching position frame.

        Each player whose scrape returns data is stored in the DataFrame of
        the position reported for them. Previously every position was
        written into ``Forwards``, where rows also overwrote one another.
        Row labels start at 0 for each position on every call, so rows
        already present under those labels are replaced. Players with no
        data or an unknown position label are skipped.
        """
        slot_of = {label: slot for slot, label in enumerate(self._POSITION_ATTRS)}
        index = [0] * len(slot_of)
        for entry in tqdm(self.Name_Link):
            name = entry[0]
            data = self._fetch(entry[1])
            if data != [] and data[-1] in slot_of:
                label = data[-1]
                slot = slot_of[label]
                frame = getattr(self, self._POSITION_ATTRS[label])
                frame.loc[index[slot]] = [name, data[1], data[2]]
                index[slot] += 1
            print(index)

    def load_offline(self):
        """Replace every position frame with the contents of its CSV file.

        NOTE(review): files written by ``save`` include the index as their
        first column, and ``read_csv`` reads it back as an ordinary column.
        List-valued cells also come back as strings. Verify downstream code
        expects that.
        """
        for attr in self._POSITION_ATTRS.values():
            setattr(self, attr, pd.read_csv(self._csv_path(attr)))

    def save(self):
        """Write every position frame to ``<data_dir>/<attribute name>.csv``."""
        for attr in self._POSITION_ATTRS.values():
            getattr(self, attr).to_csv(self._csv_path(attr))

    def createDataBase(self, position, temp):
        """Scrape all players and collect those of one position into ``temp``.

        Parameters
        ----------
        position : str
            Position label to keep; compared with the last element of the
            scraped data.
        temp : pandas.DataFrame
            Frame filled in place, one row ``[name, data[1], data[2]]`` per
            matching player, using row labels 0, 1, 2, ...
        """
        index = 0
        for entry in tqdm(self.Name_Link):
            name = entry[0]
            data = self._fetch(entry[1])
            if data != [] and data[-1] == position:
                temp.loc[index] = [name, data[1], data[2]]
                index = index + 1
        print(temp)

In [None]:
# Build the dataset object. The constructor reads NAME_DB.csv from
# /content/drive/MyDrive/FBREF, so that file must be reachable
# (presumably Google Drive has to be mounted first -- confirm).
DS = FBREFDataSet()