In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib as mpl
import warnings
import numpy as np
from math import pi
from googlesearch import search
import time

In [2]:
def linkGen(player):
    query = player+" FBref"

    for j in search(query, num_results=10, sleep_interval=2):
        return j

In [38]:
linkGen("Harry Kane")

'https://fbref.com/en/players/21a66f6a/Harry-Kane'

In [39]:
data_dict={
    'Name':[],
    'Height':[],
    'Weight':[],
    'Club':[],
    'Nation':[],
    'Non-Penalty Goals':[],
    'npxG: Non-Penalty xG':[],
    'Shots Total':[],
    'Assists':[],
    'xAG: Exp. Assisted Goals':[],
    'npxG + xAG':[],
    'Shot-Creating Actions':[],
    'Passes Attempted':[],
    'Pass Completion %':[],
    'Progressive Passes':[],
    'Progressive Carries':[],
    'Successful Take-Ons':[],
    'Touches (Att Pen)':[],
    'Progressive Passes Rec':[],
    'Tackles':[],
    'Interceptions':[],
    'Blocks':[],
    'Clearances':[],
    'Aerials Won':[]
}

In [53]:
def getPlayerData(x):
    warnings.filterwarnings("ignore")
    try:
        url = x
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
        
        # Send a request to the provided URL with headers
        page = requests.get(url, headers=headers)
        soup = BeautifulSoup(page.content, 'html.parser')
        # print(soup)
        div=soup.find("div", {"class" : "filter switcher"})
        pos=div.find("a").contents[0][4:]

        data = [element.text for element in soup.find_all("span")]
        # print(data)
        name = data[7]
        height = data[8]
        weight = data[9]
        country = soup.find('strong', text='National Team:').find_next_sibling('a').text.strip()
        
        # Extract club
        club = soup.find('strong', text='Club:').find_next_sibling('a').text.strip()

        metric_names = []
        metric_values = []
        metric_percentiles = []
        remove_content = ["'", "[", "]", ","]
        for row in soup.findAll('table')[0].tbody.findAll('tr'):
            first_column = row.findAll('th')[0].contents
            if first_column==[]:
                continue
            metric_names.append(first_column[0])
        for row in soup.findAll('table')[0].tbody.findAll('tr'):
            first_column = row.findAll('td')[0].contents
            if first_column==[]:
                continue
            metric_values.append(first_column[0])
        for row in soup.findAll('table')[0].tbody.findAll('tr'):
            first_column = row.findAll('td')[1].contents
            if first_column==[]:
                continue
            metric_percentiles.append(int(first_column[0].contents[0]))
        for i in range(len(metric_values)):
            text=metric_values[i]
            if '%' in text:
                text=text[:-1]
            metric_values[i]=float(text)
        
        data_dict['Name'].append(name)
        data_dict['Height'].append(height)
        data_dict['Weight'].append(weight)
        data_dict['Club'].append(club)
        data_dict['Nation'].append(country)
        try:
            for i in range(len(metric_names)):
                data_dict[metric_names[i]].append(metric_percentiles[i])
        except Exception as e:
            print(f"An error occurred: {e}")



        return [metric_names, metric_values, metric_percentiles, name, pos, height, weight, club, country]
    except Exception as e:
        print(url, e)
        return []

In [51]:
getPlayerData(linkGen("Harry Kane"))

[['Non-Penalty Goals',
  'npxG: Non-Penalty xG',
  'Shots Total',
  'Assists',
  'xAG: Exp. Assisted Goals',
  'npxG + xAG',
  'Shot-Creating Actions',
  'Passes Attempted',
  'Pass Completion %',
  'Progressive Passes',
  'Progressive Carries',
  'Successful Take-Ons',
  'Touches (Att Pen)',
  'Progressive Passes Rec',
  'Tackles',
  'Interceptions',
  'Blocks',
  'Clearances',
  'Aerials Won'],
 [0.8,
  0.69,
  3.89,
  0.28,
  0.18,
  0.87,
  2.69,
  20.74,
  71.1,
  3.46,
  1.08,
  0.66,
  5.09,
  4.55,
  0.47,
  0.14,
  0.54,
  0.45,
  1.25],
 [98, 97, 92, 89, 79, 96, 65, 45, 53, 89, 32, 44, 57, 18, 31, 34, 35, 24, 36],
 'Harry Kane',
 'Forwards',
 '188cm',
 '73kg',
 'Bayern Munich',
 'England']

In [83]:
data_dict

{'Name': ['Harry Kane'],
 'Height': ['188cm'],
 'Weight': ['73kg'],
 'Club': ['Bayern Munich'],
 'Nation': ['England'],
 'Non-Penalty Goals': [98],
 'npxG: Non-Penalty xG': [97],
 'Shots Total': [92],
 'Assists': [89],
 'xAG: Exp. Assisted Goals': [79],
 'npxG + xAG': [96],
 'Shot-Creating Actions': [65],
 'Passes Attempted': [45],
 'Pass Completion %': [53],
 'Progressive Passes': [89],
 'Progressive Carries': [32],
 'Successful Take-Ons': [44],
 'Touches (Att Pen)': [57],
 'Progressive Passes Rec': [18],
 'Tackles': [31],
 'Interceptions': [34],
 'Blocks': [35],
 'Clearances': [24],
 'Aerials Won': [36]}

In [7]:
from tqdm import tqdm

In [7]:
#DataFrame of all players' Names from top 5 leagues
def getPlayers():
    warnings.filterwarnings("ignore")
    url = "https://fbref.com/en/comps/Big5/stats/players/Big-5-European-Leagues-Stats"
    page =requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    Name_Link = []
    for row in soup.findAll('table')[0].tbody.findAll('tr'):
        val=[]
        try:
            temp=row.findAll('td')[0].contents
            val.append(temp[0].contents[0])
            val.append(temp[0].get("href"))
            Name_Link.append(val)

        except:
            continue
    return Name_Link

In [8]:
class FBREFDataSet():

    def __init__(self):
        self.Name_Link=pd.read_csv("/content/drive/MyDrive/FBREF/NAME_DB.csv")
        self.Name_Link=[[self.Name_Link.iloc[i,1], self.Name_Link.iloc[i,2]] for i in range(self.Name_Link.shape[0])]
        self.Midfielders= pd.DataFrame({'Name': pd.Series(dtype='str'),
                   'Attribute Vector': pd.Series(dtype='object'),
                         'Percentiles': pd.Series(dtype='object')})
        self.Forwards= pd.DataFrame({'Name': pd.Series(dtype='str'),
                   'Attribute Vector': pd.Series(dtype='object'),
                         'Percentiles': pd.Series(dtype='object')})
        self.AtMid_Wingers= pd.DataFrame({'Name': pd.Series(dtype='str'),
                   'Attribute Vector': pd.Series(dtype='object'),
                         'Percentiles': pd.Series(dtype='object')})
        self.FullBacks= pd.DataFrame({'Name': pd.Series(dtype='str'),
                   'Attribute Vector': pd.Series(dtype='object'),
                         'Percentiles': pd.Series(dtype='object')})
        self.CenterBacks= pd.DataFrame({'Name': pd.Series(dtype='str'),
                   'Attribute Vector': pd.Series(dtype='object'),
                         'Percentiles': pd.Series(dtype='object')})
        self.GoalKeepers= pd.DataFrame({'Name': pd.Series(dtype='str'),
                   'Attribute Vector': pd.Series(dtype='object'),
                         'Percentiles': pd.Series(dtype='object')})


    def load_online(self):
        index=[0 for i in range(6)]
        Positions={'Forwards': self.Forwards,
                   'Att Mid / Wingers':self.AtMid_Wingers,
                   'Midfielders': self.Midfielders,
                   'Center Backs':self.CenterBacks,
                   'Fullbacks': self.FullBacks,
                   'Goalkeepers':self.GoalKeepers
                   }
        n = len(self.Name_Link)
        for i in tqdm(range(n)):
            name=self.Name_Link[i][0]
            data=getPlayerData('https://fbref.com'+self.Name_Link[i][1])
            time.sleep(3)
            print(data)
            if data!=[]:
                if data[-1]=='Forwards':
                    self.Forwards.loc[index[0]]=[name,data[1], data[2]]
                    index[0]=index[0]+1

                elif data[-1]=='Att Mid / Wingers':
                    self.Forwards.loc[index[1]]=[name,data[1], data[2]]
                    index[1]=index[1]+1

                elif data[-1]=='Midfielders':
                    self.Forwards.loc[index[2]]=[name,data[1], data[2]]
                    index[2]=index[2]+1

                elif data[-1]=='Center Backs':
                    self.Forwards.loc[index[3]]=[name,data[1], data[2]]
                    index[3]=index[3]+1

                elif data[-1]=='Fullbacks':
                    self.Forwards.loc[index[4]]=[name,data[1], data[2]]
                    index[4]=index[4]+1

                elif data[-1]=='Goalkeepers':
                    self.Forwards.loc[index[5]]=[name,data[1], data[2]]
                    index[5]=index[5]+1
            print(index)

    def load_offline(self):
        self.Midfielders=pd.read_csv('/content/drive/MyDrive/FBREF/Midfielders.csv')
        self.Forwards=pd.read_csv('/content/drive/MyDrive/FBREF/Forwards.csv')
        self.AtMid_Wingers=pd.read_csv('/content/drive/MyDrive/FBREF/AtMid_Wingers.csv')
        self.FullBacks=pd.read_csv('/content/drive/MyDrive/FBREF/FullBacks.csv')
        self.CenterBacks=pd.read_csv('/content/drive/MyDrive/FBREF/CenterBacks.csv')
        self.GoalKeepers=pd.read_csv('/content/drive/MyDrive/FBREF/GoalKeepers.csv')

    def save(self):
        self.Midfielders.to_csv('/content/drive/MyDrive/FBREF/Midfielders.csv')
        self.Forwards.to_csv('/content/drive/MyDrive/FBREF/Forwards.csv')
        self.AtMid_Wingers.to_csv('/content/drive/MyDrive/FBREF/AtMid_Wingers.csv')
        self.FullBacks.to_csv('/content/drive/MyDrive/FBREF/FullBacks.csv')
        self.CenterBacks.to_csv('/content/drive/MyDrive/FBREF/CenterBacks.csv')
        self.GoalKeepers.to_csv('/content/drive/MyDrive/FBREF/GoalKeepers.csv')

    def createDataBase(self, position, temp):

        index=0
        i=0
        n = len(self.Name_Link)
        for i in tqdm(range(n)):
            name=self.Name_Link[i][0]
            data=getPlayerData('https://fbref.com'+self.Name_Link[i][1])
            time.sleep(3)
            print(data)
            if data!=[] and data[-1]==position:
                temp.loc[index]=[name,data[1], data[2]]
                index=index+1
        print(temp)

In [None]:
DS = FBREFDataSet()

In [8]:
def linkGenTf(player):
    query = player+" transfermarkt"

    for j in search(query, num_results=10, sleep_interval=2):
        return j

In [9]:
linkGenTf('Harry kane')

'https://www.transfermarkt.co.in/harry-kane/profil/spieler/132098'

In [10]:
import re

In [11]:
def get_current_value(url):
    # Send a request to the provided URL
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    
    # Send a request to the provided URL with headers
    response = requests.get(url, headers=headers)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find the meta tag with the name 'description'
        meta_tag = soup.find('meta', attrs={'name': 'description'})
        
        if meta_tag:
            # Extract the content attribute
            content = meta_tag.get('content', '')
            
            # Use regex to find the value in the format ₹ followed by a number and 'Cr'
            # market_value = re.search(r'₹[0-9]+(?:\.[0-9]+)?\s*Cr', content)
            market_value = re.search(r'₹([0-9]+(?:\.[0-9]+)?)\s*Cr', content)
            if market_value:
                # return market_value.group()
                # Extract the numeric part using group(1)
                numeric_value = market_value.group(1)
                
                try:
                    return int(numeric_value)*108041.60/1000000
                    #Value in M Euros
                except:
                    return "Not Int"
            else:
                return "Market value not found in meta tag"
        else:
            return "Meta tag not found"
    else:
        return f"Failed to retrieve page, status code: {response.status_code}"

In [12]:
get_current_value(linkGenTf('Hakan Çalhanoğlu')) 

38.894976

In [13]:
name_df=pd.read_excel('NameList.xlsx')

In [14]:
names=name_df['Name'].values.tolist()

In [15]:
names

['Manuel Akanji',
 'David Alaba',
 'Joachim Andersen',
 'Ronald Araujo',
 'Alessandro Bastoni',
 'Sven Botman',
 'Trevoh Chalobah',
 'Andreas Christensen',
 'Danilo',
 'Eric Dier',
 'Luiz Felipe',
 'Wesley Fofana',
 'Roger Ibanez',
 'Pierre Kalulu',
 'Presnel Kimpembe',
 'Ibrahima Konate',
 'Kalidou Koulibaly',
 'Maxence Lacroix',
 'Aymeric Laporte',
 'Robin Le Normand',
 'Matthijs de Ligt',
 'Victor Lindelof',
 'Harry Maguire',
 'Marquinhos',
 'Lisandro Martinez',
 'Konstantinos Mavropanos',
 'Kim Min-jae',
 'Tyrone Mings',
 'Benjamin Pavard',
 'Sergio Ramos',
 'Cristian Romero',
 'Antonio Rudiger',
 'William Saliba',
 'Alex Sandro',
 'Giorgio Scalvini',
 'Fabian Schar',
 'Nico Schlotterbeck',
 'Thiago Silva',
 'Chris Smalling',
 'Jonathan Tah',
 'Dayot Upamecano',
 'Virgil van Dijk',
 'Raphael Varane',
 'Micky van de Ven',
 'Stefan de Vrij',
 'Axel Witsel',
 'Leny Yoro',
 'Kurt Zouma',
 'Karim Adeyemi',
 'Antony',
 'Harvey Barnes',
 'Sofiane Boufal',
 'Jarrod Bowen',
 'Julian Brandt'

In [16]:
import time

In [17]:
from tqdm import tqdm

In [56]:
data_dict={
    'Name':[],
    'Height':[],
    'Weight':[],
    'Club':[],
    'Nation':[],
    'Non-Penalty Goals':[],
    'npxG: Non-Penalty xG':[],
    'Shots Total':[],
    'Assists':[],
    'xAG: Exp. Assisted Goals':[],
    'npxG + xAG':[],
    'Shot-Creating Actions':[],
    'Passes Attempted':[],
    'Pass Completion %':[],
    'Progressive Passes':[],
    'Progressive Carries':[],
    'Successful Take-Ons':[],
    'Touches (Att Pen)':[],
    'Progressive Passes Rec':[],
    'Tackles':[],
    'Interceptions':[],
    'Blocks':[],
    'Clearances':[],
    'Aerials Won':[]
}

In [57]:
for name in tqdm(names[0:50]):
    getPlayerData(linkGen(name))
    time.sleep(6)

 10%|█         | 5/50 [00:38<05:44,  7.65s/it]

https://fbref.com/en/players/ccce7025/Sven-Botman 'NoneType' object has no attribute 'find_next_sibling'


 12%|█▏        | 6/50 [00:46<05:34,  7.60s/it]

https://fbref.com/en/players/5515376c/Trevoh-Chalobah 'NoneType' object has no attribute 'find_next_sibling'


 20%|██        | 10/50 [01:17<05:14,  7.87s/it]

https://fbref.com/en/players/15d0c248/Luiz-Felipe 'NoneType' object has no attribute 'find'


 22%|██▏       | 11/50 [01:25<05:03,  7.77s/it]

https://fbref.com/en/players/132a82f1/Wesley-Fofana 'NoneType' object has no attribute 'find'


 24%|██▍       | 12/50 [01:32<04:52,  7.70s/it]

https://fbref.com/en/players/82efe6fa/Roger-Ibanez 'NoneType' object has no attribute 'find'


 26%|██▌       | 13/50 [01:40<04:46,  7.75s/it]

https://fbref.com/en/players/85f7d1be/Pierre-Kalulu 'NoneType' object has no attribute 'find_next_sibling'


 28%|██▊       | 14/50 [01:48<04:40,  7.78s/it]

https://fbref.com/en/players/aef6e670/Presnel-Kimpembe 'NoneType' object has no attribute 'find'


 32%|███▏      | 16/50 [02:03<04:18,  7.60s/it]

https://fbref.com/en/players/da974c7b/Kalidou-Koulibaly 'NoneType' object has no attribute 'find'


 34%|███▍      | 17/50 [02:10<04:13,  7.67s/it]

https://fbref.com/en/players/277c49ed/Maxence-Lacroix 'NoneType' object has no attribute 'find_next_sibling'


 36%|███▌      | 18/50 [02:18<04:01,  7.56s/it]

https://fbref.com/en/players/119b9a8e/Aymeric-Laporte 'NoneType' object has no attribute 'find'


 54%|█████▍    | 27/50 [03:28<02:57,  7.72s/it]

https://fbref.com/en/players/8397a50c/Tyrone-Mings 'NoneType' object has no attribute 'find'


 58%|█████▊    | 29/50 [03:46<02:55,  8.34s/it]

https://fbref.com/en/players/08511d65/Sergio-Ramos 'NoneType' object has no attribute 'find_next_sibling'


 76%|███████▌  | 38/50 [04:58<01:35,  7.94s/it]

https://fbref.com/en/players/b6964eb6/Chris-Smalling 'NoneType' object has no attribute 'find'


 92%|█████████▏| 46/50 [06:02<00:32,  8.09s/it]

https://fbref.com/en/players/6763f716/Leny-Yoro 'NoneType' object has no attribute 'find_next_sibling'


100%|██████████| 50/50 [06:34<00:00,  7.90s/it]


In [58]:
for name in tqdm(names[50:100]):
    getPlayerData(linkGen(name))
    time.sleep(6)

  2%|▏         | 1/50 [00:07<06:02,  7.40s/it]

https://fbref.com/en/players/b0c71810/Sofiane-Boufal 'NoneType' object has no attribute 'find'


 10%|█         | 5/50 [00:38<05:49,  7.76s/it]

https://fbref.com/en/players/b34c63a5/Rayan-Cherki 'NoneType' object has no attribute 'find_next_sibling'


 52%|█████▏    | 26/50 [03:34<03:34,  8.93s/it]

https://fbref.com/en/players/1fb1c435/Rodrigo 'NoneType' object has no attribute 'find'


 72%|███████▏  | 36/50 [04:57<01:51,  7.99s/it]

https://fbref.com/en/players/6622454d/Hakim-Ziyech 'NoneType' object has no attribute 'find'


 84%|████████▍ | 42/50 [05:44<01:01,  7.73s/it]

https://fbref.com/en/players/70d74ece/Karim-Benzema 'NoneType' object has no attribute 'find'


 96%|█████████▌| 48/50 [06:30<00:15,  7.74s/it]

https://fbref.com/en/players/39583cfd/Gerard-Deulofeu 'NoneType' object has no attribute 'find'


100%|██████████| 50/50 [06:46<00:00,  8.13s/it]


In [64]:
for name in tqdm(names[136:150]):
    getPlayerData(linkGen(name))
    time.sleep(6)

 21%|██▏       | 3/14 [00:28<01:39,  9.02s/it]

https://fbref.com/en/players/5c2b4f07/Serge-Aurier 'NoneType' object has no attribute 'find_next_sibling'


 43%|████▎     | 6/14 [00:54<01:10,  8.80s/it]

https://fbref.com/en/players/de39485a/Yannick-Carrasco 'NoneType' object has no attribute 'find'


 86%|████████▌ | 12/14 [01:49<00:18,  9.18s/it]

https://fbref.com/en/players/0ab1f153/Juan-Cuadrado 'NoneType' object has no attribute 'find'


100%|██████████| 14/14 [02:08<00:00,  9.18s/it]


In [65]:
for name in tqdm(names[150:200]):
    getPlayerData(linkGen(name))
    time.sleep(6)

  4%|▍         | 2/50 [00:18<07:14,  9.06s/it]

https://fbref.com/en/players/ae4e2852/Mattia-De-Sciglio 'NoneType' object has no attribute 'find'


 20%|██        | 10/50 [01:36<06:21,  9.55s/it]

https://fbref.com/en/players/1265a93a/Reece-James 'NoneType' object has no attribute 'find'


 22%|██▏       | 11/50 [01:46<06:15,  9.62s/it]

https://fbref.com/en/players/c3e66ffe/Arnau-Martinez 'NoneType' object has no attribute 'find_next_sibling'


 24%|██▍       | 12/50 [01:56<06:05,  9.61s/it]

https://fbref.com/tiny/gK0XP 'NoneType' object has no attribute 'find'


 42%|████▏     | 21/50 [03:25<04:49,  9.97s/it]

https://fbref.com/en/players/6aa3e78b/Ryan-Sessegnon 'NoneType' object has no attribute 'find'


 46%|████▌     | 23/50 [03:45<04:30, 10.02s/it]

https://fbref.com/en/players/fcb27134/Mohamed-Simakan 'NoneType' object has no attribute 'find_next_sibling'


 50%|█████     | 25/50 [04:04<04:00,  9.63s/it]

https://fbref.com/en/players/a65c844b/Nuno-Tavares 'NoneType' object has no attribute 'find_next_sibling'


 52%|█████▏    | 26/50 [04:13<03:51,  9.66s/it]

https://fbref.com/en/players/e73c9bb2/Alex-Telles 'NoneType' object has no attribute 'find'


 60%|██████    | 30/50 [04:54<03:19, 10.00s/it]

https://fbref.com/en/players/2b09d998/Tyler-Adams 'NoneType' object has no attribute 'find'


 68%|██████▊   | 34/50 [05:33<02:39,  9.95s/it]

https://fbref.com/en/players/f81ef10d/Maxence-Caqueret 'NoneType' object has no attribute 'find_next_sibling'


 80%|████████  | 40/50 [06:25<01:28,  8.87s/it]

https://fbref.com/en/players/5c74c0f5/Kiernan-Dewsbury-Hall 'NoneType' object has no attribute 'find_next_sibling'


 96%|█████████▌| 48/50 [07:34<00:17,  8.66s/it]

https://fbref.com/en/players/b9fbae28/NGolo-Kante 'NoneType' object has no attribute 'find'


100%|██████████| 50/50 [07:51<00:00,  9.43s/it]


In [67]:
for name in tqdm(names[200:]):
    getPlayerData(linkGen(name))
    time.sleep(6)

  0%|          | 0/15 [00:00<?, ?it/s]

https://fbref.com/en/players/6ce1f46f/Toni-Kroos 'NoneType' object has no attribute 'find_next_sibling'


 13%|█▎        | 2/15 [00:17<01:54,  8.78s/it]

https://fbref.com/en/players/4e6dbac4/Sergej-Milinkovic-Savic 'NoneType' object has no attribute 'find'


 73%|███████▎  | 11/15 [01:48<00:43, 10.81s/it]

https://fbref.com/en/players/0db169ae/Sandro-Tonali 'NoneType' object has no attribute 'find'


100%|██████████| 15/15 [02:25<00:00,  9.67s/it]


In [68]:
len(data_dict['Name'])

171

In [69]:
linkGen('Manuel Akanji')

'https://fbref.com/en/players/89ac64a6/Manuel-Akanji'

In [33]:
getPlayerData(linkGen('Harry Kane'))

[]

In [70]:
NewName=[
    'Marc Guehi',
    'Pau Cubarsi',
    'Radu Dragusin',
    'Riccardo Calafiori',
    'Jean-Clair Todibo',
    'Mario Hermoso',
    'Niklas Sule',
    'Pau Torres',
    'Robin Le Normand',
    'Eric Dier',
    'Arda Guler',
    'Jeremy Doku',
    'Jamal Musiala',
    'Michael Olise',
    'Miguel Gutierrez',
    'Jack Clarke',
    'Dominic Solanke',
    'Archie Gray',
    'Kai Havertz',
    'Cole Palmer',
    'Rico Lwewis'
]

In [71]:
for name in tqdm(NewName):
    getPlayerData(linkGen(name))
    time.sleep(6)

 24%|██▍       | 5/21 [00:44<02:20,  8.78s/it]

https://fbref.com/en/players/555f3a0b/Mario-Hermoso 'NoneType' object has no attribute 'find_next_sibling'


 67%|██████▋   | 14/21 [01:59<00:59,  8.46s/it]

https://fbref.com/en/players/7e98cff1/Miguel-Gutierrez 'NoneType' object has no attribute 'find_next_sibling'


 71%|███████▏  | 15/21 [02:07<00:49,  8.28s/it]

https://fbref.com/en/players/e16932d8/Jack-Clarke 'NoneType' object has no attribute 'find_next_sibling'


 81%|████████  | 17/21 [02:24<00:34,  8.55s/it]

https://fbref.com/en/players/f58515f5/Archie-Gray 'NoneType' object has no attribute 'find_next_sibling'


100%|██████████| 21/21 [03:00<00:00,  8.59s/it]


In [None]:
Missed =[
    'Sven Botman',
    'Rayan Cherki',
    'Leny Yoro',
    'Jean-Philippe Mateta',
    'Kiernan Dewsbury-Hall',
    'Miguel Gutierrez',
    'Jack Clarke',
    'Archie Gray',
    
]

In [None]:
def get_current_value(url):
    # Send a request to the provided URL
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    
    # Send a request to the provided URL with headers
    response = requests.get(url, headers=headers)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find the meta tag with the name 'description'
        meta_tag = soup.find('meta', attrs={'name': 'description'})
        
        if meta_tag:
            # Extract the content attribute
            content = meta_tag.get('content', '')
            
            # Use regex to find the value in the format ₹ followed by a number and 'Cr'
            # market_value = re.search(r'₹[0-9]+(?:\.[0-9]+)?\s*Cr', content)
            market_value = re.search(r'₹([0-9]+(?:\.[0-9]+)?)\s*Cr', content)
            if market_value:
                # return market_value.group()
                # Extract the numeric part using group(1)
                numeric_value = market_value.group(1)
                
                try:
                    return int(numeric_value)*108041.60/1000000
                    #Value in M Euros
                except:
                    return "Not Int"
            else:
                return "Market value not found in meta tag"
        else:
            return "Meta tag not found"
    else:
        return f"Failed to retrieve page, status code: {response.status_code}"

In [77]:
market_values=[]

In [80]:
for name in tqdm(data_dict['Name'][100:]):
    market_values.append(get_current_value(linkGenTf(name)))
    time.sleep(5)

100%|██████████| 88/88 [10:24<00:00,  7.10s/it]


In [81]:
len(market_values)

188

In [82]:
len(data_dict['Name'])

188

In [83]:
data_dict['Market Value in M Euros'] = market_values

In [85]:
df=pd.DataFrame.from_dict(data_dict)

In [86]:
df

Unnamed: 0,Name,Height,Weight,Club,Nation,Non-Penalty Goals,npxG: Non-Penalty xG,Shots Total,Assists,xAG: Exp. Assisted Goals,...,Progressive Carries,Successful Take-Ons,Touches (Att Pen),Progressive Passes Rec,Tackles,Interceptions,Blocks,Clearances,Aerials Won,Market Value in M Euros
0,Manuel Akanji,187cm,84kg,Manchester City,Switzerland,90,87,58,28,61,...,98,27,70,95,48,8,9,3,3,38.894976
1,David Alaba,180cm,72kg,Real Madrid,Austria,19,94,99,94,93,...,48,27,86,93,10,54,1,7,4,17.286656
2,Joachim Andersen,190cm,74kg,Fulham,Denmark,40,51,59,94,98,...,31,72,71,54,47,47,5,92,60,30.251648
3,Ronald Araújo,188cm,78kg,Barcelona,Uruguay,50,95,95,90,86,...,68,82,93,49,38,9,39,14,72,Market value not found in meta tag
4,Alessandro Bastoni,190cm,83kg,Internazionale,Italy,46,61,56,96,99,...,99,87,85,99,48,30,15,1,21,Market value not found in meta tag
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,Michael Olise,178cm,68kg,Bayern Munich,France,99,84,97,91,99,...,77,84,74,73,64,85,92,66,54,47.538304
184,Dominic Solanke,187cm,74kg,Tottenham Hotspur,England,65,73,67,25,25,...,57,78,79,29,55,46,60,80,73,34.573312
185,Kai Havertz,186cm,77kg,Arsenal,Germany,99,99,94,89,79,...,71,43,99,97,7,17,25,19,99,60.503296
186,Cole Palmer,189cm,73kg,Chelsea,England,92,93,94,99,99,...,63,55,54,30,9,85,19,41,1,69.146624


In [87]:
df.to_csv('Data.csv')

In [5]:
import difflib

def find_most_similar_key(name, data):
    keys = list(data.keys())
    closest_match = difflib.get_close_matches(name, keys, n=1, cutoff=0.6)
    if closest_match:
        return data[closest_match[0]]
    return None

# Example dictionary
teams_leagues = {
    "Manchester City": "Premier League",
    "Real Madrid": "La Liga",
    "Fulham": "Premier League",
    "Barcelona": "La Liga",
    "Internazionale": "Serie A",
    "Juventus": "Serie A",
    "Bayern Munich": "Bundesliga",
    "Liverpool": "Premier League",
    "Atlético Madrid": "La Liga",
    "Manchester United": "Premier League",
    "Paris Saint-Germain": "Ligue 1",
    "West Ham United": "Premier League",
    "Tottenham Hotspur": "Premier League",
    "Arsenal": "Premier League",
    "Flamengo": "Série A",
    "Atalanta": "Serie A",
    "Newcastle United": "Premier League",
    "Dortmund": "Bundesliga",
    "Fluminense": "Série A",
    "Leverkusen": "Bundesliga",
    "Como": "Serie B",
    "Al Oruba": "Omani League",
    "Real Betis": "La Liga",
    "Napoli": "Serie A",
    "Milan": "Serie A",
    "Genoa": "Serie B",
    "Brighton & Hove Albion": "Premier League",
    "Chelsea": "Premier League",
    "Athletic Club": "La Liga",
    "Monaco": "Ligue 1",
    "Leeds United": "Championship",
    "RB Leipzig": "Bundesliga",
    "Osasuna": "La Liga",
    "Everton": "Premier League",
    "Boca Juniors": "Primera División",
    "Wolverhampton Wanderers": "Premier League",
    "Girona": "La Liga",
    "Benfica": "Primeira Liga",
    "Anderlecht": "Pro League",
    "Fenerbahçe": "Süper Lig",
    "Al Gharrafa": "Qatari Stars League",
    "Inter Miami": "MLS",
    "Villarreal": "La Liga",
    "Galatasaray": "Süper Lig",
    "Real Sociedad": "La Liga",
    "Al-Hilal": "Saudi Pro League",
    "Aston Villa": "Premier League",
    "Lazio": "Serie A",
    "Al Qadisiyah": "Saudi Pro League",
    "Mönchengladbach": "Bundesliga",
    "Brentford": "Premier League",
    "Marseille": "Ligue 1",
    "Crystal Palace": "Premier League",
    "Nottingham Forest": "Premier League"
}

# Example usage
name = "tottenham"  # Example input
league = find_most_similar_key(name, teams_leagues)
print(league)  # This will print the league of the most similar team name


Premier League


In [7]:
import pandas as pd

In [8]:
df=pd.read_csv('Data.csv')

In [9]:
LeagueName=[]

In [None]:
for name in df['Club']:
    print(league)