In [1]:
# Imports
import os
import sys
import unicodedata

import pandas as pd
import re
import requests

from bs4 import BeautifulSoup

In [10]:
# Functions
def convert_special_to_regular(text):
    """
    Converts special characters (like accented letters) in a string
    to their regular letter equivalents.
    """
    # Normalize the string to NFKD form, which separates base characters
    # from diacritical marks (accents).
    normalized_text = unicodedata.normalize('NFKD', text)

    # Encode to ASCII and then decode back, ignoring characters that
    # cannot be represented in ASCII (which effectively removes the diacritics).
    regular_text = normalized_text.encode('ascii', 'ignore').decode('utf-8')

    return regular_text

In [3]:
# Read team table csv
teamdf = pd.read_csv(os.getcwd() + '/' + 'data/currentteaminfo.csv')
teamdf

Unnamed: 0,Player,Age,Position,Value (€ in M)
0,David Raya,29,Goalkeeper,40.0
1,Kepa Arrizabalaga,30,Goalkeeper,10.0
2,Karl Hein,23,Goalkeeper,3.0
3,William Saliba,24,Defender,80.0
4,Gabriel Magalhães,27,Defender,75.0
5,Cristhian Mosquera,21,Defender,30.0
6,Jakub Kiwior,25,Defender,28.0
7,Myles Lewis-Skelly,18,Defender,45.0
8,Riccardo Calafiori,23,Defender,35.0
9,Oleksandr Zinchenko,28,Defender,20.0


In [13]:
# Create name list
namelist = teamdf['Player'].tolist()
input_namelist = [] 

# Convert to all lowercase with '-' in-between first and last name
for name in namelist:
    s = name.lower().replace(" ", "-")
    # Replace special letters with regular
    input_namelist.append(convert_special_to_regular(s))

input_namelist

['david-raya',
 'kepa-arrizabalaga',
 'karl-hein',
 'william-saliba',
 'gabriel-magalhaes',
 'cristhian-mosquera',
 'jakub-kiwior',
 'myles-lewis-skelly',
 'riccardo-calafiori',
 'oleksandr-zinchenko',
 'jurrien-timber',
 'ben-white',
 'martin-zubimendi',
 'christian-nrgaard',
 'declan-rice',
 'mikel-merino',
 'albert-sambi-lokonga',
 'martin-degaard',
 'fabio-vieira',
 'gabriel-martinelli',
 'leandro-trossard',
 'bukayo-saka',
 'ethan-nwaneri',
 'noni-madueke',
 'reiss-nelson',
 'viktor-gyokeres',
 'kai-havertz',
 'gabriel-jesus']

In [None]:
# Dataframe of name to player id
playeridlist = ['262749', '192279', '493513', '495666', '435338', '646750', '425918', '890721', '502821', '203853', '420243', '335721',
                '423440', '148367', '357662', '338424', '381967', '316264', '537598',
                '655488', '144028', '433177', '890719', '503987', '340325', '325443', '309400', '363205']

webinput_df = pd.DataFrame()
webinput_df['InputName'] = input_namelist
webinput_df['player_id'] = playeridlist

# Fix names of CN and MO
webinput_df.loc[webinput_df['InputName'] == 'christian-nrgaard', 'InputName'] = 'christian-norgaard'
webinput_df.loc[webinput_df['InputName'] == 'martin-degaard', 'InputName'] = 'martin-odegaard'

webinput_df

Unnamed: 0,InputName,player_id
0,david-raya,262749
1,kepa-arrizabalaga,192279
2,karl-hein,493513
3,william-saliba,495666
4,gabriel-magalhaes,435338
5,cristhian-mosquera,646750
6,jakub-kiwior,425918
7,myles-lewis-skelly,890721
8,riccardo-calafiori,502821
9,oleksandr-zinchenko,203853


In [None]:
headers = {
    'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
}

retry_names = [] # If unable to connect to page

# Retrieve current Arsenal player info
for p,n in zip(webinput_df['InputName'], webinput_df['player_id']):
    page = f"https://www.transfermarkt.us/{p}/profil/spieler/{n}"
    pagetree = requests.get(page, headers=headers)

    # Check if successful otherwise record player name
    if pagetree.status_code != 200:
        retry_names.append(p)
    
    pagesoup = BeautifulSoup(pagetree.content, 'html.parser')

200
200
200
200
200
200
200
200
503
200
200
503
200
503


KeyboardInterrupt: 

In [10]:
headers = {
    'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
}

url = "https://www.transfermarkt.us/kai-havertz/profil/spieler/309400"
pagetree = requests.get(url, headers=headers)
pagesoup = BeautifulSoup(pagetree.content, 'html.parser')


In [13]:
api_endpoints = [
    f"transferHistory/list/309400",
    f"marketValueDevelopment/graph/309400"
]

transferresponse = []
marketresponse = []

c = 0
for endpoint in api_endpoints:
    if c== 0:

        transferresponse = requests.get(
        'https://www.transfermarkt.us/ceapi/' + endpoint,
        headers=headers
        ).json()
    else:
        marketresponse = requests.get(
        'https://www.transfermarkt.us/ceapi/' + endpoint,
        headers=headers
        ).json()

    c += 1

In [35]:
test = pd.json_normalize(transferresponse['transfers'])
test = test[['date', 'season', 'marketValue', 'fee','to.clubName']]
test

Unnamed: 0,date,season,marketValue,fee,to.clubName
0,"Jul 1, 2023",23/24,€55.00m,€75.00m,Arsenal
1,"Sep 4, 2020",20/21,€81.00m,€80.00m,Chelsea
2,"Jul 1, 2016",16/17,€100k,-,B. Leverkusen
3,"Jul 1, 2014",14/15,-,-,Leverkusen U17
4,"Jul 1, 2010",10/11,-,free transfer,Leverkusen Yth.
5,"Jul 1, 2009",09/10,-,free transfer,Aachen Yth.


In [37]:
pd.json_normalize(marketresponse['list'])

Unnamed: 0,x,y,mw,datum_mw,verein,age,wappen
0,1467324000000,100000,€100k,"Jul 1, 2016",Bayer 04 Leverkusen,17,https://tmssl.akamaized.net//images/wappen/pro...
1,1486422000000,1500000,€1.50m,"Feb 7, 2017",Bayer 04 Leverkusen,17,
2,1490137200000,5000000,€5.00m,"Mar 22, 2017",Bayer 04 Leverkusen,17,
3,1497823200000,8000000,€8.00m,"Jun 19, 2017",Bayer 04 Leverkusen,18,
4,1514415600000,18000000,€18.00m,"Dec 28, 2017",Bayer 04 Leverkusen,18,
5,1522792800000,25000000,€25.00m,"Apr 4, 2018",Bayer 04 Leverkusen,18,
6,1528149600000,28000000,€28.00m,"Jun 5, 2018",Bayer 04 Leverkusen,18,
7,1534888800000,35000000,€35.00m,"Aug 22, 2018",Bayer 04 Leverkusen,19,
8,1540159200000,55000000,€55.00m,"Oct 22, 2018",Bayer 04 Leverkusen,19,
9,1545087600000,65000000,€65.00m,"Dec 18, 2018",Bayer 04 Leverkusen,19,
