In [1]:
# Imports
import os
import unicodedata

import pandas as pd
import requests

from typing import Tuple

import time

In [7]:
headers = {
    'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
}

In [15]:
# Functions
def get_info(url: str, api_endpoints: list) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Function to extract marketvalue and transfer history using api endpoint list as input
    
    Parameters
    ----------
    url : str
        main URL
    api_endpoints : list
        URL endpoint
    
    Output
    ------
    transferdf : pd.DataFrame
        transfer history
    marketdf : pd.DataFrame
        marketvalue history
    """

    transferresponse = []
    marketresponse = []


    transferresponse = requests.get(
                url + api_endpoints[0],
                headers=headers
                ).json()

    marketresponse = requests.get(
                url + api_endpoints[1],
                headers=headers
                ).json()
        
    transferdf = pd.json_normalize(transferresponse['transfers'])
    transferdf = transferdf[['date', 'season', 'marketValue', 'fee','to.clubName']]

    marketdf = pd.json_normalize(marketresponse['list'])
    marketdf = marketdf[['y', 'mw',	'datum_mw',	'verein', 'age']]

    return(transferdf, marketdf)

In [9]:
def convert_special_to_regular(text):
    """
    Converts special characters (like accented letters) in a string
    to their regular letter equivalents.
    """
    # Normalize the string to NFKD form, which separates base characters
    # from diacritical marks (accents).
    normalized_text = unicodedata.normalize('NFKD', text)

    # Encode to ASCII and then decode back, ignoring characters that
    # cannot be represented in ASCII (which effectively removes the diacritics).
    regular_text = normalized_text.encode('ascii', 'ignore').decode('utf-8')

    return regular_text

In [3]:
# Read team table csv
teamdf = pd.read_csv(os.getcwd() + '/' + 'data/currentteaminfo.csv')
teamdf

Unnamed: 0,Player,Age,Position,Value (€ in M)
0,David Raya,29,Goalkeeper,40.0
1,Kepa Arrizabalaga,30,Goalkeeper,10.0
2,Karl Hein,23,Goalkeeper,3.0
3,William Saliba,24,Defender,80.0
4,Gabriel Magalhães,27,Defender,75.0
5,Cristhian Mosquera,21,Defender,30.0
6,Jakub Kiwior,25,Defender,28.0
7,Myles Lewis-Skelly,18,Defender,45.0
8,Riccardo Calafiori,23,Defender,35.0
9,Oleksandr Zinchenko,28,Defender,20.0


In [4]:
# Create name list
namelist = teamdf['Player'].tolist()
input_namelist = [] 

# Convert to all lowercase with '-' in-between first and last name
for name in namelist:
    s = name.lower().replace(" ", "-")
    # Replace special letters with regular
    input_namelist.append(convert_special_to_regular(s))

input_namelist

['david-raya',
 'kepa-arrizabalaga',
 'karl-hein',
 'william-saliba',
 'gabriel-magalhaes',
 'cristhian-mosquera',
 'jakub-kiwior',
 'myles-lewis-skelly',
 'riccardo-calafiori',
 'oleksandr-zinchenko',
 'jurrien-timber',
 'ben-white',
 'martin-zubimendi',
 'christian-nrgaard',
 'declan-rice',
 'mikel-merino',
 'albert-sambi-lokonga',
 'martin-degaard',
 'fabio-vieira',
 'gabriel-martinelli',
 'leandro-trossard',
 'bukayo-saka',
 'ethan-nwaneri',
 'noni-madueke',
 'reiss-nelson',
 'viktor-gyokeres',
 'kai-havertz',
 'gabriel-jesus']

In [5]:
# Dataframe of name to player id
playeridlist = ['262749', '192279', '493513', '495666', '435338', '646750', '425918', '890721', '502821', '203853', '420243', '335721',
                '423440', '148367', '357662', '338424', '381967', '316264', '537598',
                '655488', '144028', '433177', '890719', '503987', '340325', '325443', '309400', '363205']

webinput_df = pd.DataFrame()
webinput_df['InputName'] = input_namelist
webinput_df['player_id'] = playeridlist

# Fix names of CN and MO
webinput_df.loc[webinput_df['InputName'] == 'christian-nrgaard', 'InputName'] = 'christian-norgaard'
webinput_df.loc[webinput_df['InputName'] == 'martin-degaard', 'InputName'] = 'martin-odegaard'

webinput_df

Unnamed: 0,InputName,player_id
0,david-raya,262749
1,kepa-arrizabalaga,192279
2,karl-hein,493513
3,william-saliba,495666
4,gabriel-magalhaes,435338
5,cristhian-mosquera,646750
6,jakub-kiwior,425918
7,myles-lewis-skelly,890721
8,riccardo-calafiori,502821
9,oleksandr-zinchenko,203853


In [None]:
# Get Transfer and Market history
url = 'https://www.transfermarkt.us/ceapi/'


for p,n in zip(webinput_df['InputName'], webinput_df['player_id']):

    # Save dataframe in data folder
    savepath = os.getcwd() + '/' + 'data' + '/' + 'players' + '/' + f'{p}'
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    api_endpoints = [
            f"transferHistory/list/{n}",
            f"marketValueDevelopment/graph/{n}"
            ]
    
    transferdf, marketdf = get_info(url=url, api_endpoints=api_endpoints)

    # Save to path
    transferdf.to_csv(savepath + '/' + 'transferinfo.csv', index=False)
    marketdf.to_csv(savepath + '/' + 'marketinfo.csv', index=False)

    print(f'{p} complete')
    
    time.sleep(5)