In [79]:
import pandas as pd
from tqdm import tqdm
import numpy as np

import requests
from bs4 import BeautifulSoup
import time
from io import StringIO

In [4]:
# Season start years as plain Python ints; the upper bound is exclusive, so
# this currently holds a single season.
seasons = list(range(2023, 2024))

# Request headers sent with every page fetch (a desktop-browser User-Agent).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; CrOS x86_64 12871.102.0) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/81.0.4044.141 Safari/537.36"
    )
}

In [218]:
def parse_transfer_table(team_in, left=False):
    """Turn one club's transfer ``<table>`` into a DataFrame, one row per player.

    Parameters
    ----------
    team_in : bs4.element.Tag
        Parsed table element; every ``<tr>`` inside its ``<tbody>`` is read as
        one transfer. Despite the name, the caller passes both the arrivals
        and the departures table through this argument.
    left : bool, default False
        Only selects the labels of the counterpart-club columns:
        ``TEAM_LEFT`` / ``TEAM_LEFT_ID`` when False,
        ``TEAM_JOINED`` / ``TEAM_JOINED_ID`` when True.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: PLAYER_NAME, PLAYER_ID, AGE, NATIONALITY, POSITION,
        MARKET_VALUE_ON_TRANSFER, <counterpart name>, <counterpart id>, FEE.
        Values are the raw strings found in the markup (nothing is stripped or
        converted). The counterpart id is ``np.nan`` when the club cell has no
        usable link. An empty ``<tbody>`` yields an empty frame with these
        columns.

    Raises
    ------
    AttributeError, TypeError
        When a row lacks one of the expected cells or the player link
        (the lookup returns ``None`` and is then dereferenced).
    """
    club_cell_class = 'no-border-links verein-flagge-transfer-cell'
    counterpart = ('TEAM_JOINED', 'TEAM_JOINED_ID') if left else ('TEAM_LEFT', 'TEAM_LEFT_ID')
    columns = ['PLAYER_NAME', 'PLAYER_ID', 'AGE', 'NATIONALITY', 'POSITION',
               'MARKET_VALUE_ON_TRANSFER', *counterpart, 'FEE']

    infos = []
    for tr in team_in.find('tbody').find_all('tr'):
        # The first link in the row is the player's; its href ends with the player id.
        player_link = tr.find('a')
        player_name = player_link['title']
        player_id = player_link['href'].split('/')[-1]

        age = tr.find('td', {'class': 'zentriert alter-transfer-cell'}).text
        nat = tr.find('td', {'class': 'zentriert nat-transfer-cell'}).find('img')['title']
        pos = tr.find('td', {'class': 'pos-transfer-cell'}).text
        market_value = tr.find('td', {'class': 'rechts mw-transfer-cell'}).text

        # Look the counterpart-club cell up once instead of three times.
        club_cell = tr.find('td', {'class': club_cell_class})
        club_link = club_cell.find('a')

        # Best-effort fallback for cells without a club link: keep the cell
        # text as the name and leave the id missing. Only the "link absent /
        # attribute absent / href too short" cases are caught, so unrelated
        # failures (and KeyboardInterrupt) are no longer silently swallowed.
        try:
            team = club_link['title']
        except (TypeError, KeyError):
            team = club_cell.text

        try:
            team_id = club_link['href'].split('/')[-3]
        except (TypeError, KeyError, IndexError):
            team_id = np.nan

        # The fee is the last cell carrying the 'rechts' class in the row.
        fee = tr.find_all('td', {'class': 'rechts'})[-1].text

        infos.append((player_name, player_id, age, nat, pos, market_value, team, team_id, fee))

    return pd.DataFrame(infos, columns=columns)

In [224]:
def get_transfer(league_code, seasons, timeout=30):
    """Scrape the per-club arrivals and departures tables of one league.

    One page is fetched per season from transfermarkt.co.uk, using the
    module-level ``headers`` dict for the request. Each club box on the page
    contributes its first table (parsed as arrivals) and its second table
    (parsed as departures).

    Parameters
    ----------
    league_code : str
        Competition code placed in the URL path (e.g. ``'GB1'``).
    seasons : iterable
        Season identifiers appended to the ``saison_id`` query parameter.
    timeout : float, default 30
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        The rows of every club of every requested season, concatenated. Each
        club's rows keep their own 0-based index, so index labels repeat.
        Rows carry no season marker; call once per season if seasons must be
        told apart. An empty ``seasons`` yields an empty DataFrame.

    Raises
    ------
    requests.HTTPError
        When the server answers with an error status.
    """
    # NOTE(review): the 'premier-league' slug is fixed while the competition
    # code varies - presumably the site resolves the page from the code alone;
    # confirm.
    base_link = f'https://www.transfermarkt.co.uk/premier-league/transfers/wettbewerb/{league_code}/plus/?saison_id='

    # Collected across ALL seasons. Previously the accumulator was re-created
    # inside the season loop, so only the last season survived and an empty
    # `seasons` left the return value undefined.
    team_frames = []

    for s in seasons:
        full_link = base_link + str(s) + '&s_w=s&leihe=3&intern=0&intern=1'

        response = requests.get(full_link, headers=headers, timeout=timeout)
        # Fail with a clear HTTP error instead of an opaque parsing error later.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # The first three boxes are skipped - presumably page-level boxes that
        # precede the per-club ones; TODO confirm against the live markup.
        club_boxes = soup.find('div', {'class': 'large-8 columns'}).find_all("div", {"class": "box"})[3:]

        for transf in club_boxes:
            # The first link of a box identifies the club; the id is the
            # third-from-last path segment of its href.
            club_link = transf.find('a')
            team_id = club_link['href'].split('/')[-3]
            team_name = club_link['title']

            tables = transf.find_all('table')
            team_in_df = parse_transfer_table(tables[0])
            team_out_df = parse_transfer_table(tables[1], left=True)

            # Each parsed table only knows the counterpart club; fill in the
            # club this box belongs to on the other side of the transfer.
            team_out_df['TEAM_LEFT_ID'] = team_id
            team_out_df['TEAM_LEFT'] = team_name

            team_in_df['TEAM_JOINED_ID'] = team_id
            team_in_df['TEAM_JOINED'] = team_name

            team_frames.append(pd.concat([team_in_df, team_out_df]).reset_index(drop=True))

    # One concat at the end instead of re-copying the growing frame per club.
    if not team_frames:
        return pd.DataFrame()
    return pd.concat(team_frames)
            

In [226]:
# Competition codes to scrape, in processing order.
# NOTE(review): these look like Transfermarkt competition codes for ten
# top-flight leagues - confirm each code against the site.
leagues = [
    'GB1',
    'L1',
    'FR1',
    'ES1',
    'PO1',
    'TR1',
    'TS1',
    'NL1',
    'BE1',
    'IT1',
]

In [228]:
# Scrape each league once and stitch the results together with a single
# concat; growing a DataFrame with concat inside the loop re-copies every
# earlier row on each iteration.
# NOTE(review): the season is hard-coded to 2024 here rather than taken from
# the `seasons` list defined in the config cell - confirm which is intended.
league_frames = [get_transfer(league_code, [2024]) for league_code in tqdm(leagues)]
leagues_df = pd.concat(league_frames)

Unnamed: 0,PLAYER_NAME,PLAYER_ID,AGE,NATIONALITY,POSITION,MARKET_VALUE_ON_TRANSFER,TEAM_LEFT,TEAM_LEFT_ID,FEE,TEAM_JOINED_ID,TEAM_JOINED
0,Riccardo Calafiori,502821,22,Italy,Centre-Back,€45.00m,Bologna FC 1909,1025,€45.00m,11,Arsenal FC
1,Mikel Merino,338424,28,Spain,Central Midfield,€50.00m,Real Sociedad,681,€32.00m,11,Arsenal FC
2,David Raya,262749,28,Spain,Goalkeeper,€35.00m,Brentford FC,1148,€31.90m,11,Arsenal FC
3,Raheem Sterling,134425,29,England,Left Winger,€35.00m,Chelsea FC,631,loan transfer,11,Arsenal FC
4,Neto,111819,35,Brazil,Goalkeeper,€2.50m,AFC Bournemouth,989,loan transfer,11,Arsenal FC
...,...,...,...,...,...,...,...,...,...,...,...
18,Chem Campbell,614603,21,Wales,Attacking Midfield,€900k,Wolverhampton Wanderers,543,loan transfer,1032,Reading FC
19,Tawanda Chirewa,724783,20,Zimbabwe,Attacking Midfield,€500k,Wolverhampton Wanderers,543,loan transfer,22,Derby County
20,Ki-Jana Hoever,485583,22,Netherlands,Right-Back,€4.50m,Wolverhampton Wanderers,543,loan transfer,290,AJ Auxerre
21,Hugo Bueno,698678,21,Spain,Left-Back,€6.00m,Wolverhampton Wanderers,543,loan transfer,234,Feyenoord Rotterdam


In [None]:
# Persist the combined table; the index is dropped on write.
leagues_df.to_csv(
    path_or_buf='full_transfers.csv',
    index=False,
)