In [39]:
from library import table, append_table, historize, get_teams, get_competitions, load_keys, headers
from library import replace_table
import sqlite3
from datetime import datetime, timedelta
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Leagues

In [2]:
leagues_history = table("league")

league_data = get_competitions()
leagues =  pd.DataFrame(league_data, columns =  ['name', 'link'])
leagues = pd.merge(leagues.assign(joined =  1), leagues_history, on =  ['name', 'link'], how = 'outer')
leagues = leagues[leagues.joined != 1]

if len(leagues > 0):
    print(f"Committing {len(leagues)} records")
    leagues = leagues.drop("joined", axis = 1)
    append_table(leagues, league)
else:
    print("League table up-to-date")

No league updates today!


### Clubs

In [3]:
leagues = table("league")
league_urls = leagues.link.unique()

In [4]:
teams_df = pd.DataFrame()

#Scrape club data
for url in league_urls:
    teams_df = pd.concat([teams_df, get_teams(url)])

#Prepare df
teams_df = teams_df.dropna(subset = 'Club.1')
selection = ['Club.1', 'link']
teams_df = teams_df[selection].drop_duplicates().rename({"Club.1" : "club"}, axis = 1)

#Add hash keys and effective dates
teams_df = historize(teams_df)

#Filter only new data
current_clubs = table("club")
updates = teams_df[~teams_df.hash_key.isin(current_clubs.hash_key.unique())]

#Commit changes
if len(updates) > 0:
    print(f"Adding {len(updates)} new club records")
    append_table(updates ,'club')
else:
    print("Club table up-to-date")

https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1
https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1
https://www.transfermarkt.com/bundesliga/startseite/wettbewerb/L1
https://www.transfermarkt.com/serie-a/startseite/wettbewerb/IT1
https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1
https://www.transfermarkt.com/liga-portugal/startseite/wettbewerb/PO1
https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1
https://www.transfermarkt.com/jupiler-pro-league/startseite/wettbewerb/BE1
https://www.transfermarkt.com/super-liga-srbije/startseite/wettbewerb/SER1
https://www.transfermarkt.com/super-league-1/startseite/wettbewerb/GR1
https://www.transfermarkt.com/allsvenskan/startseite/wettbewerb/SE1
https://www.transfermarkt.com/supersport-hnl/startseite/wettbewerb/KR1
https://www.transfermarkt.com/major-league-soccer/startseite/wettbewerb/MLS1
https://www.transfermarkt.com/campeonato-brasileiro-serie-a/startseite/wettbewerb/BRA1
https://www

### Club Links

In [61]:
clubs = load_keys('club', ['club', 'link']) 

league_urls = clubs.link.unique()

team_links = []

for idx, league_url in enumerate(league_urls):
    if idx // 5 == idx / 5:
        print(f"{round(100*idx/len(league_urls))}% complete")
    print(league_url)
    response = requests.get(league_url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the table containing the teams
    temp = soup.select('table.items tbody tr')
    for row in soup.select('table.items tbody tr'):
        link_tag = row.select_one('td.hauptlink a')
        if link_tag:
            team_name = link_tag.text.strip()
            relative_link = link_tag['href']
            #filter out top scorer links
            if r"profil/spieler" not in relative_link:
                full_link = "https://www.transfermarkt.com" + relative_link
                team_links.append((league_url, team_name, full_link))

0% complete
https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1
https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1
https://www.transfermarkt.com/bundesliga/startseite/wettbewerb/L1
https://www.transfermarkt.com/serie-a/startseite/wettbewerb/IT1
https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1
21% complete
https://www.transfermarkt.com/liga-portugal/startseite/wettbewerb/PO1
https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1
https://www.transfermarkt.com/jupiler-pro-league/startseite/wettbewerb/BE1
https://www.transfermarkt.com/super-liga-srbije/startseite/wettbewerb/SER1
https://www.transfermarkt.com/super-league-1/startseite/wettbewerb/GR1
42% complete
https://www.transfermarkt.com/allsvenskan/startseite/wettbewerb/SE1
https://www.transfermarkt.com/supersport-hnl/startseite/wettbewerb/KR1
https://www.transfermarkt.com/major-league-soccer/startseite/wettbewerb/MLS1
https://www.transfermarkt.com/campeonato-brasileiro-serie-a/

In [62]:
teams_df = pd.DataFrame(team_links, columns = ['league_link', 'team', 'team_link']).drop_duplicates()
teams_df = teams_df[teams_df.team_link.str.contains("start")]

def createDetailedURL(url):
    return url.replace('startseite', 'kader') + r"/plus/1"

teams_df["team_link_detailed"] = teams_df['team_link'].apply(lambda x: createDetailedURL(x))
teams_df = historize(teams_df)

replace_table("club_links", teams_df)

Are you sure you want to overwrite this table? This will overwrite the existing data permanently (y/n)
y


In [63]:
temp = teams_df.copy()
temp['team'] = temp['team'].apply(lambda x: x.lower() if 'City' in x else x)
temp = historize(temp)
temp.head()

Unnamed: 0,league_link,team,team_link,team_link_detailed,hash_key,effective_start_date,effective_end_date
0,https://www.transfermarkt.com/premier-league/s...,manchester city,https://www.transfermarkt.com/manchester-city/...,https://www.transfermarkt.com/manchester-city/...,8c78d8fb7b4c61298e7afe0de658bd672cd98b576f815b...,2025-04-27,2099-12-31
1,https://www.transfermarkt.com/premier-league/s...,Arsenal FC,https://www.transfermarkt.com/fc-arsenal/start...,https://www.transfermarkt.com/fc-arsenal/kader...,c445a6b66aa3c248aad4aca423e9af30d8762075cc4079...,2025-04-27,2099-12-31
2,https://www.transfermarkt.com/premier-league/s...,Liverpool FC,https://www.transfermarkt.com/fc-liverpool/sta...,https://www.transfermarkt.com/fc-liverpool/kad...,3c9774d7213375a0d66659cbfd57dc0d5084064ef2378e...,2025-04-27,2099-12-31
3,https://www.transfermarkt.com/premier-league/s...,Chelsea FC,https://www.transfermarkt.com/fc-chelsea/start...,https://www.transfermarkt.com/fc-chelsea/kader...,b957b9b3e89dddaa7cdd75231b80f955f5da78d9cf0ae1...,2025-04-27,2099-12-31
4,https://www.transfermarkt.com/premier-league/s...,Tottenham Hotspur,https://www.transfermarkt.com/tottenham-hotspu...,https://www.transfermarkt.com/tottenham-hotspu...,facdb375bc6b6aa2585efb37d7a0fe4282279384847858...,2025-04-27,2099-12-31


In [64]:
primary_key = ['league_link', 'team']
history = load_keys('club_links', primary_key)
updates = temp[~temp.hash_key.isin(history.hash_key.unique())]
yesterday = str((datetime.today() - timedelta(days=1)).date())
updates

Unnamed: 0,league_link,team,team_link,team_link_detailed,hash_key,effective_start_date,effective_end_date
0,https://www.transfermarkt.com/premier-league/s...,manchester city,https://www.transfermarkt.com/manchester-city/...,https://www.transfermarkt.com/manchester-city/...,8c78d8fb7b4c61298e7afe0de658bd672cd98b576f815b...,2025-04-27,2099-12-31
19,https://www.transfermarkt.com/premier-league/s...,leicester city,https://www.transfermarkt.com/leicester-city/s...,https://www.transfermarkt.com/leicester-city/k...,daf7593c5ecb9979393a46a311ef9b464dddc6de2c181a...,2025-04-27,2099-12-31
130,https://www.transfermarkt.com/eredivisie/start...,almere city fc,https://www.transfermarkt.com/almere-city-fc/s...,https://www.transfermarkt.com/almere-city-fc/k...,13f7a488830dd21158c4abbbd9e4f524721f2a3e9f455d...,2025-04-27,2099-12-31
216,https://www.transfermarkt.com/major-league-soc...,orlando city sc,https://www.transfermarkt.com/orlando-city-sc/...,https://www.transfermarkt.com/orlando-city-sc/...,913172fa2a944b1675938595e03058c1af240f443b65a7...,2025-04-27,2099-12-31
220,https://www.transfermarkt.com/major-league-soc...,new york city fc,https://www.transfermarkt.com/new-york-city-fc...,https://www.transfermarkt.com/new-york-city-fc...,1729aa464a1ca1ff0e90a012c1920666cd4b4f9957ae0f...,2025-04-27,2099-12-31
226,https://www.transfermarkt.com/major-league-soc...,real salt lake city,https://www.transfermarkt.com/real-salt-lake-c...,https://www.transfermarkt.com/real-salt-lake-c...,1202a574167b3d8ddb207ab153cf8cd74e363cd6e63bde...,2025-04-27,2099-12-31
230,https://www.transfermarkt.com/major-league-soc...,sporting kansas city,https://www.transfermarkt.com/sporting-kansas-...,https://www.transfermarkt.com/sporting-kansas-...,140a1afe835a59b5ea49e79fac6db15809edb80c5ca17d...,2025-04-27,2099-12-31
393,https://www.transfermarkt.com/betway-premiersh...,cape town city fc,https://www.transfermarkt.com/cape-town-city-f...,https://www.transfermarkt.com/cape-town-city-f...,945c5a3fb2debbe5284186d517d816cb877382e75400c8...,2025-04-27,2099-12-31
398,https://www.transfermarkt.com/betway-premiersh...,polokwane city fc,https://www.transfermarkt.com/polokwane-city-f...,https://www.transfermarkt.com/polokwane-city-f...,602445891213d3cef59323aa3a8b26a0aaca5cf32721e3...,2025-04-27,2099-12-31


In [65]:


for idx, row in updates.iterrows():
    
    club = row['team'].title()
    link = row['league_link']
    
#     Query = f"""UPDATE club_links
#     SET effective_end_date = DATE('now')
#     WHERE team = '{club}'
#     AND league_link = '{link}'
#     AND effective_end_date = '2099-12-31';
#     """
    Query = f"""UPDATE club_links
    SET effective_end_date = DATE('now')
    WHERE team = '{club}'
    AND league_link = '{link}'
    """
    print(Query)
    print(Query)
    
    conn = sqlite3.connect('transfermarkt.db')
    cursor = conn.cursor()
    cursor.execute(Query)
    conn.commit()
    conn.close()

UPDATE club_links
    SET effective_end_date = DATE('now')
    WHERE team = 'Manchester City'
    AND league_link = 'https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1'
    
UPDATE club_links
    SET effective_end_date = DATE('now')
    WHERE team = 'Manchester City'
    AND league_link = 'https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1'
    
UPDATE club_links
    SET effective_end_date = DATE('now')
    WHERE team = 'Leicester City'
    AND league_link = 'https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1'
    
UPDATE club_links
    SET effective_end_date = DATE('now')
    WHERE team = 'Leicester City'
    AND league_link = 'https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1'
    
UPDATE club_links
    SET effective_end_date = DATE('now')
    WHERE team = 'Almere City Fc'
    AND league_link = 'https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1'
    
UPDATE club_links
    SET effective_e

In [66]:
temp = table("club_links")
temp[temp.team.str.contains("City")]

Unnamed: 0,league_link,team,team_link,team_link_detailed,hash_key,effective_start_date,effective_end_date
0,https://www.transfermarkt.com/premier-league/s...,Manchester City,https://www.transfermarkt.com/manchester-city/...,https://www.transfermarkt.com/manchester-city/...,44a3e87290d2bfc86a8807cfe97e68d1c851c192ace915...,2025-04-27,2025-04-27
19,https://www.transfermarkt.com/premier-league/s...,Leicester City,https://www.transfermarkt.com/leicester-city/s...,https://www.transfermarkt.com/leicester-city/k...,fd4e8afdb0b3c12896a02423c92791a6d597c0bf198c42...,2025-04-27,2025-04-27
130,https://www.transfermarkt.com/eredivisie/start...,Almere City FC,https://www.transfermarkt.com/almere-city-fc/s...,https://www.transfermarkt.com/almere-city-fc/k...,f350f6bb41bfa63869a3773b7ca78048be0a3eb7b820c0...,2025-04-27,2099-12-31
216,https://www.transfermarkt.com/major-league-soc...,Orlando City SC,https://www.transfermarkt.com/orlando-city-sc/...,https://www.transfermarkt.com/orlando-city-sc/...,eeb0eb93a4d9e47490a8b0e5edb49ebe728b6f42c32951...,2025-04-27,2099-12-31
220,https://www.transfermarkt.com/major-league-soc...,New York City FC,https://www.transfermarkt.com/new-york-city-fc...,https://www.transfermarkt.com/new-york-city-fc...,f4949fdb1ee9bcf9efbea742da867fe4d3cd190086b29c...,2025-04-27,2099-12-31
226,https://www.transfermarkt.com/major-league-soc...,Real Salt Lake City,https://www.transfermarkt.com/real-salt-lake-c...,https://www.transfermarkt.com/real-salt-lake-c...,d69b7c54b511a8e0fca3439c6ac13d4971e74b113f043b...,2025-04-27,2025-04-27
230,https://www.transfermarkt.com/major-league-soc...,Sporting Kansas City,https://www.transfermarkt.com/sporting-kansas-...,https://www.transfermarkt.com/sporting-kansas-...,41ae4b3264846ed60a2ae06dca632df7e64b2839f979b6...,2025-04-27,2025-04-27
393,https://www.transfermarkt.com/betway-premiersh...,Cape Town City FC,https://www.transfermarkt.com/cape-town-city-f...,https://www.transfermarkt.com/cape-town-city-f...,aff1832d9953af4d5918953f18556f8b66aa292f83e1fd...,2025-04-27,2099-12-31
398,https://www.transfermarkt.com/betway-premiersh...,Polokwane City FC,https://www.transfermarkt.com/polokwane-city-f...,https://www.transfermarkt.com/polokwane-city-f...,8ac2d998818c8e255c26a08e0ce8ea26aea6f95dbe257f...,2025-04-27,2099-12-31


In [None]:
ze te duur zijn