In [12]:
import requests
from bs4 import BeautifulSoup
import re
import time
import pandas as pd
import numpy as np

In [None]:
# Site root; the relative profile links found on the ranking page are appended to it.
BASE_URL = "https://unityleague.gg"
# Ranking page the scrape starts from.
# NOTE(review): presumably the 2025 European ranking filtered to Belgium with
# every row shown ("showAll=true") — confirm against the site.
LEADERBOARD_URL = f"{BASE_URL}/ranking/eu2025/BE/?showAll=true"

def get_soup_from_url(url, timeout=10):
    """Download ``url`` and return its HTML parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without one a stalled connection
            would block the whole scrape indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')

def extract_players(soup):
    """Collect one record per linked player from the ranking table.

    Rows of ``table#rankingTable`` with fewer than three cells, or whose third
    cell contains no ``<a>`` element, are ignored.

    Args:
        soup: Parsed ranking page.

    Returns:
        A list of dicts with the keys ``rank`` (text of the first cell),
        ``points`` (second cell converted to ``int``), ``name`` (link text),
        ``profile_url`` (``BASE_URL`` followed by the link's ``href``) and
        ``player_id`` (third ``/``-separated piece of the ``href``).
    """
    cell_lists = (tr.find_all("td") for tr in soup.select("table#rankingTable tbody tr"))

    found = []
    for cells in cell_lists:
        if len(cells) < 3:
            continue

        link = cells[2].find("a")
        if not link:
            continue

        record = {
            "rank": cells[0].get_text(strip=True),
            "points": int(cells[1].get_text(strip=True)),
            "name": link.get_text(strip=True),
            "profile_url": BASE_URL + link['href'],
            "player_id": link['href'].split('/')[2]
        }
        found.append(record)
    return found

def parse_player_profile(url):
    """Aggregate a player's weekly Limited draft results from their profile page.

    Only rows of the table following the "Event history" heading are counted,
    and only when the format cell is exactly ``Limited`` and the event name
    contains both "draft" and "weekly" (case-insensitive). The record cell must
    start with a ``wins-losses-draws`` triple.

    Args:
        url: Address of the player's profile page.

    Returns:
        A dict with ``wins``, ``losses``, ``draws``, ``winrate`` (percentage
        rounded to one decimal, a draw counting as half a win),
        ``limited_events`` and ``matches``; or ``None`` when the page has no
        event history, no results table, no qualifying event, no recorded
        match, or when fetching/parsing fails (the error is printed).
    """
    try:
        soup = get_soup_from_url(url)
        events_section = soup.find("h2", string=re.compile("Event history", re.I))
        if not events_section:
            return None

        # A heading without a following table has nothing to aggregate; return
        # explicitly rather than letting an AttributeError reach the handler.
        events_table = events_section.find_next("table")
        if events_table is None:
            return None

        total_wins = total_losses = total_draws = 0
        draft_weekly_count = 0

        for row in events_table.select("tbody tr"):
            cols = row.find_all('td')
            if len(cols) < 7:
                continue

            # Keep only Limited events whose name mentions both keywords.
            event_name = cols[1].text.lower()
            if cols[4].text.strip() != "Limited":
                continue
            if "draft" not in event_name or "weekly" not in event_name:
                continue

            match = re.match(r"(\d+)\s*-\s*(\d+)\s*-\s*(\d+)", cols[6].text.strip())
            if not match:
                continue

            wins, losses, draws = (int(part) for part in match.groups())
            total_wins += wins
            total_losses += losses
            total_draws += draws
            draft_weekly_count += 1

        total_matches = total_wins + total_losses + total_draws
        # Events recorded as "0-0-0" would otherwise divide by zero below and be
        # reported as a processing error.
        if draft_weekly_count == 0 or total_matches == 0:
            return None

        return {
            "wins": total_wins,
            "losses": total_losses,
            "draws": total_draws,
            "winrate": round((total_wins + 0.5 * total_draws) / total_matches * 100, 1),
            "limited_events": draft_weekly_count,
            "matches": total_matches
        }

    except Exception as e:
        # Best effort: one broken profile must not stop the whole scrape.
        print(f"Error processing profile: {e}")
        return None

def main(request_delay=1.0):
    """Scrape the leaderboard, score every player and export the ranking.

    Each player listed on ``LEADERBOARD_URL`` has their profile parsed with
    ``parse_player_profile``; players without stats are dropped. The remaining
    rows receive ``update_time`` (formatted string), ``perf_score``
    (``winrate * ln(matches + 1)``, two decimals) and a fresh 1-based ``rank``
    in descending ``perf_score`` order, replacing the rank scraped from the site.

    Args:
        request_delay: Seconds to pause after each profile request.

    Returns:
        The ranked ``DataFrame``. It is empty, and no file is written, when no
        player produced any stats.

    Side effects:
        Writes ``draft_weekly_leaderboard.csv`` and ``leaderboard.json`` in the
        current working directory and prints progress to stdout.
    """
    players = extract_players(get_soup_from_url(LEADERBOARD_URL))
    results = []

    for player in players:
        try:
            if stats := parse_player_profile(player['profile_url']):
                player.update(stats)
                results.append(player)
                print(f"Processed {player['name']}")
        except Exception as e:
            print(f"Error with {player['name']}: {e}")
        # Pause after every profile, including failed ones, so errors do not
        # turn into back-to-back requests.
        time.sleep(request_delay)

    if not results:
        # Without rows there are no "matches"/"winrate" columns to score, and
        # exporting would replace earlier files with empty ones.
        print("No draft weekly results found; nothing written.")
        return pd.DataFrame(results)

    df = pd.DataFrame(results).astype({"matches": float, "winrate": float})

    # Timestamp of this update, stored as text.
    df['update_time'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')

    df["perf_score"] = (df["winrate"] * np.log(df["matches"] + 1)).round(2)

    # Rebuild the ranking: best score first. The stable sort keeps tied players
    # in the order in which they were scraped.
    df = (
        df.sort_values('perf_score', ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )
    df['rank'] = df.index + 1
    df['rank'] = df['rank'].astype(int)

    df.to_csv('draft_weekly_leaderboard.csv', index=False)
    df.to_json("leaderboard.json", orient="records", indent=2)
    return df

if __name__ == "__main__":
    df = main()


Processed Aymeric Steinbach
Processed Thomas Bijl
Processed Thomas Decamp
Processed Niels Lucking
Processed Emmanuel Fleau De Frôler
Processed Ricardo R
Processed Nicolas Sargos
Processed Maciej Mrozowski
Processed Martin Lepeintre
Processed Pierrick Decamp
Processed Pierre Delandsheere
Processed Vincent Bichelberger
Processed Ozan Fincan


KeyboardInterrupt: 

In [None]:
import numpy as np

# Reload the exported CSV, forcing both inputs of the score to floating point.
df = pd.read_csv('draft_weekly_leaderboard.csv').astype({"matches": float, "winrate": float})

# perf_score = winrate * ln(matches + 1), rounded to two decimals.
df["perf_score"] = (df["winrate"] * np.log(df["matches"] + 1)).round(2)
df 

Unnamed: 0,rank,points,name,profile_url,player_id,wins,losses,draws,winrate,limited_events,matches,perf_score
0,148,12,Diane Kaufman,https://unityleague.gg/player/17726/,17726,3,0,0,100.0,1,3.0,138.63
1,146,12,Nicolas De Herdt,https://unityleague.gg/player/16011/,16011,3,0,0,100.0,1,3.0,138.63
2,73,39,César Castello,https://unityleague.gg/player/15063/,15063,3,0,0,100.0,1,3.0,138.63
3,131,12,Minh Lê Thanh,https://unityleague.gg/player/11241/,11241,3,0,0,100.0,1,3.0,138.63
4,62,43,Arthur Moulin,https://unityleague.gg/player/12264/,12264,10,1,1,87.5,4,12.0,224.43
...,...,...,...,...,...,...,...,...,...,...,...,...
99,297,3,Kodi Craft,https://unityleague.gg/player/14999/,14999,0,3,0,0.0,1,3.0,0.00
100,300,3,Alexandre Vieren,https://unityleague.gg/player/16628/,16628,0,3,0,0.0,1,3.0,0.00
101,301,3,Marius Escande,https://unityleague.gg/player/17209/,17209,0,3,0,0.0,1,3.0,0.00
102,302,3,Rafael Scudelari de Macedo,https://unityleague.gg/player/17728/,17728,0,3,0,0.0,1,3.0,0.00


In [None]:
# Best perf_score first; winrate breaks ties. The index is rebuilt so that it
# follows the new row order.
df = df.sort_values(['perf_score', 'winrate'], ascending=False).reset_index(drop=True)

# Reset the ranking: 1-based position in the sorted frame.
df['rank'] = df.index + 1
df['rank'] = df['rank'].astype(int)

In [None]:
# Show the frame after re-ranking.
df 

Unnamed: 0,rank,points,name,profile_url,player_id,wins,losses,draws,winrate,limited_events,matches,perf_score,last_updated
0,148,12,Diane Kaufman,https://unityleague.gg/player/17726/,17726,3,0,0,100.0,1,3.0,138.63,2025-05-29 13:39:41
1,146,12,Nicolas De Herdt,https://unityleague.gg/player/16011/,16011,3,0,0,100.0,1,3.0,138.63,2025-05-29 13:39:41
2,73,39,César Castello,https://unityleague.gg/player/15063/,15063,3,0,0,100.0,1,3.0,138.63,2025-05-29 13:39:41
3,131,12,Minh Lê Thanh,https://unityleague.gg/player/11241/,11241,3,0,0,100.0,1,3.0,138.63,2025-05-29 13:39:41
4,62,43,Arthur Moulin,https://unityleague.gg/player/12264/,12264,10,1,1,87.5,4,12.0,224.43,2025-05-29 13:39:41
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,297,3,Kodi Craft,https://unityleague.gg/player/14999/,14999,0,3,0,0.0,1,3.0,0.00,2025-05-29 13:39:41
100,300,3,Alexandre Vieren,https://unityleague.gg/player/16628/,16628,0,3,0,0.0,1,3.0,0.00,2025-05-29 13:39:41
101,301,3,Marius Escande,https://unityleague.gg/player/17209/,17209,0,3,0,0.0,1,3.0,0.00,2025-05-29 13:39:41
102,302,3,Rafael Scudelari de Macedo,https://unityleague.gg/player/17728/,17728,0,3,0,0.0,1,3.0,0.00,2025-05-29 13:39:41


In [None]:
# Export every column of the current frame as a list of JSON records.
# NOTE(review): the last cell reads leaderboard.json as the *previous* ranking;
# running this cell first means it will find the current ranking there instead —
# confirm the intended execution order.
df.to_json("leaderboard.json", orient="records", indent=2)

In [15]:
# Stamp every row with the moment this cell ran, then show the frame.
df = df.assign(update_time=pd.Timestamp.now())
df 

Unnamed: 0,rank,points,name,profile_url,player_id,wins,losses,draws,winrate,limited_events,matches,last_updated,perf_score,update_time
6,7,146,Nicolas Sargos,https://unityleague.gg/player/11223/,11223,24,7,2,75.8,11,33.0,2025-05-29 13:58:21.583219,267.30,2025-05-29 14:55:35.315008
7,8,122,Maciej Mrozowski,https://unityleague.gg/player/11224/,11224,22,10,1,68.2,11,33.0,2025-05-29 13:58:21.583219,240.50,2025-05-29 14:55:35.315008
29,30,43,Arthur Moulin,https://unityleague.gg/player/12264/,12264,10,1,1,87.5,4,12.0,2025-05-29 13:58:21.583219,224.43,2025-05-29 14:55:35.315008
11,12,108,Vincent Bichelberger,https://unityleague.gg/player/11227/,11227,15,9,0,62.5,8,24.0,2025-05-29 13:58:21.583219,201.18,2025-05-29 14:55:35.315008
1,2,246,Thomas Bijl,https://unityleague.gg/player/11219/,11219,16,11,0,59.3,9,27.0,2025-05-29 13:58:21.583219,197.60,2025-05-29 14:55:35.315008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,97,3,Félix Boisvert,https://unityleague.gg/player/11239/,11239,0,3,0,0.0,1,3.0,2025-05-29 13:58:21.583219,0.00,2025-05-29 14:55:35.315008
28,29,45,Michal Dlugosz,https://unityleague.gg/player/14027/,14027,0,3,0,0.0,1,3.0,2025-05-29 13:58:21.583219,0.00,2025-05-29 14:55:35.315008
68,69,12,Victor Lepeintre,https://unityleague.gg/player/13954/,13954,0,3,0,0.0,1,3.0,2025-05-29 13:58:21.583219,0.00,2025-05-29 14:55:35.315008
66,67,13,François Schellekens,https://unityleague.gg/player/12831/,12831,0,3,0,0.0,1,3.0,2025-05-29 13:58:21.583219,0.00,2025-05-29 14:55:35.315008


In [16]:
# Summarise column dtypes and non-null counts of the current frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 104 entries, 6 to 103
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   rank            104 non-null    int32         
 1   points          104 non-null    int64         
 2   name            104 non-null    object        
 3   profile_url     104 non-null    object        
 4   player_id       104 non-null    object        
 5   wins            104 non-null    int64         
 6   losses          104 non-null    int64         
 7   draws           104 non-null    int64         
 8   winrate         104 non-null    float64       
 9   limited_events  104 non-null    int64         
 10  matches         104 non-null    float64       
 11  last_updated    104 non-null    datetime64[ns]
 12  perf_score      104 non-null    float64       
 13  update_time     104 non-null    datetime64[ns]
dtypes: datetime64[ns](2), float64(3), int32(1), int64(5), object(3)

In [None]:
import json
import shutil

# Load the ranking saved by an earlier run, if there is a usable one.
# NOTE(review): main() and an earlier cell also write leaderboard.json; if they
# ran in this session before this cell, the "previous" ranking read here is
# already the current one — confirm the intended execution order.
try:
    with open('leaderboard.json', 'r', encoding='utf-8') as f:
        prev_data = json.load(f)
    prev_df = pd.DataFrame(prev_data)
except FileNotFoundError:
    # First run: nothing to compare with.
    prev_df = pd.DataFrame([])
except ValueError as exc:
    # json.JSONDecodeError is a ValueError, and so is the error pandas raises
    # for JSON that is not record-shaped. A damaged export should not abort the
    # update; it is simply treated as "no previous ranking".
    print(f"Ignoring unreadable leaderboard.json: {exc}")
    prev_df = pd.DataFrame([])

# Build a mapping name -> previous rank. An export without both columns
# (including the empty frame used above) yields an empty mapping.
if {'name', 'rank'}.issubset(prev_df.columns):
    prev_rank_map = prev_df.set_index('name')['rank'].to_dict()
else:
    prev_rank_map = {}

# Add a last_rank column to the current frame; players absent from the previous
# ranking get a missing value.
df['last_rank'] = df['name'].map(prev_rank_map)


# Keep a backup of the previous ranking before it is overwritten below.
try:
    shutil.copy('leaderboard.json', 'leaderboard_prev.json')
except FileNotFoundError:
    # First run, no previous file to back up.
    pass

# Keep only the columns that are useful in the export.
cols = [
    'rank', 'name', 'wins', 'losses', 'draws', 'matches',
    'winrate', 'limited_events', 'perf_score', 'points', 'profile_url',
    'update_time', 'last_rank'
]
# Columns missing from the frame are silently skipped.
cols = [c for c in cols if c in df.columns]

# Save the current ranking, keeping accented names as real characters.
df[cols].to_json('leaderboard.json', orient='records', force_ascii=False, indent=2)