In [None]:
%pip install playwright beautifulsoup4 pandas
%playwright install chromium



In [None]:
import time
import random
import os
import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright


# Season label stored in the "Year" field of every scraped row.
YEAR = 2026

# Destination CSV that main() writes the collected rows to.
OUTPUT_PATH = r"C:\Users\tahsi\Downloads\DenPicks\denpicks\app\datasets\lol_data_2026.csv"


# Create the dataset folder up front so the final to_csv() cannot fail on a
# missing directory.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# Match-list page per tournament. main() uses the part of each key before the
# first "_" as the row's "Region" value (e.g. "LEC_Playoffs" -> "LEC").
tournament_urls = {
    "LCK": "https://gol.gg/tournament/tournament-matchlist/LCK%20Cup%202026/",
    "LEC": "https://gol.gg/tournament/tournament-matchlist/LEC%202026%20Versus%20Season/",
    "LEC_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LEC%202026%20Versus%20Playoffs/",
    "LCS": "https://gol.gg/tournament/tournament-matchlist/LCS%202026%20Lock-In/",
    "LPL": "https://gol.gg/tournament/tournament-matchlist/LPL%202026%20Split%201/",
    "LPL_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LPL%202026%20Split%201%20Playoffs/"
}

async def get_match_ids(page, url):
    """Collect the game IDs linked from a tournament match-list page.

    Args:
        page: Playwright page used to load ``url``.
        url: Address of the tournament match-list page.

    Returns:
        A list of unique, purely numeric game-ID strings in the order they
        first appear on the page.  An empty list is returned when the page
        cannot be loaded or parsed (the error is printed).
    """
    print(f"Finding Match IDs from: {url}")
    try:
        await page.goto(url, wait_until="domcontentloaded")
        html = await page.content()
        soup = BeautifulSoup(html, 'html.parser')

        ids = []
        links = soup.find_all('a', href=lambda x: x and '/game/stats/' in x)
        for link in links:
            parts = link['href'].split('/')
            if 'stats' not in parts:
                continue
            idx = parts.index('stats')
            if idx + 1 >= len(parts):
                continue
            match_id = parts[idx + 1]
            # Callers convert the IDs with int(), so anything that is not a
            # plain number (empty segment, slug, ...) must be dropped here.
            if match_id.isdigit():
                ids.append(match_id)
        # dict.fromkeys removes duplicates while keeping a stable order.
        return list(dict.fromkeys(ids))
    except Exception as e:
        print(f"Error fetching match list from {url}: {e}")
        return []

async def scrape_game_details(page, match_id, region):
    """Scrape teams, winner, champions and first-to-five-kills for one game.

    Loads the game's ``page-game`` page for the team headers and the
    player/champion rows, then its ``page-timeline`` page to count kills per
    team until one side reaches five.

    Args:
        page: Playwright page used for navigation.
        match_id: Game ID inserted into the gol.gg URLs.
        region: Label stored unchanged in the "Region" field.

    Returns:
        A dict with the keys "Game ID", "Year", "Region", "Team Blue",
        "Team Red", "Winner", "FT5 Winner" and one "Champ N" key per champion
        found.  ``None`` is returned when either team header is missing or
        when any step raises (the error is printed).
    """
    game_url = f"https://gol.gg/game/stats/{match_id}/page-game/"
    timeline_url = f"https://gol.gg/game/stats/{match_id}/page-timeline/"

    try:
        await page.goto(game_url, wait_until="domcontentloaded")
        soup_game = BeautifulSoup(await page.content(), 'html.parser')
        blue_header = soup_game.find('div', class_='blue-line-header')
        red_header = soup_game.find('div', class_='red-line-header')
        # Both headers are needed for the team names; without them there is
        # nothing to parse for this ID.
        if blue_header is None or red_header is None:
            return None

        team_blue = blue_header.find('a').get_text(strip=True)
        team_red = red_header.find('a').get_text(strip=True)
        winner = team_blue if "WIN" in blue_header.get_text() else team_red

        player_to_info = {}
        champions_list = []
        player_links = soup_game.find_all('a', href=lambda x: x and '../players/player-stats/' in x)

        seen_players = []
        for link in player_links:
            name = link.get_text(strip=True)
            if name and name not in seen_players:
                parent_row = link.find_parent('tr')
                if parent_row is None:
                    # A player link outside a table row carries no champion
                    # icon; skip it instead of failing the whole game.
                    continue
                champ_img = parent_row.find('img', src=lambda x: x and 'champions_icon' in x)
                champ_name = champ_img.get('alt', 'Unknown') if champ_img else "Unknown"
                # The first five distinct players are assigned to the blue
                # team, the next five to the red team.
                player_to_info[name] = {"team": team_blue if len(seen_players) < 5 else team_red}
                champions_list.append(champ_name)
                seen_players.append(name)
            if len(seen_players) == 10:
                break

        await page.goto(timeline_url, wait_until="domcontentloaded")
        soup_time = BeautifulSoup(await page.content(), 'html.parser')
        timeline_table = soup_time.find('table', class_='timeline')
        blue_kills, red_kills = 0, 0
        ft5_winner = "N/A"

        if timeline_table:
            for row in timeline_table.find_all('tr'):
                cols = row.find_all('td')
                if len(cols) < 7:
                    continue
                # Column 4 holds the event icon, column 2 the acting player.
                action_img = cols[4].find('img')
                if action_img and 'kill-icon.png' in action_img.get('src', ''):
                    killer = cols[2].get_text(strip=True)
                    k_info = player_to_info.get(killer, {"team": "Unknown"})
                    if k_info['team'] == team_blue:
                        blue_kills += 1
                    elif k_info['team'] == team_red:
                        red_kills += 1

                    if ft5_winner == "N/A":
                        if blue_kills == 5:
                            ft5_winner = team_blue
                        elif red_kills == 5:
                            ft5_winner = team_red
                # Nothing after the fifth kill matters for this field.
                if blue_kills >= 5 or red_kills >= 5:
                    break

        entry = {
            "Game ID": match_id, "Year": YEAR, "Region": region,
            "Team Blue": team_blue, "Team Red": team_red,
            "Winner": winner, "FT5 Winner": ft5_winner
        }
        for i, champ in enumerate(champions_list):
            entry[f"Champ {i+1}"] = champ
        return entry
    except Exception as e:
        print(f"Error processing match {match_id}: {e}")
        return None

async def main():
    """Scrape every tournament in ``tournament_urls`` and save the rows as CSV.

    Each game ID linked from a tournament's match list is treated as the
    first game of a series, and the five IDs after it are probed for the
    remaining games.  A probed ID is kept only while it shows the same two
    teams as the series' first game: neighbouring IDs can belong to another
    series or to an unrelated tournament and would otherwise be stored under
    this tournament's region.

    Returns:
        A ``pandas.DataFrame`` with one row per scraped game.  It is written
        to ``OUTPUT_PATH`` unless it is empty, so a failed run does not
        overwrite an existing dataset.
    """
    # Imported here because this cell's import list does not include asyncio.
    import asyncio

    games_per_series = 6  # the linked game plus the five IDs after it
    all_results = []
    teams_by_id = {}  # scraped game ID -> frozenset of its two team names

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            for region_key, url in tournament_urls.items():
                region_name = region_key.split('_')[0]
                raw_ids = await get_match_ids(page, url)
                # Ignore anything that cannot be converted with int().
                listed_ids = sorted({int(bid) for bid in raw_ids if str(bid).isdigit()})
                listed_set = set(listed_ids)

                for bid in listed_ids:
                    series_teams = None
                    for offset in range(games_per_series):
                        game_id = bid + offset
                        mid = str(game_id)

                        # Another listed game starts here; it is handled as
                        # its own series.
                        if offset and game_id in listed_set:
                            break

                        if mid in teams_by_id:
                            if offset == 0:
                                series_teams = teams_by_id[mid]
                            continue

                        print(f"Scraping {region_name} | ID: {mid}...")
                        data = await scrape_game_details(page, mid, region_name)

                        # Non-blocking pause between requests; time.sleep()
                        # would stall the event loop Playwright runs on.
                        await asyncio.sleep(random.uniform(4, 7))

                        if not data:
                            if offset == 0:
                                # Without the first game there are no
                                # reference teams to validate later IDs.
                                print(f"Skipping follow-up IDs of {mid}: first game unavailable")
                                break
                            continue

                        teams = frozenset((data["Team Blue"], data["Team Red"]))
                        if offset == 0:
                            series_teams = teams
                        elif teams != series_teams:
                            # Different matchup: this ID is not part of the
                            # series (sides may swap, the two teams may not).
                            break

                        all_results.append(data)
                        teams_by_id[mid] = teams
        finally:
            await browser.close()

    df = pd.DataFrame(all_results)
    if df.empty:
        print(f"No games were scraped; nothing written to {OUTPUT_PATH}")
        return df

    df.to_csv(OUTPUT_PATH, index=False)
    print(f"SUCCESS: Data saved to {OUTPUT_PATH}")
    return df


# Run the scrape and preview the first rows. Top-level ``await`` works here
# because the cell runs inside IPython/Jupyter; in a plain script this would
# need asyncio.run(main()).
df_final = await main()
df_final.head()

Finding Match IDs from: https://gol.gg/tournament/tournament-matchlist/LCK%20Cup%202026/
Scraping LCK | ID: 73112...
Scraping LCK | ID: 73113...
Scraping LCK | ID: 73114...
Scraping LCK | ID: 73115...
Scraping LCK | ID: 73116...
Scraping LCK | ID: 73117...
Scraping LCK | ID: 73118...
Scraping LCK | ID: 73119...
Scraping LCK | ID: 73120...
Scraping LCK | ID: 73121...
Scraping LCK | ID: 73122...
Scraping LCK | ID: 73123...
Scraping LCK | ID: 73124...
Scraping LCK | ID: 73125...
Scraping LCK | ID: 73126...
Scraping LCK | ID: 73127...
Scraping LCK | ID: 73128...
Scraping LCK | ID: 73129...
Scraping LCK | ID: 73130...
Scraping LCK | ID: 73131...
Scraping LCK | ID: 73132...
Scraping LCK | ID: 73133...
Scraping LCK | ID: 73134...
Scraping LCK | ID: 73135...
Scraping LCK | ID: 73136...
Scraping LCK | ID: 73137...
Scraping LCK | ID: 73138...
Scraping LCK | ID: 73139...
Scraping LCK | ID: 73140...
Scraping LCK | ID: 73141...
Scraping LCK | ID: 73142...
Scraping LCK | ID: 73143...
Scraping LCK | 

Unnamed: 0,Game ID,Year,Region,Team Blue,Team Red,Winner,FT5 Winner,Champ 1,Champ 2,Champ 3,Champ 4,Champ 5,Champ 6,Champ 7,Champ 8,Champ 9,Champ 10
0,73112,2026,LCK,DN SOOPers,KT Rolster,DN SOOPers,DN SOOPers,Rumble,Wukong,Ahri,Corki,Pantheon,KSante,Malphite,Taliyah,Yunara,Alistar
1,73113,2026,LCK,DN SOOPers,KT Rolster,KT Rolster,KT Rolster,Sion,Dr. Mundo,Viktor,Ashe,Soraka,Renekton,Sylas,Ryze,Lucian,Braum
2,73114,2026,LCK,KT Rolster,DN SOOPers,KT Rolster,DN SOOPers,RekSai,Jarvan IV,Zoe,Aphelios,Thresh,Ambessa,Nocturne,Aurora,Ziggs,Rell
3,73115,2026,LCK,Dplus KIA,HANJIN BRION,Dplus KIA,Dplus KIA,Ambessa,Jarvan IV,Ryze,Ezreal,Karma,RekSai,Xin Zhao,Taliyah,Yunara,Neeko
4,73116,2026,LCK,Dplus KIA,HANJIN BRION,Dplus KIA,Dplus KIA,Rumble,Pantheon,LeBlanc,Sivir,Bard,KSante,Nocturne,Orianna,Jhin,Elise


In [2]:
import asyncio
import random
import os
import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright


# Season label stored in the "Year" field of every scraped row.
YEAR = 2025
# Destination CSV that main() writes the collected rows to.
OUTPUT_PATH = r"C:\Users\tahsi\Downloads\DenPicks\denpicks\app\datasets\lol_data_2025.csv"

# Create the dataset folder up front so the final to_csv() cannot fail on a
# missing directory.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)


# Match-list page per tournament. main() uses the part of each key before the
# first "_" as the row's "Region" value (e.g. "LEC_Winter_Playoffs" -> "LEC").
tournament_urls = {
    "LCS_Split1": "https://gol.gg/tournament/tournament-matchlist/LTA%20North%202025%20Split%201/",
    "LCS_Playoffs1": "https://gol.gg/tournament/tournament-matchlist/LTA%202025%20Split%201%20Playoffs/",
    "LCS_Split2": "https://gol.gg/tournament/tournament-matchlist/LTA%20North%202025%20Split%202/",
    "LCS_Playoffs2": "https://gol.gg/tournament/tournament-matchlist/LTA%20North%202025%20Split%202%20Playoffs/",
    "LCS_Split3": "https://gol.gg/tournament/tournament-matchlist/LTA%20North%202025%20Split%203/",
    "LEC_Winter": "https://gol.gg/tournament/tournament-matchlist/LEC%20Winter%202025/",
    "LEC_Winter_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LEC%202025%20Winter%20Playoffs/",
    "LEC_Spring": "https://gol.gg/tournament/tournament-matchlist/LEC%202025%20Spring%20Season/",
    "LEC_Spring_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LEC%202025%20Spring%20Playoffs/",
    "LEC_Summer": "https://gol.gg/tournament/tournament-matchlist/LEC%202025%20Summer%20Season/",
    "LEC_Summer_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LEC%202025%20Summer%20Playoffs/",
    "LPL_Split1": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Split%201/",
    "LPL_Split1_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Split%201%20Playoffs/",
    "LPL_Placements": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Split%202%20Placements/",
    "LPL_Split2": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Split%202/",
    "LPL_Split2_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Split%202%20Playoffs/",
    "LPL_Split3": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Split%203/",
    "LPL_Finals": "https://gol.gg/tournament/tournament-matchlist/LPL%202025%20Grand%20Finals/",
    "LCK_Cup": "https://gol.gg/tournament/tournament-matchlist/LCK%20Cup%202025/",
    "LCK_R1_2": "https://gol.gg/tournament/tournament-matchlist/LCK%202025%20Rounds%201-2/",
    "LCK_MSI": "https://gol.gg/tournament/tournament-matchlist/LCK%202025%20Road%20to%20MSI/",
    "LCK_R3_5": "https://gol.gg/tournament/tournament-matchlist/LCK%202025%20Rounds%203-5/",
    "LCK_PlayIn": "https://gol.gg/tournament/tournament-matchlist/LCK%202025%20Season%20Play-In/",
    "LCK_Playoffs": "https://gol.gg/tournament/tournament-matchlist/LCK%202025%20Season%20Playoffs/"
}

async def get_match_ids(page, url):
    """Return the distinct numeric game IDs linked from a match-list page.

    The page at ``url`` is loaded with ``page`` (60 s timeout) and every
    anchor whose href contains ``/game/stats/`` is inspected; the path
    segment following ``stats`` is kept when it consists only of digits.
    Any failure is printed and results in an empty list.
    """
    print(f"Finding Match IDs from: {url}")

    def points_to_game(href):
        # Attribute filter handed to BeautifulSoup.
        return href and '/game/stats/' in href

    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=60000)
        document = BeautifulSoup(await page.content(), 'html.parser')

        collected = []
        for anchor in soup_links(document, points_to_game) if False else document.find_all('a', href=points_to_game):
            segments = anchor['href'].split('/')
            try:
                position = segments.index('stats')
            except ValueError:
                # No standalone "stats" segment in this href.
                continue
            candidate = segments[position + 1]
            if not candidate.isdigit():
                continue
            collected.append(candidate)

        return list(set(collected))
    except Exception as e:
        print(f"Error fetching match list from {url}: {e}")
        return []

async def scrape_game_details(page, match_id, region):
    """Build the dataset row for a single game.

    Two gol.gg pages are fetched with ``page``: ``page-game`` supplies the
    team names, the winner and the champion of each of the first ten distinct
    players; ``page-timeline`` is walked row by row to find which team
    records five kills first.

    Returns a dict keyed "Game ID", "Year", "Region", "Team Blue",
    "Team Red", "Winner", "FT5 Winner" plus "Champ 1".."Champ N", or
    ``None`` when a team header is absent or an exception occurs (the
    exception is printed).
    """
    game_url = f"https://gol.gg/game/stats/{match_id}/page-game/"
    timeline_url = f"https://gol.gg/game/stats/{match_id}/page-timeline/"

    async def fetch_soup(address):
        # Navigate and hand back the parsed document.
        await page.goto(address, wait_until="domcontentloaded", timeout=60000)
        return BeautifulSoup(await page.content(), 'html.parser')

    def is_player_href(href):
        return href and '../players/player-stats/' in href

    def is_champion_icon(src):
        return src and 'champions_icon' in src

    try:
        game_doc = await fetch_soup(game_url)

        blue_header = game_doc.find('div', class_='blue-line-header')
        red_header = game_doc.find('div', class_='red-line-header')
        if not (blue_header and red_header):
            return None

        team_blue = blue_header.find('a').get_text(strip=True)
        team_red = red_header.find('a').get_text(strip=True)
        if "WIN" in blue_header.get_text():
            winner = team_blue
        else:
            winner = team_red

        # Player name -> team name, in order of first appearance; the first
        # five distinct players count as blue side, the rest as red side.
        team_of = {}
        champions = []
        for anchor in game_doc.find_all('a', href=is_player_href):
            player = anchor.get_text(strip=True)
            if player and player not in team_of:
                row = anchor.find_parent('tr')
                icon = row.find('img', src=is_champion_icon)
                champions.append(icon.get('alt', 'Unknown') if icon else "Unknown")
                team_of[player] = team_blue if len(team_of) < 5 else team_red
            if len(team_of) == 10:
                break

        timeline_doc = await fetch_soup(timeline_url)
        timeline_table = timeline_doc.find('table', class_='timeline')

        ft5_winner = "N/A"
        if timeline_table:
            kills = {team_blue: 0, team_red: 0}
            for row in timeline_table.find_all('tr'):
                cells = row.find_all('td')
                if len(cells) < 7:
                    continue
                icon = cells[4].find('img')
                if not (icon and 'kill-icon.png' in icon.get('src', '')):
                    continue
                side = team_of.get(cells[2].get_text(strip=True), "Unknown")
                if side not in kills:
                    continue
                kills[side] += 1
                if kills[side] == 5:
                    # First team to five kills decides the field; stop here.
                    ft5_winner = side
                    break

        entry = {
            "Game ID": match_id,
            "Year": YEAR,
            "Region": region,
            "Team Blue": team_blue,
            "Team Red": team_red,
            "Winner": winner,
            "FT5 Winner": ft5_winner,
        }
        entry.update(
            {f"Champ {slot}": champ for slot, champ in enumerate(champions, start=1)}
        )
        return entry
    except Exception as e:
        print(f"Error processing match {match_id}: {e}")
        return None

async def main():
    """Scrape every tournament in ``tournament_urls`` and save the rows as CSV.

    Each game ID linked from a tournament's match list is treated as the
    first game of a series, and the five IDs after it are probed for the
    remaining games.  A probed ID is kept only while it shows the same two
    teams as the series' first game: neighbouring IDs can belong to another
    series or to an unrelated tournament and would otherwise be stored under
    this tournament's region.

    Returns:
        A ``pandas.DataFrame`` with one row per scraped game.  It is written
        to ``OUTPUT_PATH`` unless it is empty, so a failed run does not
        overwrite an existing dataset.
    """
    games_per_series = 6  # the linked game plus the five IDs after it
    all_results = []
    teams_by_id = {}  # scraped game ID -> frozenset of its two team names

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
            page = await context.new_page()

            for region_key, url in tournament_urls.items():
                region_name = region_key.split('_')[0]
                raw_ids = await get_match_ids(page, url)
                listed_ids = sorted({int(bid) for bid in raw_ids if str(bid).isdigit()})
                listed_set = set(listed_ids)

                for bid in listed_ids:
                    series_teams = None
                    for offset in range(games_per_series):
                        game_id = bid + offset
                        mid = str(game_id)

                        # Another listed game starts here; it is handled as
                        # its own series.
                        if offset and game_id in listed_set:
                            break

                        if mid in teams_by_id:
                            if offset == 0:
                                series_teams = teams_by_id[mid]
                            continue

                        print(f"Scraping {region_name} | ID: {mid}...")
                        data = await scrape_game_details(page, mid, region_name)

                        # Pause between requests without blocking the loop.
                        await asyncio.sleep(random.uniform(4, 7))

                        if not data:
                            if offset == 0:
                                # Without the first game there are no
                                # reference teams to validate later IDs.
                                print(f"Skipping follow-up IDs of {mid}: first game unavailable")
                                break
                            continue

                        teams = frozenset((data["Team Blue"], data["Team Red"]))
                        if offset == 0:
                            series_teams = teams
                        elif teams != series_teams:
                            # Different matchup: this ID is not part of the
                            # series (sides may swap, the two teams may not).
                            break

                        all_results.append(data)
                        teams_by_id[mid] = teams
        finally:
            await browser.close()

    df = pd.DataFrame(all_results)
    if df.empty:
        print(f"No games were scraped; nothing written to {OUTPUT_PATH}")
        return df

    df.to_csv(OUTPUT_PATH, index=False)
    print(f"SUCCESS: 2025 Data saved to {OUTPUT_PATH}")
    return df
# Run the scrape and preview the first rows. Top-level ``await`` works here
# because the cell runs inside IPython/Jupyter; in a plain script this would
# need asyncio.run(main()).
df_final = await main()
df_final.head()



Finding Match IDs from: https://gol.gg/tournament/tournament-matchlist/LTA%20North%202025%20Split%201/
Scraping LCS | ID: 63214...
Scraping LCS | ID: 63215...
Scraping LCS | ID: 63216...
Scraping LCS | ID: 63217...
Scraping LCS | ID: 63218...
Scraping LCS | ID: 63219...
Scraping LCS | ID: 63220...
Scraping LCS | ID: 63221...
Scraping LCS | ID: 63222...
Scraping LCS | ID: 63418...
Scraping LCS | ID: 63419...
Scraping LCS | ID: 63420...
Scraping LCS | ID: 63421...
Scraping LCS | ID: 63422...
Scraping LCS | ID: 63423...
Scraping LCS | ID: 63424...
Scraping LCS | ID: 63425...
Scraping LCS | ID: 63426...
Scraping LCS | ID: 63473...
Scraping LCS | ID: 63474...
Scraping LCS | ID: 63475...
Scraping LCS | ID: 63476...
Scraping LCS | ID: 63477...
Scraping LCS | ID: 63478...
Scraping LCS | ID: 63479...
Scraping LCS | ID: 63480...
Scraping LCS | ID: 63481...
Scraping LCS | ID: 63482...
Scraping LCS | ID: 63483...
Scraping LCS | ID: 63484...
Scraping LCS | ID: 63485...
Scraping LCS | ID: 63486...
S

Unnamed: 0,Game ID,Year,Region,Team Blue,Team Red,Winner,FT5 Winner,Champ 1,Champ 2,Champ 3,Champ 4,Champ 5,Champ 6,Champ 7,Champ 8,Champ 9,Champ 10
0,63214,2025,LCS,Shopify Rebellion,FlyQuest,FlyQuest,FlyQuest,Jax,Skarner,Hwei,Varus,Leona,Urgot,Ivern,Cassiopeia,Ashe,Rell
1,63215,2025,LCS,Shopify Rebellion,FlyQuest,FlyQuest,FlyQuest,KSante,Vi,Viktor,Caitlyn,Maokai,Ambessa,Sejuani,Azir,Jhin,Neeko
2,63217,2025,LCS,LYON,Cloud9,Cloud9,Cloud9,Ambessa,Nocturne,Orianna,Miss Fortune,Rell,Renekton,Wukong,Aurora,Ashe,Braum
3,63218,2025,LCS,LYON,Cloud9,Cloud9,LYON,Poppy,Vi,Ahri,Varus,Rakan,Gnar,Sejuani,Yone,Ziggs,Leona
4,63220,2025,LCS,Berlin International Gaming,Unicorns of Love Sexy Edition,Berlin International Gaming,Unicorns of Love Sexy Edition,KSante,Vi,Viktor,Xayah,Rakan,Aurora,Wukong,Galio,Aphelios,Rell
