In [None]:
%pip install playwright beautifulsoup4 pandas
%playwright install chromium



In [7]:
import time
import random
import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright

# Match IDs to scrape, kept as strings because they are interpolated into URLs
match_ids = [str(game_id) for game_id in range(74673, 74677)]

def _parse_game_page(html):
    """Extract teams, winner and the player -> champion/team map from a game page.

    Args:
        html: HTML source of the ``page-game`` view.

    Returns:
        ``(team_blue, team_red, winner, player_to_info)`` where
        ``player_to_info`` maps a player name to ``{"champ": ..., "team": ...}``
        in page order, or ``None`` when the team headers cannot be found.
    """
    soup = BeautifulSoup(html, 'html.parser')

    blue_header = soup.find('div', class_='blue-line-header')
    red_header = soup.find('div', class_='red-line-header')
    blue_link = blue_header.find('a') if blue_header else None
    red_link = red_header.find('a') if red_header else None
    if blue_link is None or red_link is None:
        # Page layout differs from what we expect (or the page failed to load).
        return None

    team_blue = blue_link.get_text(strip=True)
    team_red = red_link.get_text(strip=True)
    # NOTE(review): relies on the literal text "WIN" appearing in the winning
    # side's header; a team name containing "WIN" would confuse this check.
    winner = team_blue if "WIN" in blue_header.get_text() else team_red

    # Player rows are located through their links to the player-stats pages.
    player_links = soup.find_all(
        'a', href=lambda x: x and '../players/player-stats/' in x
    )

    # dicts preserve insertion order, so this doubles as the "seen" list.
    player_to_info = {}
    for link in player_links:
        name = link.get_text(strip=True)
        if not name or name in player_to_info:
            continue

        # The champion icon is looked up in the same table row as the player.
        parent_row = link.find_parent('tr')
        champ_img = (
            parent_row.find('img', src=lambda x: x and 'champions_icon' in x)
            if parent_row else None
        )
        champ_name = champ_img.get('alt', 'Unknown') if champ_img else "Unknown"

        # Assumes the page lists the five blue-side players first, then red.
        current_team = team_blue if len(player_to_info) < 5 else team_red
        player_to_info[name] = {"champ": champ_name, "team": current_team}

        # Only the first 10 unique players are wanted.
        if len(player_to_info) == 10:
            break

    return team_blue, team_red, winner, player_to_info


def _parse_timeline(html, player_to_info, team_blue, team_red):
    """Find the first team to reach five kills and build a readable kill log.

    Args:
        html: HTML source of the ``page-timeline`` view.
        player_to_info: Player map produced by ``_parse_game_page``.
        team_blue: Name of the blue-side team.
        team_red: Name of the red-side team.

    Returns:
        ``(ft5_winner, kill_events)``; ``ft5_winner`` is ``"N/A"`` when neither
        team reaches five attributed kills or the timeline table is missing.
    """
    soup = BeautifulSoup(html, 'html.parser')
    timeline_table = soup.find('table', class_='timeline')

    kill_events = []
    ft5_winner = "N/A"
    if timeline_table is None:
        return ft5_winner, kill_events

    unknown = {"champ": "???", "team": "Unknown"}
    blue_kills, red_kills = 0, 0

    for row in timeline_table.find_all('tr'):
        cols = row.find_all('td')
        if len(cols) < 7:
            continue

        # Column 4 holds the action icon; only kill events are of interest.
        action_img = cols[4].find('img')
        if not (action_img and 'kill-icon.png' in action_img.get('src', '')):
            continue

        killer = cols[2].get_text(strip=True)
        victim = cols[6].get_text(strip=True)
        k_info = player_to_info.get(killer, unknown)
        v_info = player_to_info.get(victim, unknown)

        kill_events.append(
            f"{killer} ({k_info['champ']}) killed {victim} ({v_info['champ']})"
        )

        # Credit the kill only when the killer's team is actually known;
        # unmapped killers must not be counted for the red side.
        if k_info['team'] == team_blue:
            blue_kills += 1
        elif k_info['team'] == team_red:
            red_kills += 1

        if blue_kills == 5:
            ft5_winner = team_blue
        elif red_kills == 5:
            ft5_winner = team_red

        # Once first-to-five is decided at least five kills are logged,
        # which is all the caller uses, so the rest can be skipped.
        if ft5_winner != "N/A":
            break

    return ft5_winner, kill_events


async def _scrape_match(page, match_id):
    """Scrape one match with an open Playwright page.

    Returns:
        A dict forming one output row, or ``None`` when the summary page could
        not be parsed (a message is printed in that case).
    """
    game_url = f"https://gol.gg/game/stats/{match_id}/page-game/"
    timeline_url = f"https://gol.gg/game/stats/{match_id}/page-timeline/"

    # --- STEP 1: VISIT GAME PAGE FOR CHAMPIONS ---
    print(f"Processing Match {match_id} | Visiting Summary...")
    await page.goto(game_url, wait_until="domcontentloaded")
    parsed = _parse_game_page(await page.content())
    if parsed is None:
        print(f"Skipping {match_id}: could not find team headers on the summary page.")
        return None
    team_blue, team_red, winner, player_to_info = parsed

    # --- STEP 2: VISIT TIMELINE PAGE FOR FT5 ---
    print(f"Visiting Timeline for {match_id}...")
    await page.goto(timeline_url, wait_until="domcontentloaded")
    ft5_winner, kill_events = _parse_timeline(
        await page.content(), player_to_info, team_blue, team_red
    )

    # --- STEP 3: CONSOLIDATE DATA ---
    game_entry = {
        "Game ID": match_id,
        "Team Blue": team_blue,
        "Team Red": team_red,
        "Winner": winner,
        "FT5 Winner": ft5_winner
    }

    # Champion columns 1-10, in the order players were found on the page.
    for i, info in enumerate(player_to_info.values()):
        game_entry[f"Champ {i+1}"] = info["champ"]

    # Add first 5 kill logs as a helper string
    game_entry["Kill Log"] = " | ".join(kill_events[:5])
    return game_entry


async def scrape_lol_data(ids):
    """Scrape gol.gg game and timeline pages for the given match IDs.

    For each match the summary page yields the teams, the winner and the
    champions picked; the timeline page yields which team first reached five
    kills ("FT5") and a short kill log.

    Args:
        ids: Iterable of match IDs (interpolated into the gol.gg URLs).

    Returns:
        A pandas DataFrame with one row per successfully scraped match and the
        columns ``Game ID``, ``Team Blue``, ``Team Red``, ``Winner``,
        ``FT5 Winner``, ``Champ 1``..``Champ N`` and ``Kill Log``. Matches
        whose summary page cannot be parsed are skipped.
    """
    # Local import keeps this block self-contained.
    import asyncio

    all_game_data = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
            )
            page = await context.new_page()

            for match_id in ids:
                game_entry = await _scrape_match(page, match_id)
                if game_entry is not None:
                    all_game_data.append(game_entry)

                # Randomised pause between matches to avoid detection.
                # asyncio.sleep yields to the event loop; time.sleep would
                # block it for the whole pause.
                sleep = random.uniform(5, 9)
                print(f"Done with {match_id}. Sleeping {sleep:.2f}s...")
                await asyncio.sleep(sleep)
        finally:
            # Always release the browser, even if a page fails mid-run.
            await browser.close()

    return pd.DataFrame(all_game_data)

# Execute the scraper.
# NOTE: top-level `await` only works where an event loop is already running
# (e.g. an IPython/Jupyter cell); in a plain script use asyncio.run() instead.
df_final = await scrape_lol_data(match_ids)

# Display the result (in a notebook, a bare expression on a cell's last line is rendered as output)
df_final

Processing Match 74673 | Visiting Summary...
Visiting Timeline for 74673...
Done with 74673. Sleeping 7.20s...
Processing Match 74674 | Visiting Summary...
Visiting Timeline for 74674...
Done with 74674. Sleeping 6.63s...
Processing Match 74675 | Visiting Summary...
Visiting Timeline for 74675...
Done with 74675. Sleeping 5.41s...
Processing Match 74676 | Visiting Summary...
Visiting Timeline for 74676...
Done with 74676. Sleeping 6.30s...


Unnamed: 0,Game ID,Team Blue,Team Red,Winner,FT5 Winner,Champ 1,Champ 2,Champ 3,Champ 4,Champ 5,Champ 6,Champ 7,Champ 8,Champ 9,Champ 10,Kill Log
0,74673,DN SOOPers,Dplus KIA,Dplus KIA,DN SOOPers,Gwen,Vi,Taliyah,Corki,Nautilus,Ambessa,Xin Zhao,Azir,Ezreal,Leona,Peter (Nautilus) killed Smash (Ezreal) | deokd...
1,74674,Dplus KIA,DN SOOPers,Dplus KIA,Dplus KIA,KSante,Jarvan IV,Ryze,Aphelios,Lulu,Rumble,Sejuani,Yone,Kalista,Renata Glasc,Smash (Aphelios) killed Pyosik (Sejuani) | Sho...
2,74675,Dplus KIA,DN SOOPers,DN SOOPers,DN SOOPers,Aurora,Lee Sin,LeBlanc,Jhin,Bard,Renekton,Wukong,Galio,Varus,Rakan,Peter (Rakan) killed Career (Bard) | deokdam (...
3,74676,DN SOOPers,Dplus KIA,Dplus KIA,Dplus KIA,Kennen,Naafiri,Viktor,Yunara,Neeko,Sion,Aatrox,Ahri,Ashe,Seraphine,Clozer (Viktor) killed ShowMaker (Ahri) | Pyos...
