In [None]:
# 📦 Install dependencies
!pip install --quiet pandas bs4 ipywidgets

# 🔄 Imports
from bs4 import BeautifulSoup
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# 🏏 Stats Extractor Function for Test Matches & Others
def extract_stats_to_df(raw_html):
    """Build a two-column summary of batting stats from scorecard HTML.

    Team names are read from the innings header spans (with the
    ``' Innings'`` suffix removed); the first two distinct names become
    team 1 and team 2. Every ``table.ci-scorecard-table`` is then paired
    with the innings header at the same position and its rows are scanned
    for runs, fours, sixes and run-out dismissals.

    Args:
        raw_html: Page source of the scorecard as a string.

    Returns:
        A ``pandas.DataFrame`` with columns ``Stat`` and ``Value`` and six
        rows: highest individual score, top batter for each team, the
        fours and sixes tallies, and the run-out tally.

    Raises:
        ValueError: If fewer than two distinct team names are found, or if
            no batter row with numeric runs/4s/6s could be parsed.
    """
    soup = BeautifulSoup(raw_html, 'html.parser')

    # Extract innings headers and map team order
    innings_headers = soup.select('span.ds-text-title-xs.ds-font-bold.ds-capitalize')
    all_teams = [h.get_text(strip=True).replace(' Innings', '') for h in innings_headers]
    unique_teams = list(dict.fromkeys(all_teams))  # preserve order, remove duplicates

    if len(unique_teams) < 2:
        raise ValueError("❌ Could not detect two distinct teams.")

    team_1, team_2 = unique_teams[:2]

    # Initialise stat collectors
    sixes = {team_1: 0, team_2: 0}
    fours = {team_1: 0, team_2: 0}
    run_outs = {team_1: 0, team_2: 0}
    # None means "no batter seen yet", so a genuine top score of 0 is still
    # attributed to a named batter instead of being left blank.
    top_batters = {team_1: None, team_2: None}
    # (name, runs, team) of the best single innings. The team is recorded
    # here directly rather than inferred from the name afterwards, which
    # broke when names collided across teams or every score was 0.
    best_overall = None

    # Get all batting tables (one per innings).
    # NOTE(review): assumes the i-th table belongs to the i-th innings
    # header; tables beyond the last header are ignored by zip().
    batting_tables = soup.find_all('table', class_='ci-scorecard-table')

    for table, current_team in zip(batting_tables, all_teams):
        if current_team not in (team_1, team_2):
            continue

        bowled_by = team_2 if current_team == team_1 else team_1

        for row in table.find_all('tr')[1:]:
            cols = row.find_all('td')

            # Count run outs (credited to the bowling side). This looks at
            # every row, including ones skipped below for being too short
            # or non-numeric.
            row_text = ' '.join(td.get_text(strip=True) for td in cols)
            if 'run out' in row_text.lower():
                run_outs[bowled_by] += 1

            if len(cols) < 8:
                continue
            name = cols[0].get_text(strip=True).split('†')[0].split('(')[0].strip()

            try:
                runs = int(cols[2].get_text(strip=True))
                _4s = int(cols[5].get_text(strip=True))
                _6s = int(cols[6].get_text(strip=True))
            except ValueError:
                # Not a batter row (non-numeric cells); skip it.
                continue

            fours[current_team] += _4s
            sixes[current_team] += _6s

            # Track highest individual score of the match; ties keep the
            # earlier innings.
            if best_overall is None or runs > best_overall[1]:
                best_overall = (name, runs, current_team)

            # Track top batter per team across all of that team's innings.
            team_best = top_batters[current_team]
            if team_best is None or runs > team_best[1]:
                top_batters[current_team] = (name, runs)

    if best_overall is None:
        # Previously surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError("❌ No batting rows could be parsed from the scorecard.")

    top_name_all, top_runs_all, top_team_all = best_overall
    top_1_name, top_1_runs = top_batters[team_1] or ('', 0)
    top_2_name, top_2_runs = top_batters[team_2] or ('', 0)

    # Build results DataFrame
    df = pd.DataFrame({
        'Stat': [
            'Highest Individual Score',
            f'Top Batter – {team_1}',
            f'Top Batter – {team_2}',
            'Total Match Fours',
            'Most Match Sixes',
            'Most Run Outs (by bowling side)'
        ],
        'Value': [
            f"{top_name_all} ({top_runs_all}) – {top_team_all}",
            f"{top_1_name} ({top_1_runs})",
            f"{top_2_name} ({top_2_runs})",
            f"{team_1} {fours[team_1]} : {fours[team_2]} {team_2}",
            f"{team_1} {sixes[team_1]} : {sixes[team_2]} {team_2}",
            f"{team_2} {run_outs[team_2]} : {run_outs[team_1]} {team_1}"
        ]
    })

    return df

# 🖼️ Interface
# Output area that the click handler prints and renders into.
output = widgets.Output()

# Trigger for running the extraction on the pasted HTML.
button = widgets.Button(button_style='primary', description='Extract Stats')

# Full-width text box that receives the pasted page source.
html_input = widgets.Textarea(
    placeholder='Paste full Ctrl+U HTML here…',
    value='',
    layout=widgets.Layout(height='300px', width='100%'),
)

def on_click(b):
    """Handle a press of the extract button.

    Clears the output area, then either prompts for input when the text
    box is blank or renders the styled stats table. Any exception raised
    while extracting or displaying is reported as a one-line message
    instead of a traceback.

    Args:
        b: The button instance passed by the widget callback (unused).
    """
    with output:
        clear_output()
        pasted = html_input.value.strip()
        if pasted:
            try:
                stats = extract_stats_to_df(pasted)
                display(stats.style.apply(highlight_boundaries, axis=1))
            except Exception as err:
                # Top-level UI boundary: show the message rather than a traceback.
                print(f"❌ Error: {err}")
        else:
            print("❗ Please paste the full HTML first.")


# Register the handler, then render the heading followed by the widgets.
button.on_click(on_click)

heading = widgets.HTML("<h3>🏏 Paste ESPNcricinfo Scorecard HTML Below</h3>")
display(heading)
display(html_input, button, output)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.6 MB[0m [31m12.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━[0m [32m1.4/1.6 MB[0m [31m27.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h

HTML(value='<h3>🏏 Paste ESPNcricinfo Scorecard HTML Below</h3>')

Textarea(value='', layout=Layout(height='300px', width='100%'), placeholder='Paste full Ctrl+U HTML here…')

Button(button_style='primary', description='Extract Stats', style=ButtonStyle())

Output()

In [None]:
import re

def highlight_boundaries(row):
    """Return per-cell CSS for one row of the stats table.

    Intended for ``df.style.apply(highlight_boundaries, axis=1)``. Only the
    "Total Match Fours" and "Most Match Sixes" rows are styled, and only
    when their ``Value`` text has the form ``"<team> <n> : <m> <team>"``
    and the two counts differ.

    Args:
        row: A mapping-like row (e.g. a ``pandas.Series``) with a ``Value``
            entry and a label under ``Stat`` or ``Metric``.

    Returns:
        A list of CSS strings with exactly ``len(row)`` entries. When the
        row has separate cells at positions 1 and 2, the leading side's
        cell is green. When there is only a single value cell (the
        ``Stat``/``Value`` layout), that cell is green whenever one side
        leads, because a single cell cannot be styled per side.
    """
    # Always return one style per cell; Styler.apply rejects other lengths.
    styles = [''] * len(row)

    # The stats frame labels its rows under "Stat"; "Metric" is still accepted.
    label = row.get("Metric", row.get("Stat"))
    if label not in ("Total Match Fours", "Most Match Sixes"):
        return styles

    m = re.match(r'(.+?)\s+(\d+)\s*:\s*(\d+)\s+(.+)', str(row["Value"]))
    if not m:
        return styles

    left_cnt, right_cnt = int(m.group(2)), int(m.group(3))
    if left_cnt == right_cnt:
        return styles

    # Position 1 is the left side, position 2 the right side; fall back to
    # the last cell when the row has no separate right-hand cell.
    target = 1 if left_cnt > right_cnt else 2
    target = min(target, len(styles) - 1)
    styles[target] = 'color: green'
    return styles
