# URC Predictor v1 — Auto Fixtures (Colab Ready)

This Google Colab notebook automatically fetches upcoming **United Rugby Championship (URC)** fixtures and predicts match outcomes based on:
- Home advantage (+3.5 points)
- Travel distance (extra penalty for South Africa ↔ Europe travel or long distances)
- Last year’s team performance (simple win-rate table included)

**How to use**
1. Open this notebook in Google Colab.
2. Go to **Runtime → Run all**.
3. The notebook fetches the fixtures, computes a prediction for each match, and displays a table with these columns: Date, Fixture, Predicted Winner, Predicted Margin, Win Probability, Sources Used.

Created: 2025-10-15 17:48 UTC

In [None]:

# URC Predictor v1 - Auto Fixtures & Basic Model
# Designed for Google Colab

# Install requirements
!pip install --quiet beautifulsoup4 lxml geopy requests

import math
import random
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.distance import great_circle
from IPython.display import display, Markdown

# --- Model parameters -------------------------------------------------------
# Points added to the home side's predicted margin for every fixture.
HOME_ADVANTAGE = 3.5
# Points returned by travel_penalty() when exactly one of the two teams is
# South African (i.e. the fixture involves a South Africa <-> Europe trip).
TRAVEL_PENALTY_SA_EU = 2.0
# Fixtures with a parseable date are kept only if they fall between today and
# this many days ahead (inclusive); undated fixtures are always kept.
LOOKAHEAD_DAYS = 7

# Minimal last-year win-rate table (editable).
# Keys are the short team names used throughout this notebook; get_strength()
# falls back to a default for any name that is not listed here.
# NOTE(review): values look hand-entered (fractions between 0 and 1) — the
# season they refer to is not recorded; confirm before relying on them.
last_year_win_rate = {
    "Leinster": 0.75, "Munster": 0.65, "Ulster": 0.60, "Connacht": 0.45,
    "Glasgow": 0.50, "Edinburgh": 0.40, "Ospreys": 0.35, "Scarlets": 0.42,
    "Cardiff": 0.30, "Bulls": 0.68, "Stormers": 0.66, "Sharks": 0.55,
    "Lions": 0.58, "Benetton": 0.33, "Zebre": 0.25, "Dragons": 0.28
}

# Approximate coordinates for distance calculation.
# Each value is a (latitude, longitude) pair in decimal degrees, passed to
# geopy's great_circle() inside travel_penalty(). Keys match the team names
# used in last_year_win_rate.
team_coords = {
    "Leinster": (53.3498, -6.2603), "Munster": (51.8985, -8.4756),
    "Ulster": (54.5973, -5.9301), "Connacht": (53.2734, -9.0510),
    "Glasgow": (55.8642, -4.2518), "Edinburgh": (55.9533, -3.1883),
    "Ospreys": (51.6108, -3.9364), "Scarlets": (51.7789, -3.8286),
    "Cardiff": (51.4816, -3.1791), "Bulls": (-25.7461, 28.1881),
    "Stormers": (-33.9249, 18.4241), "Sharks": (-29.8587, 31.0218),
    "Lions": (-26.2041, 28.0473), "Benetton": (45.6676, 12.2413),
    "Zebre": (44.4056, 8.9463), "Dragons": (51.6176, -3.0330)
}

def fetch_from_urc():
    """Scrape candidate fixtures from the official URC fixtures page.

    Downloads the page and scans the text of every ``<a>`` element for a
    "Home v Away" / "Home vs Away" pattern.

    Returns:
        list[dict]: one dict per match found, with keys ``"date"`` (always
        ``None`` -- this scrape does not extract dates), ``"home"``,
        ``"away"`` and ``"source"`` (``"URC"``). An empty list is returned
        on a non-200 response or on any network/parsing error, so the caller
        can simply move on to the next source.
    """
    url = "https://www.unitedrugby.com/fixtures-results"
    try:
        resp = requests.get(url, timeout=12)
        if resp.status_code != 200:
            return []
        soup = BeautifulSoup(resp.text, "lxml")
        fixtures = []
        for link in soup.select("a"):
            txt = link.get_text(" ", strip=True)
            # Normalise "vs" to "v" so a single separator covers both forms.
            parts = txt.replace(" vs ", " v ").split(" v ")
            if len(parts) >= 2:
                fixtures.append({"date": None, "home": parts[0], "away": parts[1], "source": "URC"})
        return fixtures
    except Exception as exc:
        # Best-effort source: report why it failed instead of hiding it, but
        # never abort the notebook. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt / SystemExit propagate.
        print(f"[URC] fixture fetch failed: {exc}")
        return []

def fetch_from_flashscore():
    """Scrape candidate fixtures from the FlashScore URC fixtures page.

    Sends a browser-like ``User-Agent`` header, then scans the text of every
    ``<a>`` element for a "Home v Away" / "Home vs Away" pattern.

    Returns:
        list[dict]: one dict per match found, with keys ``"date"`` (always
        ``None`` -- this scrape does not extract dates), ``"home"``,
        ``"away"`` and ``"source"`` (``"FlashScore"``). An empty list is
        returned on a non-200 response or on any network/parsing error.
    """
    url = "https://www.flashscore.com/rugby-union/world/united-rugby-championship/fixtures/"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        resp = requests.get(url, headers=headers, timeout=12)
        if resp.status_code != 200:
            return []
        soup = BeautifulSoup(resp.text, "lxml")
        fixtures = []
        for link in soup.select("a"):
            txt = link.get_text(" ", strip=True)
            # Normalise "vs" to "v" so a single separator covers both forms.
            parts = txt.replace(" vs ", " v ").split(" v ")
            if len(parts) >= 2:
                fixtures.append({"date": None, "home": parts[0], "away": parts[1], "source": "FlashScore"})
        return fixtures
    except Exception as exc:
        # Best-effort source: report the failure but keep the notebook
        # running. Unlike a bare ``except:``, this lets KeyboardInterrupt /
        # SystemExit propagate.
        print(f"[FlashScore] fixture fetch failed: {exc}")
        return []

def fetch_from_thesportsdb():
    """Fetch upcoming fixtures from TheSportsDB "next league events" endpoint.

    Returns:
        list[dict]: one dict per event that has both a home and an away team,
        with keys ``"date"`` (the event's ``dateEvent`` value, falling back
        to ``strDate``; may be ``None``), ``"home"``, ``"away"`` and
        ``"source"`` (``"TheSportsDB"``). An empty list is returned on a
        non-200 response or on any network/JSON error.
    """
    url = "https://www.thesportsdb.com/api/v1/json/1/eventsnextleague.php?id=4446"
    try:
        resp = requests.get(url, timeout=12)
        # Same status handling as the two HTML scrapers.
        if resp.status_code != 200:
            return []
        data = resp.json()
        # "events" can be present but null; treat that like an empty list.
        events = data.get("events") or []
        fixtures = []
        for ev in events:
            date = ev.get("dateEvent") or ev.get("strDate")
            home = ev.get("strHomeTeam")
            away = ev.get("strAwayTeam")
            if home and away:
                fixtures.append({"date": date, "home": home, "away": away, "source": "TheSportsDB"})
        return fixtures
    except Exception as exc:
        # Best-effort source: report the failure but keep the notebook
        # running. Unlike a bare ``except:``, this lets KeyboardInterrupt /
        # SystemExit propagate.
        print(f"[TheSportsDB] fixture fetch failed: {exc}")
        return []

def auto_fetch_fixtures():
    """Query every fixture source in turn and pool whatever they return.

    Sources are tried in the order URC, FlashScore, TheSportsDB. Results are
    concatenated as-is (no de-duplication).

    Returns:
        tuple[list, list]: ``(fixtures, labels)`` where ``fixtures`` is the
        combined list of fixture dicts and ``labels`` names each source that
        returned at least one fixture.
    """
    providers = (
        ("URC", fetch_from_urc),
        ("FlashScore", fetch_from_flashscore),
        ("TheSportsDB", fetch_from_thesportsdb),
    )
    collected = []
    labels = []
    for label, fetch in providers:
        found = fetch()
        if not found:
            continue
        collected.extend(found)
        labels.append(label)
    return collected, labels

def travel_penalty(home, away):
    """Return the size of the travel handicap, in points, for a fixture.

    The value is unsigned; the caller decides which side it is applied to.

    Args:
        home: name of the home team.
        away: name of the away team.

    Returns:
        float: ``TRAVEL_PENALTY_SA_EU`` when exactly one of the teams is
        South African. Otherwise a tier based on the great-circle distance
        between the two teams' coordinates: 3.0 above 7000 km, 1.5 above
        2000 km, 0.5 below that. 0.0 when either team has no known
        coordinates or the distance cannot be computed.
    """
    sa = ("Bulls", "Stormers", "Sharks", "Lions")
    # Exactly one South African side means an intercontinental trip.
    if (home in sa) != (away in sa):
        return TRAVEL_PENALTY_SA_EU
    try:
        d = great_circle(team_coords[home], team_coords[away]).km
    except (KeyError, TypeError, ValueError):
        # Unknown team or unusable coordinates: no penalty. Only the errors
        # this lookup can raise are caught, not every exception.
        return 0.0
    if d > 7000:
        return 3.0
    if d > 2000:
        return 1.5
    return 0.5

def get_strength(team):
    """Look up a team's last-season win rate.

    Returns the value stored in ``last_year_win_rate`` for ``team``, or 0.45
    when the team is not in the table.
    """
    if team in last_year_win_rate:
        return last_year_win_rate[team]
    return 0.45

# Orchestrate: fetch fixtures, fall back to a sample, then filter by date.
display(Markdown("## Auto-fetching fixtures and generating predictions (v1)"))
fixtures_raw, sources_used = auto_fetch_fixtures()

# Timezone-aware "today" in UTC (datetime.utcnow() is deprecated since 3.12).
today = datetime.now(timezone.utc).date()

if not fixtures_raw:
    # No source returned anything: use one sample fixture three days out so
    # the rest of the notebook still has something to show.
    fixtures_raw = [{'date': (today + timedelta(days=3)).isoformat(),
                     'home':'Stormers','away':'Leinster','source':'Manual'}]

# Keep fixtures dated within the next LOOKAHEAD_DAYS days. Fixtures with no
# date, or a date that is not in YYYY-MM-DD form, are kept unfiltered.
fixtures = []
for fx in fixtures_raw:
    fx_date = fx.get('date')
    keep = True
    if fx_date:
        try:
            d = datetime.strptime(fx_date, "%Y-%m-%d").date()
        except (ValueError, TypeError):
            # Unparseable date: cannot judge the window, so keep the fixture.
            keep = True
        else:
            keep = 0 <= (d - today).days <= LOOKAHEAD_DAYS
    if keep:
        fixtures.append(fx)

# Build one prediction row per fixture.
rows = []
# The same source summary goes on every row, so compute it once.
sources_label = ", ".join(sources_used) if sources_used else "Manual"
for fx in fixtures:
    h, a = fx['home'], fx['away']
    # Win-rate gap scaled to points; half of it feeds into the margin.
    s_diff = (get_strength(h) - get_strength(a)) * 10.0
    # `margin` is from the HOME side's point of view (positive = home win).
    # The away team is the one travelling, so the travel handicap counts in
    # the home side's favour; a little random noise is added on top.
    margin = (s_diff * 0.5) + HOME_ADVANTAGE + travel_penalty(h, a) + random.uniform(-2, 2)
    # Logistic mapping from home margin to home-win probability (percent).
    home_prob = 100 / (1 + math.exp(-margin / 6))
    # Report margin and probability for the predicted winner, so the row
    # never shows a negative margin or a sub-50% chance for that winner.
    if margin > 0:
        winner, win_prob = h, home_prob
    else:
        winner, win_prob = a, 100 - home_prob
    rows.append({
        "Date": fx.get("date") or "",
        "Fixture": f"{h} vs {a}",
        "Predicted Winner": winner,
        "Predicted Margin (pts)": round(abs(margin), 1),
        "Win Probability (%)": round(win_prob, 1),
        "Sources Used": sources_label
    })

# Tabulate the predictions and show a short summary above the table.
df = pd.DataFrame(rows)

if sources_used:
    sources_text = ', '.join(sources_used)
else:
    sources_text = 'None (manual sample used)'
display(Markdown(f"**Sources that returned fixtures:** {sources_text}"))

total_found = len(fixtures_raw)
total_kept = len(fixtures)
display(Markdown(f"**Total fixtures found (pre-filter):** {total_found} | **This weekend (filtered):** {total_kept}"))

display(Markdown("### Predictions"))
display(df)

# Write the table to the Colab working directory as CSV (no index column).
csv_path = "/content/urc_predictions_v1.csv"
df.to_csv(csv_path, index=False)
print(f"Saved predictions to {csv_path}")
