<a href="https://colab.research.google.com/github/ronin-winter/Predicting_PL_Games/blob/main/SportsBetting_ManUtd_vs_Spurs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import requests
import warnings

warnings.filterwarnings('ignore')

class PLAnalyticBettor:
    def __init__(self, api_key=None):
        """Create the two classifiers, the empty match store and the name map.

        Args:
            api_key: Optional key for The Odds API. ``fetch_event_id`` skips
                the odds lookup entirely when this is falsy.
        """
        self.api_key = api_key

        # One forest per target: match result (H/D/A) and both-teams-to-score.
        forest_options = {'n_estimators': 100, 'random_state': 42}
        self.model_result = RandomForestClassifier(**forest_options)
        self.model_btts = RandomForestClassifier(**forest_options)

        # Populated by get_football_data(); None means "nothing loaded yet".
        self.match_data = None

        # Applied to the HomeTeam/AwayTeam CSV columns to expand short club
        # names (presumably so they line up with the names used by the odds
        # feed -- confirm against the API).
        self.csv_fixer = {
            'Man United': 'Manchester United',
            'Man City': 'Manchester City',
            'Spurs': 'Tottenham Hotspur',
            'Tottenham': 'Tottenham Hotspur',
            'Newcastle': 'Newcastle United',
            'Wolves': 'Wolverhampton Wanderers',
            'Brighton': 'Brighton and Hove Albion',
            'West Ham': 'West Ham United',
            "Nott'm Forest": 'Nottingham Forest',
            'Leicester': 'Leicester City',
            'Leeds': 'Leeds United',
            'Sheffield United': 'Sheffield United',
            'Luton': 'Luton Town',
            'Ipswich': 'Ipswich Town',
        }

    def get_football_data(self):
        """Download the season CSVs and store the cleaned rows in ``match_data``.

        Each season file is fetched, its ``HomeTeam``/``AwayTeam`` columns are
        normalised through ``self.csv_fixer`` and the rows are reduced by
        ``clean_season_data``. A season that cannot be downloaded or parsed is
        reported and skipped so the remaining seasons can still be used.

        Returns:
            bool: True when at least one season produced matches (they are then
            available as ``self.match_data``), otherwise False.
        """
        from io import StringIO  # local import, hoisted out of the loop

        print("Downloading and standardizing historical data...")
        season_urls = {
            '2023-24': 'https://www.football-data.co.uk/mmz4281/2324/E0.csv',
            '2024-25': 'https://www.football-data.co.uk/mmz4281/2425/E0.csv',
            '2025-26': 'https://www.football-data.co.uk/mmz4281/2526/E0.csv'
        }
        all_matches = []
        for season, url in season_urls.items():
            try:
                r = requests.get(url, timeout=10)
                if r.status_code != 200:
                    print(f"Skipping {season}: HTTP {r.status_code}")
                    continue
                df = pd.read_csv(StringIO(r.text))
                df['HomeTeam'] = df['HomeTeam'].replace(self.csv_fixer)
                df['AwayTeam'] = df['AwayTeam'].replace(self.csv_fixer)
                cleaned = self.clean_season_data(df, season)
            except Exception as exc:
                # Best effort per season: report the problem instead of hiding
                # it, but (unlike a bare ``except``) let KeyboardInterrupt and
                # SystemExit propagate.
                print(f"Skipping {season}: {exc}")
                continue
            # A season with no completed matches contributes nothing.
            if not cleaned.empty:
                all_matches.append(cleaned)

        if all_matches:
            self.match_data = pd.concat(all_matches, ignore_index=True)
            print(f"Database loaded: {len(self.match_data)} matches.")
            return True
        return False

    def clean_season_data(self, data, season):
        matches = []
        for _, row in data.iterrows():
            try:
                if pd.isna(row.get('FTHG')): continue
                matches.append({
                    'season': season,
                    'home_team': row.get('HomeTeam'),
                    'away_team': row.get('AwayTeam'),
                    'home_goals': int(row['FTHG']),
                    'away_goals': int(row['FTAG']),
                    'result': 'H' if row['FTHG'] > row['FTAG'] else 'A' if row['FTAG'] > row['FTHG'] else 'D',
                    'btts_result': 1 if (row['FTHG'] > 0 and row['FTAG'] > 0) else 0
                })
            except: continue
        return pd.DataFrame(matches)

    def calculate_features(self, data):
        enhanced = data.copy().sort_values(['season']).reset_index(drop=True)
        cols = ['home_strength', 'away_strength', 'home_form', 'away_form',
                'h2h_home_dominance', 'home_goals', 'away_goals',
                'home_btts_rate', 'away_btts_rate']
        for c in cols: enhanced[c] = 0.0

        for i, row in enhanced.iterrows():
            h_stats = self.get_stats(enhanced, row['home_team'], i)
            a_stats = self.get_stats(enhanced, row['away_team'], i)
            h2h = self.get_h2h(enhanced, row['home_team'], row['away_team'], i)

            enhanced.loc[i, 'home_strength'] = h_stats['strength']
            enhanced.loc[i, 'away_strength'] = a_stats['strength']
            enhanced.loc[i, 'home_form'] = h_stats['form']
            enhanced.loc[i, 'away_form'] = a_stats['form']
            enhanced.loc[i, 'home_goals'] = h_stats['goals_for']
            enhanced.loc[i, 'away_goals'] = a_stats['goals_for']
            enhanced.loc[i, 'home_btts_rate'] = h_stats['btts_rate']
            enhanced.loc[i, 'away_btts_rate'] = a_stats['btts_rate']
            enhanced.loc[i, 'h2h_home_dominance'] = h2h
        return enhanced

    def get_stats(self, data, team, idx, games=5):
        hist = data[((data['home_team']==team)|(data['away_team']==team)) & (data.index < idx)].tail(games)
        if len(hist)==0: return {'strength': 50, 'form': 5, 'goals_for': 1.5, 'btts_rate': 0.5}

        pts, scored, btts = 0, 0, 0
        for _, m in hist.iterrows():
            is_home = m['home_team'] == team
            scored += m['home_goals'] if is_home else m['away_goals']
            if m['btts_result'] == 1: btts += 1
            pts += 3 if (is_home and m['result']=='H') or (not is_home and m['result']=='A') else 1 if m['result']=='D' else 0

        return {
            'strength': (pts/len(hist))*20+20, 'form': pts,
            'goals_for': scored/len(hist), 'btts_rate': btts/len(hist)
        }

    def get_h2h(self, data, h_team, a_team, idx):
        h2h = data[(((data['home_team']==h_team)&(data['away_team']==a_team)) |
                   ((data['home_team']==a_team)&(data['away_team']==h_team))) & (data.index < idx)].tail(3)
        score = 0
        for _, m in h2h.iterrows():
            if m['result'] == 'D': continue
            winner = m['home_team'] if m['result'] == 'H' else m['away_team']
            score += 1 if winner == h_team else -1
        return score

    def train_models(self):
        if self.match_data is None: return False
        print("Training Models...")
        data = self.calculate_features(self.match_data).iloc[50:]
        X_res = data[['home_strength', 'away_strength', 'home_form', 'away_form', 'h2h_home_dominance', 'home_goals', 'away_goals']]
        self.model_result.fit(X_res, data['result'])
        X_btts = data[['home_goals', 'away_goals', 'home_btts_rate', 'away_btts_rate']]
        self.model_btts.fit(X_btts, data['btts_result'])
        return True

    def predict(self, home, away):
        idx = 999999
        h_stats = self.get_stats(self.match_data, home, idx)
        a_stats = self.get_stats(self.match_data, away, idx)
        h2h = self.get_h2h(self.match_data, home, away, idx)

        # We store these stats to explain the prediction later
        self.last_match_stats = {
            'home': home, 'away': away,
            'h_form': h_stats['form'], 'a_form': a_stats['form'],
            'h_str': h_stats['strength'], 'a_str': a_stats['strength'],
            'h2h': h2h
        }

        f_res = pd.DataFrame([{
            'home_strength': h_stats['strength'], 'away_strength': a_stats['strength'],
            'home_form': h_stats['form'], 'away_form': a_stats['form'],
            'h2h_home_dominance': h2h, 'home_goals': h_stats['goals_for'], 'away_goals': a_stats['goals_for']
        }])
        f_btts = pd.DataFrame([{
            'home_goals': h_stats['goals_for'], 'away_goals': a_stats['goals_for'],
            'home_btts_rate': h_stats['btts_rate'], 'away_btts_rate': a_stats['btts_rate']
        }])

        probs = self.model_result.predict_proba(f_res)[0]
        p_res = {c: p for c, p in zip(self.model_result.classes_, probs)}
        p_btts = self.model_btts.predict_proba(f_btts)[0][1]
        return p_res, p_btts

    # --- API FETCHING ---
    def fetch_event_id(self, home_team, away_team):
        if not self.api_key: return None, home_team, away_team
        print(f"Finding Event ID for {home_team} vs {away_team}...")
        url = 'https://api.the-odds-api.com/v4/sports/soccer_epl/odds/'
        params = {'apiKey': self.api_key, 'regions': 'uk', 'markets': 'h2h', 'oddsFormat': 'decimal'}

        try:
            data = requests.get(url, params=params).json()
            if isinstance(data, list):
                for m in data:
                    if (home_team in m['home_team'] and away_team in m['away_team']) or \
                       (away_team in m['home_team'] and home_team in m['away_team']):
                        return m['id'], m['home_team'], m['away_team']
            print("‚ùå Match not found in current schedule.")
            return None, home_team, away_team
        except: return None, home_team, away_team

    def fetch_specific_odds(self, event_id):
        """Fetch the extended betting markets for one event.

        Requests the ``h2h``, ``btts``, ``draw_no_bet`` and ``double_chance``
        markets from UK bookmakers in decimal format.

        Args:
            event_id: Event identifier as returned by ``fetch_event_id``.

        Returns:
            The decoded JSON payload, or None when the request fails or the
            response body is not valid JSON.
        """
        print("Fetching extended markets...")
        url = f'https://api.the-odds-api.com/v4/sports/soccer_epl/events/{event_id}/odds'
        params = {'apiKey': self.api_key, 'regions': 'uk', 'markets': 'h2h,btts,draw_no_bet,double_chance', 'oddsFormat': 'decimal'}
        try:
            # A timeout keeps a stalled connection from hanging the run.
            return requests.get(url, params=params, timeout=10).json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Could not fetch extended markets: {exc}")
            return None

    def extract_odds(self, match_data, home_team, away_team):
        odds = {}
        def get_price(market_key, outcome_name):
            prices = []
            for bookie in match_data.get('bookmakers', []):
                for market in bookie['markets']:
                    if market['key'] == market_key:
                        for outcome in market['outcomes']:
                            if outcome_name in outcome['name'] or outcome['name'] in outcome_name:
                                prices.append(outcome['price'])
            return max(prices) if prices else None

        odds['H_ML'] = get_price('h2h', home_team)
        odds['D']    = get_price('h2h', 'Draw')
        odds['A_ML'] = get_price('h2h', away_team)
        odds['BTTS_Yes'] = get_price('btts', 'Yes')
        odds['H_DNB'] = get_price('draw_no_bet', home_team)
        # --- NEW: AWAY DNB ---
        odds['A_DNB'] = get_price('draw_no_bet', away_team)
        odds['DC_1X'] = get_price('double_chance', home_team)
        odds['DC_X2'] = get_price('double_chance', away_team)
        return odds

    def run_prediction(self, user_home, user_away):
        event_id, real_home, real_away = self.fetch_event_id(user_home, user_away)

        odds = {}
        if event_id:
            rich_data = self.fetch_specific_odds(event_id)
            if rich_data: odds = self.extract_odds(rich_data, real_home, real_away)

        p_res, p_btts = self.predict(real_home, real_away)

        print(f"\nPREDICTION: {real_home} vs {real_away}")
        print("=" * 85)
        self.print_row("Home Win", p_res.get('H'), odds.get('H_ML'))
        self.print_row("Draw", p_res.get('D'), odds.get('D'))
        self.print_row("Away Win", p_res.get('A'), odds.get('A_ML'))
        print("-" * 85)
        self.print_row("BTTS Yes", p_btts, odds.get('BTTS_Yes'))
        print("-" * 85)

        # DNB Calculations
        dnb_h = p_res['H']/(p_res['H']+p_res['A'])
        dnb_a = p_res['A']/(p_res['H']+p_res['A']) # Away DNB Probability

        self.print_row("Home DNB", dnb_h, odds.get('H_DNB'))
        self.print_row("Away DNB", dnb_a, odds.get('A_DNB')) # Print Away DNB
        print("-" * 85)

        # Double Chance
        dc_1x = p_res['H'] + p_res['D']
        dc_x2 = p_res['D'] + p_res['A']
        self.print_row("1X (Home/Draw)", dc_1x, odds.get('DC_1X'))
        self.print_row("X2 (Away/Draw)", dc_x2, odds.get('DC_X2'))

        # --- EXPLAINER SECTION ---
        self.explain_model_decision()

    def print_row(self, label, prob, odd):
        if prob is None: return
        fair = 1/prob if prob>0 else 0
        if odd:
            ev = (prob * odd) - 1
            mark = " <<< VALUE" if ev > 0.05 else ""
            print(f"{label:<15} | Model: {prob:<5.1%} | Fair: {fair:<5.2f} | Odds: {odd:<5} | EV: {ev*100:>+5.1f}%{mark}")
        else:
            print(f"{label:<15} | Model: {prob:<5.1%} | Fair: {fair:<5.2f} | Odds: N/A")

    def explain_model_decision(self):
        s = self.last_match_stats
        print("\n--- MODEL REASONING ---")
        print(f"Why is the model predicting this?")

        # 1. Compare Form
        h_form = s['h_form'] # Points in last 5 games
        a_form = s['a_form']

        if abs(h_form - a_form) < 2:
            print(f"1. FORM PARITY: Both teams have similar recent form ({h_form} vs {a_form} points).")
            print("   -> This increases the Draw probability significantly.")
        elif h_form > a_form:
            print(f"1. FORM: {s['home']} has better form ({h_form} vs {a_form} pts).")
        else:
            print(f"1. FORM: {s['away']} has better form ({a_form} vs {h_form} pts).")

        # 2. H2H
        h2h = s['h2h']
        if h2h < 0:
            print(f"2. BOGEY TEAM: History favors {s['away']} (H2H Score: {h2h}).")
            print("   -> The model detects that the Away team often gets results in this matchup.")
        elif h2h > 0:
            print(f"2. HISTORY: History favors {s['home']} (H2H Score: +{h2h}).")
        else:
            print(f"2. HISTORY: Recent meetings have been even.")

        # 3. Overall Strength (Calculated)
        print(f"3. STRENGTH RATING: Home ({s['h_str']:.1f}) vs Away ({s['a_str']:.1f})")

        if s['h_str'] < 50 and s['a_str'] < 50:
            print("   -> Both teams are rated 'Below Average' based on recent data.")
            print("   -> When two struggling teams meet, the model often defaults to 'Draw' or 'Double Chance'.")

def main():
    """Load the data, train the models and print one example prediction.

    The Odds API key is taken from the ``ODDS_API_KEY`` environment variable
    rather than being embedded in the source. Without a key the prediction
    still runs; ``fetch_event_id`` skips the odds lookup and the rows are
    printed with "Odds: N/A".
    """
    import os  # local import: only needed to read the key

    # NOTE(review): a literal key used to be committed here; it should be
    # treated as leaked and rotated.
    api_key = os.environ.get("ODDS_API_KEY")
    if not api_key:
        print("ODDS_API_KEY is not set; continuing without market odds.")

    bettor = PLAnalyticBettor(api_key)
    if bettor.get_football_data() and bettor.train_models():
        bettor.run_prediction('Manchester United', 'Tottenham Hotspur')


if __name__ == "__main__":
    main()

Downloading and standardizing historical data...
Database loaded: 999 matches.
Training Models...
Finding Event ID for Manchester United vs Tottenham Hotspur...
Fetching extended markets...

PREDICTION: Manchester United vs Tottenham Hotspur
Home Win        | Model: 22.9% | Fair: 4.36  | Odds: 1.7   | EV: -61.0%
Draw            | Model: 59.2% | Fair: 1.69  | Odds: 4.6   | EV: +172.3% <<< VALUE
Away Win        | Model: 17.9% | Fair: 5.60  | Odds: 5.0   | EV: -10.7%
-------------------------------------------------------------------------------------
BTTS Yes        | Model: 48.9% | Fair: 2.04  | Odds: 1.62  | EV: -20.8%
-------------------------------------------------------------------------------------
Home DNB        | Model: 56.2% | Fair: 1.78  | Odds: 1.28  | EV: -28.0%
Away DNB        | Model: 43.8% | Fair: 2.29  | Odds: 3.45  | EV: +50.9% <<< VALUE
-------------------------------------------------------------------------------------
1X (Home/Draw)  | Model: 82.2% | Fair: 1.22  | 

In [3]:
import requests
from datetime import datetime, timezone
from dateutil import parser

import os

# The Odds API key is read from the ODDS_API_KEY environment variable so the
# credential is not stored in the notebook. Set it before running this cell,
# e.g. os.environ["ODDS_API_KEY"] = "<your key>".
# NOTE(review): a literal key used to be committed here; it should be treated
# as leaked and rotated.
API_KEY = os.environ.get("ODDS_API_KEY", "")

def get_upcoming_schedule():
    """Print the Premier League fixtures currently listed by The Odds API.

    Requests the ``soccer_epl`` odds feed (UK region, ``h2h`` market only) and
    prints one table row per fixture, sorted by kick-off time: local date,
    local time, the two teams and how far away the kick-off is. API errors,
    an empty schedule and connection problems are reported on stdout; the
    function returns None in every case.
    """
    # Only the 'h2h' (match winner) market is requested: it is the minimum
    # needed to see which games are listed.
    url = 'https://api.the-odds-api.com/v4/sports/soccer_epl/odds'

    params = {
        'apiKey': API_KEY,
        'regions': 'uk',      # Focus on UK bookmakers
        'markets': 'h2h',     # Minimum market to check availability
        'oddsFormat': 'decimal'
    }

    print("Connecting to The Odds API...")

    try:
        # A timeout keeps a stalled connection from hanging the cell.
        response = requests.get(url, params=params, timeout=10)
        matches = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failure or a body that is not JSON.
        print(f"❌ Connection Failed: {e}")
        return

    # Check for error messages (e.g., invalid key, quota reached)
    if isinstance(matches, dict) and 'message' in matches:
        print(f"❌ API Error: {matches['message']}")
        return

    if not matches:
        print("✅ Connection successful, but NO upcoming matches were returned.")
        print("   (This usually means no games are scheduled for the next 7 days).")
        return

    if not isinstance(matches, list):
        print(f"❌ API Error: unexpected response of type {type(matches).__name__}")
        return

    # Keep only entries that carry the fields used below and whose kick-off
    # time can be parsed, so one malformed record cannot abort the listing.
    required = ('commence_time', 'home_team', 'away_team')
    fixtures = []
    for match in matches:
        if not isinstance(match, dict) or any(key not in match for key in required):
            continue
        try:
            start_time = parser.parse(match['commence_time'])
        except (TypeError, ValueError, OverflowError):
            continue
        fixtures.append((start_time, match))

    print(f"\n✅ Found {len(fixtures)} matches in the schedule:\n")

    # Header for the table
    print(f"{'DATE':<12} | {'TIME':<8} | {'MATCH':<35} | {'STATUS'}")
    print("-" * 75)

    # Current UTC time to compare against.
    # NOTE(review): assumes the API's timestamps carry a UTC offset; a naive
    # timestamp cannot be compared with this aware value -- confirm.
    now = datetime.now(timezone.utc)

    # Sort by the raw timestamp string, as before.
    fixtures.sort(key=lambda item: item[1]['commence_time'])

    for start_time, match in fixtures:
        # Format readable date/time (Local System Time)
        local_time = start_time.astimezone()
        date_str = local_time.strftime('%d-%b')  # e.g., 07-Feb
        time_str = local_time.strftime('%H:%M')  # e.g., 15:00

        match_name = f"{match['home_team']} vs {match['away_team']}"

        # Determine Status
        if start_time < now:
            status = "🔴 LIVE / PLAYING"
        else:
            hours_diff = (start_time - now).total_seconds() / 3600
            if hours_diff < 24:
                status = f"⚪ In {hours_diff:.1f} hours"
            else:
                days = hours_diff / 24
                status = f"⚪ In {days:.1f} days"

        print(f"{date_str:<12} | {time_str:<8} | {match_name:<35} | {status}")

    print("-" * 75)
    print("\nNote: The API typically provides data for the next 7-10 days.")


if __name__ == "__main__":
    get_upcoming_schedule()

Connecting to The Odds API...

✅ Found 21 matches in the schedule:

DATE         | TIME     | MATCH                               | STATUS
---------------------------------------------------------------------------
02-Feb       | 20:00    | Sunderland vs Burnley               | ⚪ In 3.7 hours
06-Feb       | 20:00    | Leeds United vs Nottingham Forest   | ⚪ In 4.2 days
07-Feb       | 12:30    | Manchester United vs Tottenham Hotspur | ⚪ In 4.8 days
07-Feb       | 15:00    | Arsenal vs Sunderland               | ⚪ In 4.9 days
07-Feb       | 15:00    | Bournemouth vs Aston Villa          | ⚪ In 4.9 days
07-Feb       | 15:00    | Burnley vs West Ham United          | ⚪ In 4.9 days
07-Feb       | 15:00    | Wolverhampton Wanderers vs Chelsea  | ⚪ In 4.9 days
07-Feb       | 15:00    | Fulham vs Everton                   | ⚪ In 4.9 days
07-Feb       | 17:30    | Newcastle United vs Brentford       | ⚪ In 5.0 days
08-Feb       | 14:00    | Brighton and Hove Albion vs Cryst