<a href="https://colab.research.google.com/github/ronin-winter/Predicting_PL_Games/blob/main/Predicting_PL_Games.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import requests
import warnings

warnings.filterwarnings('ignore')

class PLAdvancedBettor:
    def __init__(self, api_key=None):
        """Create the classifiers, the empty data holders and the team-name lookup.

        Args:
            api_key: Key sent as ``apiKey`` to the odds API. When it is falsy,
                ``fetch_event_id`` skips the lookup and no odds are shown.
        """
        self.api_key = api_key

        # One forest per target: match result (H/D/A) and both-teams-to-score.
        self.model_result = RandomForestClassifier(n_estimators=100, random_state=42)
        self.model_btts = RandomForestClassifier(n_estimators=100, random_state=42)

        # Filled by get_football_data() and predict() respectively.
        self.match_data = None
        self.last_match_stats = {}

        # (official full name, names a user or the CSV may use for that club).
        # Identity entries are kept so the lookup content stays unchanged.
        official_to_aliases = (
            # Big Six & established sides
            ('Manchester United', ('Man United', 'Man Utd')),
            ('Manchester City', ('Man City',)),
            ('Tottenham Hotspur', ('Spurs', 'Tottenham')),
            ('Newcastle United', ('Newcastle',)),
            ('Wolverhampton Wanderers', ('Wolves',)),
            ('Brighton and Hove Albion', ('Brighton',)),
            ('West Ham United', ('West Ham',)),
            ('Chelsea', ('Chelsea',)),
            ('Arsenal', ('Arsenal',)),
            ('Liverpool', ('Liverpool',)),
            ('Aston Villa', ('Aston Villa', 'Villa')),
            # Mid-table & established sides
            ('Everton', ('Everton',)),
            ('Crystal Palace', ('Crystal Palace', 'Palace')),
            ('Brentford', ('Brentford',)),
            ('Fulham', ('Fulham',)),
            # NOTE(review): 'Boro' is mapped to Bournemouth here - confirm
            # that this alias is intended.
            ('Bournemouth', ('Bournemouth', 'Boro')),
            ('Nottingham Forest', ("Nott'm Forest", 'Forest')),
            # Promoted teams 2025-26 & recently promoted sides
            ('Leicester City', ('Leicester', 'Leicester City')),
            ('Ipswich Town', ('Ipswich', 'Ipswich Town')),
            ('Burnley', ('Burnley',)),
            ('Sunderland AFC', ('Sunderland', 'Sunderland AFC')),
            ('Leeds United', ('Leeds', 'Leeds United')),
            ('Southampton', ('Southampton', 'Saints')),
        )

        # Flatten into alias -> official name, preserving the listing order.
        self.csv_fixer = {
            alias: official
            for official, aliases in official_to_aliases
            for alias in aliases
        }

    def get_football_data(self):
        """Download the season CSVs, standardize team names and fill ``self.match_data``.

        Each season file is fetched with a 10 second timeout, its ``HomeTeam``
        and ``AwayTeam`` columns are passed through ``self.csv_fixer`` and the
        rows are reduced by ``clean_season_data``. A season that cannot be
        downloaded or parsed is reported and skipped instead of being
        silently ignored.

        Returns:
            True when at least one season produced matches (``self.match_data``
            is then the concatenation of all seasons, in the order listed
            below); False otherwise.
        """
        # Local import: the file's import header does not bring in StringIO.
        from io import StringIO

        print("Downloading and standardizing historical data...")
        # Historical results are crucial for the Random Forest to understand patterns

        season_urls = {
            '2023-24': 'https://www.football-data.co.uk/mmz4281/2324/E0.csv',
            '2024-25': 'https://www.football-data.co.uk/mmz4281/2425/E0.csv',
            '2025-26': 'https://www.football-data.co.uk/mmz4281/2526/E0.csv'
        }
        all_matches = []
        for season, url in season_urls.items():
            try:
                r = requests.get(url, timeout=10)
                if r.status_code != 200:
                    print(f"  Skipping {season}: HTTP {r.status_code}")
                    continue
                df = pd.read_csv(StringIO(r.text))
                df['HomeTeam'] = df['HomeTeam'].replace(self.csv_fixer)
                df['AwayTeam'] = df['AwayTeam'].replace(self.csv_fixer)
                season_frame = self.clean_season_data(df, season)
            except (requests.RequestException, KeyError, ValueError) as exc:
                # Network failure, missing column or unparsable CSV: keep the
                # best-effort behaviour but say which season was lost and why.
                print(f"  Skipping {season}: {exc}")
                continue

            # A season with no usable rows must not count as loaded data.
            if not season_frame.empty:
                all_matches.append(season_frame)

        if all_matches:
            self.match_data = pd.concat(all_matches, ignore_index=True)
            print(f"Database loaded: {len(self.match_data)} matches.")
            return True
        return False

    def clean_season_data(self, data, season):
        matches = []
        for _, row in data.iterrows():
            try:
                if pd.isna(row.get('FTHG')): continue
                matches.append({
                    'season': season,
                    'home_team': row.get('HomeTeam'),
                    'away_team': row.get('AwayTeam'),
                    'home_goals': int(row['FTHG']),
                    'away_goals': int(row['FTAG']),
                    'result': 'H' if row['FTHG'] > row['FTAG'] else 'A' if row['FTAG'] > row['FTHG'] else 'D',
                    'btts_result': 1 if (row['FTHG'] > 0 and row['FTAG'] > 0) else 0
                })
            except: continue
        return pd.DataFrame(matches)

    def calculate_features(self, data):
        enhanced = data.copy().sort_values(['season']).reset_index(drop=True)
        cols = ['home_strength', 'away_strength', 'home_form', 'away_form',
                'h2h_home_dominance', 'home_goals_avg', 'away_goals_avg',
                'home_btts_rate', 'away_btts_rate']
        for c in cols: enhanced[c] = 0.0

        for i, row in enhanced.iterrows():
            h_stats = self.get_stats(enhanced, row['home_team'], i)
            a_stats = self.get_stats(enhanced, row['away_team'], i)
            h2h = self.get_h2h(enhanced, row['home_team'], row['away_team'], i)

            enhanced.loc[i, 'home_strength'] = h_stats['strength']
            enhanced.loc[i, 'away_strength'] = a_stats['strength']
            enhanced.loc[i, 'home_form'] = h_stats['form']
            enhanced.loc[i, 'away_form'] = a_stats['form']
            enhanced.loc[i, 'home_goals_avg'] = h_stats['goals_for']
            enhanced.loc[i, 'away_goals_avg'] = a_stats['goals_for']
            enhanced.loc[i, 'home_btts_rate'] = h_stats['btts_rate']
            enhanced.loc[i, 'away_btts_rate'] = a_stats['btts_rate']
            enhanced.loc[i, 'h2h_home_dominance'] = h2h
        return enhanced

    def get_stats(self, data, team, idx, games=5):
        hist = data[((data['home_team']==team)|(data['away_team']==team)) & (data.index < idx)].tail(games)
        if len(hist)==0: return {'strength': 50, 'form': 5, 'goals_for': 1.5, 'btts_rate': 0.5}

        pts, scored, btts = 0, 0, 0
        for _, m in hist.iterrows():
            is_home = m['home_team'] == team
            scored += m['home_goals'] if is_home else m['away_goals']
            if m['btts_result'] == 1: btts += 1
            pts += 3 if (is_home and m['result']=='H') or (not is_home and m['result']=='A') else 1 if m['result']=='D' else 0

        return {
            'strength': (pts/len(hist))*20+20, 'form': pts,
            'goals_for': scored/len(hist), 'btts_rate': btts/len(hist)
        }

    def get_h2h(self, data, h_team, a_team, idx):
        h2h = data[(((data['home_team']==h_team)&(data['away_team']==a_team)) |
                   ((data['home_team']==a_team)&(data['away_team']==h_team))) & (data.index < idx)].tail(3)
        score = 0
        for _, m in h2h.iterrows():
            if m['result'] == 'D': continue
            winner = m['home_team'] if m['result'] == 'H' else m['away_team']
            score += 1 if winner == h_team else -1
        return score

    def train_models(self):
        if self.match_data is None: return False
        print("Training Models...")
        data = self.calculate_features(self.match_data).iloc[50:]
        X_res = data[['home_strength', 'away_strength', 'home_form', 'away_form', 'h2h_home_dominance', 'home_goals_avg', 'away_goals_avg']]
        self.model_result.fit(X_res, data['result'])
        X_btts = data[['home_goals_avg', 'away_goals_avg', 'home_btts_rate', 'away_btts_rate']]
        self.model_btts.fit(X_btts, data['btts_result'])
        return True

    def predict(self, home, away):
        """Estimate outcome and BTTS probabilities for a single fixture.

        The features are computed from every row of ``self.match_data`` and a
        summary of them is stored in ``self.last_match_stats`` for
        ``explain_model_decision``.

        Returns:
            Tuple ``(p_res, p_btts)``: ``p_res`` maps each class known to the
            result model to its probability, ``p_btts`` is the second column
            of the BTTS model's ``predict_proba`` output.
        """
        # Larger than any row index, so the whole table counts as history.
        beyond_last_row = 999999
        home_stats = self.get_stats(self.match_data, home, beyond_last_row)
        away_stats = self.get_stats(self.match_data, away, beyond_last_row)
        h2h = self.get_h2h(self.match_data, home, away, beyond_last_row)

        self.last_match_stats = {
            'home': home,
            'away': away,
            'h_form': home_stats['form'],
            'a_form': away_stats['form'],
            'h_str': home_stats['strength'],
            'a_str': away_stats['strength'],
            'h2h': h2h,
        }

        feature_row = {
            'home_strength': home_stats['strength'],
            'away_strength': away_stats['strength'],
            'home_form': home_stats['form'],
            'away_form': away_stats['form'],
            'h2h_home_dominance': h2h,
            'home_goals_avg': home_stats['goals_for'],
            'away_goals_avg': away_stats['goals_for'],
            'home_btts_rate': home_stats['btts_rate'],
            'away_btts_rate': away_stats['btts_rate'],
        }
        # Column order mirrors the frames used when the models were fitted.
        result_columns = ['home_strength', 'away_strength', 'home_form',
                          'away_form', 'h2h_home_dominance',
                          'home_goals_avg', 'away_goals_avg']
        btts_columns = ['home_goals_avg', 'away_goals_avg',
                        'home_btts_rate', 'away_btts_rate']
        f_res = pd.DataFrame([feature_row], columns=result_columns)
        f_btts = pd.DataFrame([feature_row], columns=btts_columns)

        class_probs = self.model_result.predict_proba(f_res)[0]
        p_res = dict(zip(self.model_result.classes_, class_probs))
        p_btts = self.model_btts.predict_proba(f_btts)[0][1]
        return p_res, p_btts

    def fetch_event_id(self, home_team, away_team):
        if not self.api_key: return None, home_team, away_team
        print(f"Finding Event ID for {home_team} vs {away_team}...")
        url = 'https://api.the-odds-api.com/v4/sports/soccer_epl/odds/'
        params = {'apiKey': self.api_key, 'regions': 'uk', 'markets': 'h2h', 'oddsFormat': 'decimal'}

        try:
            data = requests.get(url, params=params).json()
            if isinstance(data, list):
                for m in data:
                    if (home_team in m['home_team'] and away_team in m['away_team']) or \
                       (away_team in m['home_team'] and home_team in m['away_team']):
                        return m['id'], m['home_team'], m['away_team']
            print("❌ Match not found in current API schedule.")
            return None, home_team, away_team
        except: return None, home_team, away_team

    def fetch_specific_odds(self, event_id):
        """Request the h2h, BTTS, draw-no-bet and double-chance markets for one event.

        Args:
            event_id: Event identifier as returned by ``fetch_event_id``.

        Returns:
            The decoded JSON object on success, or None when the request fails,
            the body is not valid JSON, or the payload is not a JSON object
            (``extract_odds`` reads it with ``.get``).
        """
        print("Fetching extended markets (BTTS, DNB, DC)...")
        url = f'https://api.the-odds-api.com/v4/sports/soccer_epl/events/{event_id}/odds'
        params = {'apiKey': self.api_key, 'regions': 'uk', 'markets': 'h2h,btts,draw_no_bet,double_chance', 'oddsFormat': 'decimal'}
        try:
            # Bounded wait, consistent with the other HTTP calls in this class.
            payload = requests.get(url, params=params, timeout=10).json()
        except (requests.RequestException, ValueError) as exc:
            print(f"❌ Could not fetch extended markets: {exc}")
            return None
        return payload if isinstance(payload, dict) else None

    def extract_odds(self, match_data, home_team, away_team):
        odds = {}
        def get_price(market_key, outcome_name):
            prices = []
            for bookie in match_data.get('bookmakers', []):
                for market in bookie['markets']:
                    if market['key'] == market_key:
                        for outcome in market['outcomes']:
                            if outcome_name in outcome['name'] or outcome['name'] in outcome_name:
                                prices.append(outcome['price'])
            return max(prices) if prices else None

        odds['H_ML'] = get_price('h2h', home_team)
        odds['D']    = get_price('h2h', 'Draw')
        odds['A_ML'] = get_price('h2h', away_team)
        odds['BTTS_Yes'] = get_price('btts', 'Yes')
        odds['H_DNB'] = get_price('draw_no_bet', home_team)
        odds['A_DNB'] = get_price('draw_no_bet', away_team)
        odds['DC_1X'] = get_price('double_chance', home_team)
        odds['DC_X2'] = get_price('double_chance', away_team)
        return odds

    def run_prediction(self, user_home, user_away):
        """Run the full pipeline for one fixture and print the comparison table.

        The user's team names are standardized through ``self.csv_fixer``, the
        event and its odds are fetched when available, the models are queried
        and one row per market is printed, followed by the reasoning summary.

        Args:
            user_home: Home team name as typed by the user.
            user_away: Away team name as typed by the user.
        """
        aliases = self.csv_fixer
        event_id, real_home, real_away = self.fetch_event_id(
            aliases.get(user_home, user_home),
            aliases.get(user_away, user_away),
        )

        # Odds stay empty unless both the event and its markets were found.
        market_payload = self.fetch_specific_odds(event_id) if event_id else None
        if market_payload:
            odds = self.extract_odds(market_payload, real_home, real_away)
        else:
            odds = {}

        p_res, p_btts = self.predict(real_home, real_away)
        home_p = p_res.get('H', 0)
        draw_p = p_res.get('D', 0)
        away_p = p_res.get('A', 0)
        divider = "-" * 85

        print(f"\nPREDICTION: {real_home} vs {real_away}")
        print("=" * 85)
        self.print_row("Home Win", home_p, odds.get('H_ML'))
        self.print_row("Draw", draw_p, odds.get('D'))
        self.print_row("Away Win", away_p, odds.get('A_ML'))
        print(divider)
        self.print_row("BTTS Yes", p_btts, odds.get('BTTS_Yes'))
        print(divider)

        if p_res:
            # Draw-no-bet: renormalise over the two decisive outcomes.
            decisive = home_p + away_p
            if decisive > 0:
                dnb_h, dnb_a = home_p / decisive, away_p / decisive
            else:
                dnb_h, dnb_a = 0, 0
            self.print_row("Home DNB", dnb_h, odds.get('H_DNB'))
            self.print_row("Away DNB", dnb_a, odds.get('A_DNB'))
            print(divider)

            # Double chance: each side's win probability plus the draw.
            self.print_row("1X (Home/Draw)", home_p + draw_p, odds.get('DC_1X'))
            self.print_row("X2 (Away/Draw)", draw_p + away_p, odds.get('DC_X2'))

        self.explain_model_decision()

    def print_row(self, label, prob, odd):
        if prob is None: return
        fair = 1/prob if prob > 0 else 0
        if odd:
            ev = (prob * odd) - 1
            mark = " <<< VALUE" if ev > 0.05 else ""
            print(f"{label:<15} | Model: {prob:<5.1%} | Fair: {fair:<5.2f} | Odds: {odd:<5} | EV: {ev*100:>+5.1f}%{mark}")
        else:
            print(f"{label:<15} | Model: {prob:<5.1%} | Fair: {fair:<5.2f} | Odds: N/A")

    def explain_model_decision(self):
        s = self.last_match_stats
        print("\n--- MODEL REASONING ---")
        h_form = s['h_form']
        a_form = s['a_form']

        if abs(h_form - a_form) < 2:
            print(f"1. FORM PARITY: Both teams have similar recent form ({h_form} vs {a_form} pts).")
            print("   -> This increases the Draw probability.")
        elif h_form > a_form:
            print(f"1. FORM: {s['home']} has better form ({h_form} vs {a_form} pts).")
        else:
            print(f"1. FORM: {s['away']} has better form ({a_form} vs {h_form} pts).")

        h2h = s['h2h']
        if h2h < 0:
            print(f"2. BOGEY TEAM: History favors {s['away']} (H2H Score: {h2h}).")
            print("   -> The model detects that the Away team often gets results in this matchup.")
        elif h2h > 0:
            print(f"2. HISTORY: History favors {s['home']} (H2H Score: +{h2h}).")
        else:
            print(f"2. HISTORY: Recent meetings have been even.")

        print(f"3. STRENGTH RATING: Home ({s['h_str']:.1f}) vs Away ({s['a_str']:.1f})")

def main():
    """Load the data, train the models and answer fixture queries interactively.

    The loop asks for a home and an away team, runs ``run_prediction`` for
    the pair and repeats until the user types 'quit' or 'exit' at either
    prompt. Errors raised while analysing a fixture are reported and the
    loop continues.
    """
    # Use your provided API Key
    API_KEY = "YOUR_API_KEY"

    bettor = PLAdvancedBettor(API_KEY)

    # Training is only attempted once the download has succeeded.
    ready = bettor.get_football_data() and bettor.train_models()
    if not ready:
        print("Initialization failed.")
        return

    banner_rule = "="*50
    print("\n" + banner_rule)
    print(" PREMIER LEAGUE VALUE PREDICTOR 2025-26")
    print(" Type 'quit' to exit.")
    print(banner_rule)

    exit_words = ['quit', 'exit']
    while True:
        print("\nEnter Match Details:")
        home_input = input("  Home Team: ").strip()
        if home_input.lower() in exit_words:
            break

        away_input = input("  Away Team: ").strip()
        if away_input.lower() in exit_words:
            break

        if not (home_input and away_input):
            print("  ⚠ Please enter both team names.")
            continue

        try:
            bettor.run_prediction(home_input, away_input)
        except Exception as e:
            print(f"  ⚠ Error analyzing match: {e}")


if __name__ == "__main__":
    main()

Downloading and standardizing historical data...
Database loaded: 999 matches.
Training Models...

 PREMIER LEAGUE VALUE PREDICTOR 2025-26
 Type 'quit' to exit.

Enter Match Details:
  Home Team: Manchester United
  Away Team: Tottenham
Finding Event ID for Manchester United vs Tottenham Hotspur...
Fetching extended markets (BTTS, DNB, DC)...

PREDICTION: Manchester United vs Tottenham Hotspur
Home Win        | Model: 36.3% | Fair: 2.75  | Odds: 1.7   | EV: -38.2%
Draw            | Model: 30.0% | Fair: 3.33  | Odds: 4.6   | EV: +38.0% <<< VALUE
Away Win        | Model: 33.7% | Fair: 2.97  | Odds: 5.0   | EV: +68.3% <<< VALUE
-------------------------------------------------------------------------------------
BTTS Yes        | Model: 68.8% | Fair: 1.45  | Odds: 1.62  | EV: +11.4% <<< VALUE
-------------------------------------------------------------------------------------
Home DNB        | Model: 51.9% | Fair: 1.93  | Odds: 1.28  | EV: -33.6%
Away DNB        | Model: 48.1% | Fair: 2.