<a href="https://colab.research.google.com/github/ronettawillis/data-analyst-portfolio/blob/main/SportsTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Create a simple, self-contained sports betting algorithm package with:
# - A Python script: betting_algo.py
# - A CSV template + tiny synthetic dataset: data/games.csv
# - A README.txt with instructions
# - Run the script once on the synthetic data and show the resulting recommendations table

import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Package layout: everything is written under base_dir, with the input CSV in
# a data/ subdirectory.
base_dir = "/mnt/data/betting_package"
data_dir = os.path.join(base_dir, "data")
os.makedirs(data_dir, exist_ok=True)

# ---------------------------
# Create synthetic dataset
# ---------------------------
# Fixed seed so the generated CSV is reproducible from run to run.
np.random.seed(7)

N_HISTORICAL = 80        # games with final scores (used to fit Elo & error sigma)
N_UPCOMING = 8           # games without scores (used for recommendations)
HISTORICAL_GAP_DAYS = 3  # spacing between consecutive historical games
UPCOMING_GAP_DAYS = 2    # spacing between consecutive upcoming games

teams = ["Lions", "Tigers", "Bears", "Wolves", "Hawks", "Sharks"]
start_date = datetime(2024, 9, 1)
rows = []

# Synthetic "true" rating per team, used only to make the simulated scores
# coherent (the betting script never sees these). They do not depend on the
# game being simulated, so they are built once rather than inside the loop.
true_ratings = {t: 1500 + 80*np.sin(k) for k, t in enumerate(teams)}
home_edge = 2.0   # points
book_odds = -110  # typical -110 odds on every line

# NOTE: the order of np.random calls below determines the generated data;
# keep it unchanged to reproduce the same CSV for a given seed.
for i in range(N_HISTORICAL):
    home = np.random.choice(teams)
    away = np.random.choice([t for t in teams if t != home])
    date = start_date + timedelta(days=i * HISTORICAL_GAP_DAYS)
    rdiff = (true_ratings[home] - true_ratings[away]) / 25.0  # translate ratings to points
    # Simulate margin with noise, then split it around a 100-point baseline
    margin = rdiff + home_edge + np.random.normal(0, 10.5)
    home_score = int(100 + margin/2 + np.random.normal(0, 5))
    away_score = int(100 - margin/2 + np.random.normal(0, 5))
    # Book spread around "true" margin with noise (what the sportsbook offers);
    # negative means home favored
    book_spread = round(-(rdiff + home_edge + np.random.normal(0, 2.0)), 1)
    rows.append([date.strftime("%Y-%m-%d"), "SYN", home, away, book_spread, book_odds, home_score, away_score])

# Upcoming games start one gap after the slot following the last historical game.
future_date = start_date + timedelta(days=N_HISTORICAL * HISTORICAL_GAP_DAYS + 3)
for i in range(N_UPCOMING):
    home = np.random.choice(teams)
    away = np.random.choice([t for t in teams if t != home])
    date = future_date + timedelta(days=i * UPCOMING_GAP_DAYS)
    book_spread = round(np.random.normal(0, 6), 1)  # random line
    # Empty score fields mark the row as an upcoming game.
    rows.append([date.strftime("%Y-%m-%d"), "SYN", home, away, book_spread, book_odds, "", ""])

df = pd.DataFrame(rows, columns=[
    "date","league","home_team","away_team","book_spread","book_odds","home_score","away_score"
])
csv_path = os.path.join(data_dir, "games.csv")
df.to_csv(csv_path, index=False)


# ---------------------------
# Write README
# ---------------------------
# Plain (non-f) string: the text has no placeholders, so an f-prefix would only
# make any future literal brace in the README a formatting error.
readme = """Sports Betting Algorithm (Spread Model) — Quick Start
======================================================

Files
-----
- betting_algo.py : Main script to train an Elo-based spread model and output weekly recommendations.
- data/games.csv  : Input dataset. Historical rows include final scores; upcoming rows leave score columns empty.
- output/recommendations.csv : Model picks for upcoming games, with Kelly bet sizes.

Install & Run
-------------
1) Ensure you have Python 3.9+ with pandas and numpy installed.
2) Update data/games.csv with your league's games and lines. Keep the column names.
3) Run:
   python betting_algo.py --data data/games.csv --bankroll 10000 --kelly 0.5 --edge_pts 2.0 --home_adv 2.0

Columns (data/games.csv)
------------------------
- date (YYYY-MM-DD)
- league (e.g., NFL, NBA, etc. — free text)
- home_team, away_team (string identifiers; must be consistent across rows)
- book_spread (negative means HOME is favored by that many points)
- book_odds (American odds, e.g., -110, +105)
- home_score, away_score: leave empty for future games

What the script does
--------------------
1) Fits Elo ratings using historical rows with final scores.
2) Estimates spread error standard deviation from historical residuals.
3) Projects model spread for upcoming games.
4) Computes cover probabilities from a normal error model.
5) Calculates edge vs. sportsbook line and fractional Kelly stake.
6) Saves picks to output/recommendations.csv.

Tips for a $2,000/week Target
------------------------------
- Bankroll matters. With -110 odds and a modest edge (~3-5%), you'll typically stake 1–2% of bankroll per play (fractional Kelly).
- Volume: More bets with small edges can be more stable than few big bets.
- Discipline: Only bet when the model edge ≥ edge_pts threshold (default 2 points).
- Iterate: Refit Elo weekly; add injuries/rest/weather as extra features if you can collect them.

Disclaimer
----------
No model can guarantee profits. Use at your own risk and bet responsibly.
"""

# The README contains non-ASCII characters (em/en dashes, the >= sign), so the
# encoding is fixed explicitly instead of relying on the platform default.
with open(os.path.join(base_dir, "README.txt"), "w", encoding="utf-8") as f:
    f.write(readme)


# ---------------------------
# Write the algorithm script
# ---------------------------
script = r'''#!/usr/bin/env python3
import argparse
import pandas as pd
import numpy as np
import os # Added import for os
from math import erf, sqrt

def american_to_decimal(odds):
    """Convert American (moneyline) odds to decimal odds.

    Positive odds give the profit on a 100 stake; otherwise the magnitude is
    the stake required to profit 100. The result includes the returned stake,
    e.g. +100 -> 2.0 and -200 -> 1.5.
    """
    profit_per_unit = odds / 100.0 if odds > 0 else 100.0 / abs(odds)
    return 1 + profit_per_unit

def normal_cdf(x, mu=0.0, sigma=1.0):
    """Return the normal CDF at x for mean mu and standard deviation sigma.

    Uses the identity Phi(x) = 0.5 * (1 + erf((x - mu) / (sigma * sqrt(2)))).
    """
    erf_argument = (x - mu) / (sigma * sqrt(2))
    return 0.5 * (1 + erf(erf_argument))

def kelly_fraction(p, dec_odds, frac=1.0):
    """Return the (fractional) Kelly stake as a share of bankroll in [0, 1].

    Args:
        p: Win probability.
        dec_odds: Decimal odds (stake included); net payout is b = dec_odds - 1.
        frac: Multiplier applied to the full Kelly fraction (e.g. 0.5 for half Kelly).

    Returns:
        0.0 when there is no positive edge, when the net payout is not
        positive, or when any input is NaN/infinite; otherwise
        ((p*b - (1-p)) / b) * frac clamped to [0, 1].
    """
    # Local import: the script's top-level only imports erf and sqrt from math.
    from math import isfinite

    # NaN compares False against everything, so without this guard a missing
    # probability or odds value would slip past the "<= 0" checks below and
    # min(1.0, nan) would evaluate to 1.0 -- i.e. "stake the whole bankroll".
    if not (isfinite(p) and isfinite(dec_odds) and isfinite(frac)):
        return 0.0

    b = dec_odds - 1.0
    edge = p * b - (1 - p)
    if b <= 0 or edge <= 0:
        return 0.0
    return max(0.0, min(1.0, (edge / b) * frac))

def fit_elo(df_hist, K=25.0, home_adv_pts=2.0):
    """Fit margin-based Elo ratings from historical games.

    Every team appearing in df_hist starts at 1500. Games are processed in
    'date' order; for each game the expected home margin is
    (elo_home - elo_away) / 25 + home_adv_pts, and both ratings are moved by
    K * (actual_margin - expected_margin) / 10 in opposite directions.
    Rows with a missing score are ignored.

    Returns:
        dict mapping team -> rating.
    """
    all_teams = pd.unique(pd.concat([df_hist['home_team'], df_hist['away_team']]))
    elo = dict.fromkeys(all_teams, 1500.0)

    ordered = df_hist.sort_values('date')
    fields = ['home_team', 'away_team', 'home_score', 'away_score']
    for home, away, home_pts, away_pts in ordered[fields].itertuples(index=False, name=None):
        if pd.isna(home_pts) or pd.isna(away_pts):
            continue
        expected = (elo[home] - elo[away]) / 25.0 + home_adv_pts
        surprise = (home_pts - away_pts) - expected
        # Zero-sum update: what the home side gains, the away side loses.
        shift = K * (surprise / 10.0)
        elo[home] += shift
        elo[away] -= shift
    return elo

def project_spread(h_elo, a_elo, home_adv_pts=2.0):
    """Return the projected home-minus-away margin in points.

    25 rating points are worth one point of margin, and home_adv_pts is added
    on top. A positive result means the home team is expected to win by that
    many points.
    """
    rating_gap = h_elo - a_elo
    return rating_gap / 25.0 + home_adv_pts

def estimate_sigma(residuals):
    """Estimate the spread error standard deviation from model residuals.

    NaN entries are dropped and the sample standard deviation (ddof=1) of the
    remainder is returned. With fewer than 5 usable residuals a fixed fallback
    of 12.0 points is returned instead (sport-dependent).
    """
    values = np.array(residuals)
    usable = values[~np.isnan(values)]
    if len(usable) >= 5:
        return float(np.std(usable, ddof=1))
    return 12.0  # fallback (points)

def main():
    """Fit the Elo spread model and write recommendations for upcoming games.

    Reads the CSV given by --data. Rows with both scores present are treated
    as history (used to fit Elo ratings and the residual sigma); rows missing
    either score are treated as upcoming games. For each upcoming game the
    side (HOME/AWAY) with the higher expected value against the book line is
    chosen, sized with fractional Kelly, and kept only if the edge in points
    is at least --edge_pts and the stake is positive.

    The result is written to <out_dir>/recommendations.csv (default
    'output', relative to the current working directory). When no game
    qualifies, a header-only CSV is written and a hint is printed.

    Upcoming rows with a missing book_spread, or a missing or zero book_odds,
    are skipped because no meaningful probability or payout can be computed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--data', required=True, help='Path to games CSV')
    ap.add_argument('--bankroll', type=float, default=10000.0)
    ap.add_argument('--kelly', type=float, default=0.5, help='Fractional Kelly (0..1)')
    ap.add_argument('--edge_pts', type=float, default=2.0, help='Min edge in points to bet')
    ap.add_argument('--home_adv', type=float, default=2.0, help='Home-court/field advantage in points')
    ap.add_argument('--elo_K', type=float, default=25.0, help='Elo update strength')
    ap.add_argument('--out_dir', default='output', help='Directory for recommendations.csv')
    args = ap.parse_args()

    df = pd.read_csv(args.data)
    # Ensure correct dtypes; unparseable numbers become NaN
    df['date'] = pd.to_datetime(df['date'])
    for c in ['home_score','away_score']:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors='coerce')
    df['book_spread'] = pd.to_numeric(df['book_spread'], errors='coerce')
    df['book_odds'] = pd.to_numeric(df['book_odds'], errors='coerce')

    # Split historical vs upcoming
    hist = df.dropna(subset=['home_score','away_score']).copy()
    upcoming = df[df['home_score'].isna() | df['away_score'].isna()].copy()

    # Fit Elo
    elo = fit_elo(hist, K=args.elo_K, home_adv_pts=args.home_adv)

    # Residual = actual margin - model spread, evaluated with the final
    # (post-fit) ratings over the same historical games.
    residuals = [
        (row['home_score'] - row['away_score'])
        - project_spread(elo.get(row['home_team'], 1500.0),
                         elo.get(row['away_team'], 1500.0),
                         args.home_adv)
        for _, row in hist.iterrows()
    ]
    sigma = estimate_sigma(residuals)

    recs = []
    skipped = 0
    for _, row in upcoming.iterrows():
        h, a = row['home_team'], row['away_team']
        # Teams never seen in history fall back to the starting rating.
        h_elo = elo.get(h, 1500.0)
        a_elo = elo.get(a, 1500.0)
        model_spread = project_spread(h_elo, a_elo, args.home_adv)

        # Book spread is negative if HOME is favored by that many points
        book_spread = row['book_spread']
        book_odds = row['book_odds']
        if pd.isna(book_spread) or pd.isna(book_odds) or book_odds == 0:
            # Without a line and valid odds neither the cover probability nor
            # the payout is defined; never emit a stake for such a row.
            skipped += 1
            continue
        dec = american_to_decimal(book_odds)

        # HOME covers when margin > -book_spread, with margin ~ N(model_spread, sigma);
        # AWAY covers on the complementary event.
        home_cover_prob = 1 - normal_cdf(-book_spread, mu=model_spread, sigma=sigma)
        away_cover_prob = normal_cdf(-book_spread, mu=model_spread, sigma=sigma)

        # Expected value per unit staked for each side
        home_ev = home_cover_prob * (dec - 1) - (1 - home_cover_prob)
        away_ev = away_cover_prob * (dec - 1) - (1 - away_cover_prob)

        if home_ev >= away_ev:
            side = 'HOME'
            win_p = home_cover_prob
            edge_pts = model_spread - (-book_spread)  # model - book implied margin
        else:
            side = 'AWAY'
            win_p = away_cover_prob
            # For away side, effective model edge is (-model_spread) - (book_spread)
            edge_pts = (-model_spread) - (book_spread)

        kelly = kelly_fraction(win_p, dec, frac=args.kelly)
        stake = kelly * args.bankroll

        # Only include if model edge in points exceeds threshold
        if abs(edge_pts) >= args.edge_pts and stake > 0:
            recs.append({
                'date': row['date'].strftime('%Y-%m-%d'),
                'league': row['league'],
                'home_team': h,
                'away_team': a,
                'book_spread': book_spread,
                'book_odds': book_odds,
                'model_spread(home - away)': round(model_spread, 2),
                'sigma_pts': round(sigma, 2),
                'side': side,
                'win_prob': round(win_p, 3),
                'kelly_frac': round(kelly, 3),
                'stake_$': round(stake, 2),
                'edge_pts': round(edge_pts, 2),
                'expected_value_per_$': round(max(home_ev, away_ev), 4)
            })

    # Passing the columns explicitly keeps the frame sortable (and the CSV
    # header present) even when no game qualified; a column-less empty frame
    # would make sort_values raise KeyError before the hint below is printed.
    columns = [
        'date', 'league', 'home_team', 'away_team', 'book_spread', 'book_odds',
        'model_spread(home - away)', 'sigma_pts', 'side', 'win_prob',
        'kelly_frac', 'stake_$', 'edge_pts', 'expected_value_per_$',
    ]
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, 'recommendations.csv')
    out = pd.DataFrame(recs, columns=columns)
    out = out.sort_values(['date','expected_value_per_$'], ascending=[True, False])
    out.to_csv(out_path, index=False)
    print(f"Saved recommendations to {out_path}")
    if skipped:
        print(f"Skipped {skipped} upcoming game(s) with missing or invalid book_spread/book_odds.")
    if not recs:
        print("No bets met the edge threshold. Try lowering --edge_pts.")

if __name__ == '__main__':
    main()
'''
script_path = os.path.join(base_dir, "betting_algo.py")
# Write the generated script with an explicit encoding so the file on disk
# does not depend on the platform's default locale.
with open(script_path, "w", encoding="utf-8") as f:
    f.write(script)

# ---------------------------

# Run the script once on synthetic data
# ---------------------------
!python /mnt/data/betting_package/betting_algo.py --data /mnt/data/betting_package/data/games.csv --bankroll 20000 --kelly 0.5 --edge_pts 2.0 --home_adv 2.0 --elo_K 20.0

# Display recommendations to the user as a spreadsheet
recs_path = os.path.join(base_dir, "output", "recommendations.csv")
recs = pd.read_csv(recs_path) if os.path.exists(recs_path) else pd.DataFrame()
if not recs.empty:
    print("Betting Recommendations (Demo):")
    print(recs.head())


print("Paths created:")
print(f"- Script: {script_path}")
print(f"- Data CSV: {csv_path}")
print(f"- README: {os.path.join(base_dir, 'README.txt')}")
print(f"- Output: {recs_path}")

Saved recommendations to output/recommendations.csv
Paths created:
- Script: /mnt/data/betting_package/betting_algo.py
- Data CSV: /mnt/data/betting_package/data/games.csv
- README: /mnt/data/betting_package/README.txt
- Output: /mnt/data/betting_package/output/recommendations.csv
