In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools
from collections import deque
import glob
import os

# ------------------------
# Data Handling and Preparation with Days Since Last Game
# Combining NBA datasets (NBA2004-05 to NBA2024-25) with New CSV Column Names
# ------------------------

# Define the folder path and file pattern.
folder_path = "/Users/tristanpoul/Desktop/Personal code/AllNBAdata"
# Files should be named like NBA2004-05.csv, NBA2005-06.csv, ..., NBA2024-25.csv
file_pattern = os.path.join(folder_path, "NBA*-*.csv")
file_list = glob.glob(file_pattern)

# Function to extract starting year from file names (used for sorting).
def extract_year(file_name):
    base = os.path.basename(file_name)
    # Remove the "NBA" prefix and the ".csv" extension,
    # then take the first 4 characters (starting year).
    year_str = base.replace("NBA", "").replace(".csv", "")[:4]
    return int(year_str)

file_list.sort(key=extract_year)

# Read each CSV file, rename the columns, and keep only the required columns.
df_list = []
for file in file_list:
    df = pd.read_csv(file)
    # Rename columns from the new CSV schema to your analysis schema.
    df = df.rename(columns={
        "Visitor/Neutral": "Visitor",  # Visitor team name
        "Home/Neutral": "Home",          # Home team name
        "PTS": "VPTS",                   # Visitor points
        "PTS.1": "HPTS"                  # Home points
    })
    # Keep only the columns needed for analysis.
    df = df[["Date", "Visitor", "Home", "VPTS", "HPTS"]]
    df_list.append(df)

# Combine all DataFrames together.
full25season = pd.concat(df_list, ignore_index=True)

# Convert the "Date" column to datetime format.
full25season['Date_clean'] = pd.to_datetime(full25season['Date'], format='%a %b %d %Y', errors='coerce')

# NOTE: We no longer filter out games before 2021 so that all seasons are included.
# full25season = full25season[full25season['Date_clean'].dt.year >= 2021]

# Sort the data by date and remove rows with missing home points.
full25season = full25season.sort_values('Date_clean')
full25season = full25season[full25season['HPTS'].notna()]

# Create Season column:
# If month is October or later, season = current year; otherwise, season = previous year.
full25season['Season'] = full25season['Date_clean'].apply(lambda d: d.year if d.month >= 10 else d.year - 1)

# ------------------------
# Remove the 2019-2020 season.
# The 2019-20 season is represented by Season == 2019.
# ------------------------
full25season = full25season[full25season['Season'] != 2019]

# Initialize starting Elo ratings for each team (all start at 1000).
teams = pd.unique(pd.concat([full25season['Home'], full25season['Visitor']]))
elo_vec = {team: 1000 for team in teams}

# Prepare Elo columns in the DataFrame.
for col in ['visitor_elo_pre', 'home_elo_pre', 'visitor_elo_post', 'home_elo_post']:
    full25season[col] = np.nan

# Prepare additional columns for season metrics.
full25season['home_game_num'] = np.nan
full25season['visitor_game_num'] = np.nan
full25season['home_win_pct'] = np.nan
full25season['visitor_win_pct'] = np.nan

# NEW: Add columns for days since last game.
full25season['home_days_since_last'] = np.nan
full25season['visitor_days_since_last'] = np.nan

# Structures for dynamic K ratings and season-specific tracking.
games_played_vec = {team: 0 for team in teams}
season_game_count = {}  # key = (season, team)
season_wins = {}        # key = (season, team)
last_game_date = {team: None for team in teams}  # Store last game date for each team.

# Set the current season based on the first game.
current_season = full25season.iloc[0]['Season']

def get_dynamic_K(gp):
    if gp < 5:
        return 20
    elif gp < 10:
        return 15
    else:
        return 12

# Process each game to update Elo ratings, season stats, and days between games.
for i, game in full25season.iterrows():
    game_season = game['Season']
    current_date = game['Date_clean']
    
    # Reset Elo ratings and season stats at a season change.
    if game_season != current_season:
        sorted_teams = sorted(elo_vec.keys(), key=lambda team: elo_vec[team], reverse=True)
        total_teams = len(sorted_teams)
        for rank, team in enumerate(sorted_teams, start=1):
            elo_vec[team] = 1000 + ((total_teams / 2) - rank) * 3
        season_game_count = {}
        season_wins = {}
        current_season = game_season

    visitor_team = game['Visitor']
    home_team = game['Home']
    visitor_key = (game_season, visitor_team)
    home_key = (game_season, home_team)
    
    # Compute pre-game win percentage for each team.
    pre_visitor_games = season_game_count.get(visitor_key, 0)
    pre_home_games = season_game_count.get(home_key, 0)
    visitor_win_pct = season_wins.get(visitor_key, 0) / pre_visitor_games if pre_visitor_games > 0 else 0.5
    home_win_pct = season_wins.get(home_key, 0) / pre_home_games if pre_home_games > 0 else 0.5
    full25season.at[i, 'visitor_win_pct'] = visitor_win_pct
    full25season.at[i, 'home_win_pct'] = home_win_pct
    
    visitor_game_num = pre_visitor_games + 1
    home_game_num = pre_home_games + 1
    season_game_count[visitor_key] = visitor_game_num
    season_game_count[home_key] = home_game_num
    full25season.at[i, 'visitor_game_num'] = visitor_game_num
    full25season.at[i, 'home_game_num'] = home_game_num
    
    # Compute days since last game.
    if last_game_date[home_team] is None:
        home_days_since = 7
    else:
        home_days_since = (current_date - last_game_date[home_team]).days
        
    if last_game_date[visitor_team] is None:
        visitor_days_since = 7
    else:
        visitor_days_since = (current_date - last_game_date[visitor_team]).days
    
    full25season.at[i, 'home_days_since_last'] = home_days_since
    full25season.at[i, 'visitor_days_since_last'] = visitor_days_since
    
    last_game_date[home_team] = current_date
    last_game_date[visitor_team] = current_date
    
    # Record pre-game Elo ratings.
    visitor_elo_pre = elo_vec[visitor_team]
    home_elo_pre = elo_vec[home_team]
    full25season.at[i, 'visitor_elo_pre'] = visitor_elo_pre
    full25season.at[i, 'home_elo_pre'] = home_elo_pre
    
    # Determine dynamic K values.
    K_visitor = get_dynamic_K(games_played_vec[visitor_team])
    K_home = get_dynamic_K(games_played_vec[home_team])
    
    # Calculate expected win probabilities.
    exp_visitor = 1 / (1 + 10 ** ((home_elo_pre - visitor_elo_pre) / 400))
    exp_home = 1 - exp_visitor
    
    # Set the outcome for the game.
    if game['VPTS'] > game['HPTS']:
        actual_visitor, actual_home = 1, 0
    else:
        actual_visitor, actual_home = 0, 1
    
    # Update Elo ratings.
    visitor_elo_post = visitor_elo_pre + K_visitor * (actual_visitor - exp_visitor)
    home_elo_post = home_elo_pre + K_home * (actual_home - exp_home)
    elo_vec[visitor_team] = visitor_elo_post
    elo_vec[home_team] = home_elo_post
    full25season.at[i, 'visitor_elo_post'] = visitor_elo_post
    full25season.at[i, 'home_elo_post'] = home_elo_post
    
    games_played_vec[visitor_team] += 1
    games_played_vec[home_team] += 1
    
    # Update season wins.
    season_wins[visitor_key] = season_wins.get(visitor_key, 0) + actual_visitor
    season_wins[home_key] = season_wins.get(home_key, 0) + actual_home

# Compute recent win margins (rolling window over the last 5 games).
recent_margins = {team: deque(maxlen=5) for team in teams}
full25season['home_recent_margin'] = 0.0
full25season['visitor_recent_margin'] = 0.0

for i, game in full25season.iterrows():
    home_team = game['Home']
    visitor_team = game['Visitor']
    home_recent = np.mean(recent_margins[home_team]) if len(recent_margins[home_team]) > 0 else 0.0
    visitor_recent = np.mean(recent_margins[visitor_team]) if len(recent_margins[visitor_team]) > 0 else 0.0
    full25season.at[i, 'home_recent_margin'] = home_recent
    full25season.at[i, 'visitor_recent_margin'] = visitor_recent
    
    home_margin = game['HPTS'] - game['VPTS']
    visitor_margin = game['VPTS'] - game['HPTS']
    
    recent_margins[home_team].append(home_margin)
    recent_margins[visitor_team].append(visitor_margin)

# Compute rolling wins (number of wins in the last 5 games) for each team.
recent_wins = {team: deque(maxlen=5) for team in teams}
full25season['home_recent_wins'] = 0
full25season['visitor_recent_wins'] = 0

for i, game in full25season.iterrows():
    home_team = game['Home']
    visitor_team = game['Visitor']
    home_recent_wins = sum(recent_wins[home_team]) if recent_wins[home_team] else 0
    visitor_recent_wins = sum(recent_wins[visitor_team]) if recent_wins[visitor_team] else 0
    full25season.at[i, 'home_recent_wins'] = home_recent_wins
    full25season.at[i, 'visitor_recent_wins'] = visitor_recent_wins

    # Determine the outcome of the game
    if game['HPTS'] > game['VPTS']:
        recent_wins[home_team].append(1)
        recent_wins[visitor_team].append(0)
    else:
        recent_wins[home_team].append(0)
        recent_wins[visitor_team].append(1)


# Create target variable for classification (for comparison purposes).
full25season['home_win'] = (full25season['HPTS'] > full25season['VPTS']).astype(int)

# Clip extreme values in "days since last game" to a maximum of 30 days.
full25season['home_days_since_last'] = full25season['home_days_since_last'].clip(upper=30)
full25season['visitor_days_since_last'] = full25season['visitor_days_since_last'].clip(upper=30)

# ------------------------
# Extract Elo time series for each team.
# ------------------------
elo_records = []
for _, row in full25season.iterrows():
    date = row['Date_clean']
    home_team = row['Home']
    visitor_team = row['Visitor']
    home_elo = row['home_elo_post']
    visitor_elo = row['visitor_elo_post']
    
    elo_records.append({'Team': home_team, 'Date': date, 'Elo': home_elo})
    elo_records.append({'Team': visitor_team, 'Date': date, 'Elo': visitor_elo})

elo_df = pd.DataFrame(elo_records)


In [2]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# ------------------------
# Single Non-Linear Model for Winner Prediction
# ------------------------

# Define the base features. These features should have been computed during your preprocessing.
# They include:
#   - diff_elo: Difference in pre-game Elo (home_elo_pre - visitor_elo_pre)
#   - home_recent_margin, visitor_recent_margin: Recent average margins from the last few games.
#   - win_pct_diff: Difference between home and visitor win percentages.
#   - home_days_since_last, visitor_days_since_last: Number of days since each team's last game.
#   - home_recent_wins, visitor_recent_wins: Number of wins over the last 5 games.
base_features = [
    'diff_elo', 
    'home_recent_margin', 
    'visitor_recent_margin', 
    'win_pct_diff', 
    'home_days_since_last', 
    'visitor_days_since_last',
    'home_recent_wins', 
    'visitor_recent_wins'
]

# Filter ml_data from full25season to exclude the first 5 games for both home and visitor teams.
ml_data = full25season[
    (full25season['home_game_num'] > 5) & 
    (full25season['visitor_game_num'] > 5)
].copy()

# Compute additional features (if not already computed).
# 'diff_elo' and 'win_pct_diff' will be recalculated here.
ml_data['diff_elo'] = ml_data['home_elo_pre'] - ml_data['visitor_elo_pre']
ml_data['win_pct_diff'] = ml_data['home_win_pct'] - ml_data['visitor_win_pct']

# Set up the features and target.
X = ml_data[base_features]
y = ml_data['home_win']  # Target: 1 if home wins, 0 if not.

# Split data into training and test sets (e.g., 80/20 split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=42
)

# Standardize the features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a RandomForestClassifier model.
model = RandomForestClassifier(n_estimators=300, random_state=42)

# Use 5-fold cross-validation on the training set.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=cv, scoring='accuracy')
print("Cross-validated Accuracy on Training Set: {:.2f}".format(cv_scores.mean()))

# Train the model on the full training set.
model.fit(X_train_scaled, y_train)

# Make predictions on both training and test sets.
y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

# Evaluate the model.
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print("\nFinal Model Evaluation:")
print("  Train Accuracy: {:.2f}".format(train_accuracy))
print("  Test Accuracy: {:.2f}".format(test_accuracy))
print("\nClassification Report on Test Set:")
print(classification_report(y_test, y_test_pred))


Cross-validated Accuracy on Training Set: 0.65

Final Model Evaluation:
  Train Accuracy: 1.00
  Test Accuracy: 0.64

Classification Report on Test Set:
              precision    recall  f1-score   support

           0       0.58      0.47      0.52       499
           1       0.67      0.76      0.72       710

    accuracy                           0.64      1209
   macro avg       0.63      0.62      0.62      1209
weighted avg       0.64      0.64      0.64      1209



In [6]:
import pandas as pd
import numpy as np

# ---------------------------------------------
# Updated Matchups DataFrame Generation Using the Win Probabilities
# from Your Final ML Model
# ---------------------------------------------

# Define the updated base features (we've removed 'home_game_num' and 'visitor_game_num')
base_features_updated = [
    'diff_elo', 
    'home_recent_margin', 
    'visitor_recent_margin', 
    'win_pct_diff', 
    'home_days_since_last', 
    'visitor_days_since_last',
    'home_recent_wins', 
    'visitor_recent_wins'
]

# Extract the most recent metrics for each team from full25season.
# (Ensure full25season contains the computed rolling win features.)
latest_home = full25season.sort_values('Date_clean').groupby('Home').last().reset_index()
latest_away = full25season.sort_values('Date_clean').groupby('Visitor').last().reset_index()

# Create dictionaries mapping team names to their latest metrics.
home_metrics = {row['Home']: row for _, row in latest_home.iterrows()}
away_metrics = {row['Visitor']: row for _, row in latest_away.iterrows()}

matchup_list = []
teams_list = list(elo_vec.keys())

for home_team in teams_list:
    for away_team in teams_list:
        if home_team != away_team:
            # Retrieve current Elo ratings (default to 1000 if missing)
            home_elo = elo_vec.get(home_team, 1000)
            away_elo = elo_vec.get(away_team, 1000)
            diff_elo = home_elo - away_elo
            
            # Retrieve home team metrics.
            if home_team in home_metrics:
                home_recent_margin = home_metrics[home_team].get('home_recent_margin', 0.0)
                home_days_since_last = home_metrics[home_team].get('home_days_since_last', 7)
                home_win_pct = home_metrics[home_team].get('home_win_pct', 0.5)
                home_recent_wins = home_metrics[home_team].get('home_recent_wins', 0)
            else:
                home_recent_margin = 0.0
                home_days_since_last = 7
                home_win_pct = 0.5
                home_recent_wins = 0
                
            # Retrieve visitor team metrics.
            if away_team in away_metrics:
                visitor_recent_margin = away_metrics[away_team].get('visitor_recent_margin', 0.0)
                visitor_days_since_last = away_metrics[away_team].get('visitor_days_since_last', 7)
                visitor_win_pct = away_metrics[away_team].get('visitor_win_pct', 0.5)
                visitor_recent_wins = away_metrics[away_team].get('visitor_recent_wins', 0)
            else:
                visitor_recent_margin = 0.0
                visitor_days_since_last = 7
                visitor_win_pct = 0.5
                visitor_recent_wins = 0
            
            # Compute win percentage difference.
            win_pct_diff = home_win_pct - visitor_win_pct
            
            # Form the feature vector (order must match the order used during training)
            feature_vector = [
                diff_elo,
                home_recent_margin,
                visitor_recent_margin,
                win_pct_diff,
                home_days_since_last,
                visitor_days_since_last,
                home_recent_wins,
                visitor_recent_wins
            ]
            
            # Create a DataFrame for the feature vector and ensure proper column names.
            feature_df = pd.DataFrame([feature_vector], columns=base_features_updated)
            # Standardize the feature vector using the scaler fitted during training.
            feature_vector_scaled = scaler.transform(feature_df)
            
            # Predict win probability using the trained ML model.
            # (Here, 'model' is your final trained winner prediction model.)
            pred_win_prob = model.predict_proba(feature_vector_scaled)[0, 1]
            final_pred = 1 if pred_win_prob >= 0.5 else 0
            pred_winner = 'Home' if final_pred == 1 else 'Away'
            
            matchup_list.append({
                "Home": home_team,
                "Away": away_team,
                "PredWinner": pred_winner,
                "PredWinProb": pred_win_prob
            })

# Create the matchups DataFrame.
matchups_df = pd.DataFrame(matchup_list)

# Print the matchup for Minnesota Timberwolves vs. Memphis Grizzlies.
tm_vs_gr = matchups_df[(matchups_df['Home'] == 'Los Angeles Lakers') & 
                       (matchups_df['Away'] == 'Minnesota Timberwolves')]
print(tm_vs_gr)
print(1/1.42)

tm_vs_xy = matchups_df[(matchups_df['Home'] == 'Indiana Pacers') & 
                       (matchups_df['Away'] == 'Milwaukee Bucks')]
print(tm_vs_xy)
print(1/1.56)



                  Home                    Away PredWinner  PredWinProb
82  Los Angeles Lakers  Minnesota Timberwolves       Home     0.523333
0.7042253521126761
               Home             Away PredWinner  PredWinProb
873  Indiana Pacers  Milwaukee Bucks       Home     0.683333
0.641025641025641


In [4]:
import numpy as np
import pandas as pd
from IPython.display import display
from itertools import combinations
import os

# --------------------------------------------------
# 1. Define the team seeds (from your screenshot).
#    (Lower seed number = higher seed)
# --------------------------------------------------
team_seeds = {
    # East
    "Cleveland Cavaliers": 1,
    "Boston Celtics": 2,
    "New York Knicks": 3,
    "Indiana Pacers": 4,
    "Milwaukee Bucks": 5,
    "Detroit Pistons": 6,
    "Orlando Magic": 7,
    "Atlanta Hawks": 8,
    "Chicago Bulls": 9,
    "Miami Heat": 10,
    # West
    "Oklahoma City Thunder": 1,
    "Houston Rockets": 2,
    "Los Angeles Lakers": 3,
    "Denver Nuggets": 4,
    "LA Clippers": 5,
    "Minnesota Timberwolves": 6,
    "Golden State Warriors": 7,
    "Memphis Grizzlies": 8,
    "Sacramento Kings": 9,
    "Dallas Mavericks": 10
}

# --------------------------------------------------
# 2. Helper function to determine higher and lower seed.
#    If seeds are the same, randomly pick.
# --------------------------------------------------
def get_higher_and_lower_seed(team1, team2):
    seed1 = team_seeds[team1]
    seed2 = team_seeds[team2]
    if seed1 < seed2:
        return team1, team2  # (higher_seed_team, lower_seed_team)
    elif seed1 > seed2:
        return team2, team1
    else:
        # If seeds are equal, choose randomly.
        return (team1, team2) if np.random.rand() < 0.5 else (team2, team1)

# --------------------------------------------------
# 3. Best-of-7 function for a series where each game 
#    has its own win probability.
#    p is a length-7 list where p[i] is the probability 
#    the team wins Game i+1.
# --------------------------------------------------
def best_of_7_probability(p):
    """
    Given a 7-game win probability list p,
    returns a tuple:
      (total_win_prob, win_in_7, win_in_6, win_in_5, win_in_4)
    corresponding to winning the series.
    """
    # Win in exactly 4 games: wins in Games 1-4.
    prob_4 = p[0] * p[1] * p[2] * p[3]
    
    # Win in exactly 5 games: exactly 3 wins in the first 4, then win Game 5.
    prob_5 = 0
    for subset in combinations(range(4), 3):
        sub_prob = 1
        for i in range(4):
            sub_prob *= p[i] if i in subset else (1 - p[i])
        prob_5 += sub_prob
    prob_5 *= p[4]
    
    # Win in exactly 6 games: exactly 3 wins in the first 5, then win Game 6.
    prob_6 = 0
    for subset in combinations(range(5), 3):
        sub_prob = 1
        for i in range(5):
            sub_prob *= p[i] if i in subset else (1 - p[i])
        prob_6 += sub_prob
    prob_6 *= p[5]
    
    # Win in exactly 7 games: exactly 3 wins in the first 6, then win Game 7.
    prob_7 = 0
    for subset in combinations(range(6), 3):
        sub_prob = 1
        for i in range(6):
            sub_prob *= p[i] if i in subset else (1 - p[i])
        prob_7 += sub_prob
    prob_7 *= p[6]
    
    total_win_prob = prob_4 + prob_5 + prob_6 + prob_7
    return total_win_prob, prob_7, prob_6, prob_5, prob_4

# --------------------------------------------------
# 4. Define the teams of interest.
# --------------------------------------------------
teams_east = [
    "Cleveland Cavaliers",
    "Boston Celtics",
    "New York Knicks",
    "Indiana Pacers",
    "Milwaukee Bucks",
    "Detroit Pistons",
    "Orlando Magic",
    "Atlanta Hawks",
    "Chicago Bulls",
    "Miami Heat"
]
teams_west = [
    "Oklahoma City Thunder",
    "Houston Rockets",
    "Los Angeles Lakers",
    "Denver Nuggets",
    "LA Clippers",
    "Minnesota Timberwolves",
    "Golden State Warriors",
    "Memphis Grizzlies",
    "Sacramento Kings",
    "Dallas Mavericks"
]
teams_of_interest = set(teams_east + teams_west)

# --------------------------------------------------
# 5. Filter the master matchups DataFrame to only include
#    teams of interest.
#
#    (Assuming matchups_df contains columns:
#     "Home", "Away", "PredWinProb")
# --------------------------------------------------
df_playoffs = matchups_df[
    (matchups_df['Home'].isin(teams_of_interest)) &
    (matchups_df['Away'].isin(teams_of_interest))
].copy()

# --------------------------------------------------
# 6. Loop over every possible unique matchup among the teams.
# --------------------------------------------------
results = []
teams_list = sorted(list(teams_of_interest))  # sorted for reproducibility

# Use combinations to generate unique pairs.
for team1, team2 in combinations(teams_list, 2):
    # Determine which team is higher seeded.
    higher_seed_team, lower_seed_team = get_higher_and_lower_seed(team1, team2)
    
    # a) Probability for higher_seed_team at home vs. lower_seed_team.
    row_higher_home = df_playoffs[
        (df_playoffs['Home'] == higher_seed_team) &
        (df_playoffs['Away'] == lower_seed_team)
    ]
    if row_higher_home.empty:
        continue
    p_higher_home = row_higher_home.iloc[0]['PredWinProb']
    
    # b) Probability for lower_seed_team at home vs. higher_seed_team.
    row_lower_home = df_playoffs[
        (df_playoffs['Home'] == lower_seed_team) &
        (df_playoffs['Away'] == higher_seed_team)
    ]
    if row_lower_home.empty:
        continue
    p_lower_home_for_lower = row_lower_home.iloc[0]['PredWinProb']
    
    # c) The higher seed's road win probability is the complement.
    p_higher_road = 1 - p_lower_home_for_lower
    
    # d) Build the 7-game probability list (2–2–1–1–1 format):
    #    Games: Home, Home, Road, Road, Home, Road, Home.
    p_list = [
        p_higher_home,  # Game 1 (home)
        p_higher_home,  # Game 2 (home)
        p_higher_road,  # Game 3 (road)
        p_higher_road,  # Game 4 (road)
        p_higher_home,  # Game 5 (home)
        p_higher_road,  # Game 6 (road)
        p_higher_home   # Game 7 (home)
    ]
    
    # e) Compute the higher seed's series win probabilities.
    total_win_prob, win_in_7, win_in_6, win_in_5, win_in_4 = best_of_7_probability(p_list)
    
    # f) For the lower seed, construct the complementary win probabilities for each game.
    q_list = [1 - x for x in p_list]
    total_win_prob_lower, win_in7_lower, win_in6_lower, win_in5_lower, win_in4_lower = best_of_7_probability(q_list)
    
    # g) Save the results for this matchup.
    results.append({
        "Higher_Seed": higher_seed_team,
        "Higher_Seed_SeedNum": team_seeds[higher_seed_team],
        "Lower_Seed": lower_seed_team,
        "Lower_Seed_SeedNum": team_seeds[lower_seed_team],
        "Prob_HigherSeed_SeriesWin": total_win_prob,
        "Prob_HigherSeed_WinIn4": win_in_4,
        "Prob_HigherSeed_WinIn5": win_in_5,
        "Prob_HigherSeed_WinIn6": win_in_6,
        "Prob_HigherSeed_WinIn7": win_in_7,
        "Prob_LowerSeed_SeriesWin": total_win_prob_lower,
        "Prob_LowerSeed_WinIn4": win_in4_lower,
        "Prob_LowerSeed_WinIn5": win_in5_lower,
        "Prob_LowerSeed_WinIn6": win_in6_lower,
        "Prob_LowerSeed_WinIn7": win_in7_lower
    })

df_series_predictions = pd.DataFrame(results)

# --------------------------------------------------
# 7. Output the full series probabilities DataFrame.
# --------------------------------------------------
display(df_series_predictions)

# Optionally, save the full series predictions to an Excel file.
desktop_path_xlsx = os.path.join(os.path.expanduser("~"), "Desktop", "playoff_series_probs.xlsx")
df_series_predictions.to_excel(desktop_path_xlsx, index=False)
print("Saved series predictions to", desktop_path_xlsx)

# --------------------------------------------------
# 8. As a test, output the series between the Timberwolves and the Lakers.
# --------------------------------------------------
# Based on our seed data, the Lakers (seed 3) are higher seeded than the Timberwolves (seed 6).
tm_vs_lakers = df_series_predictions[
    (df_series_predictions['Higher_Seed'] == 'Los Angeles Lakers') &
    (df_series_predictions['Lower_Seed'] == 'Minnesota Timberwolves')
]
print("\nSeries between the Lakers and the Timberwolves:")
display(tm_vs_lakers)

cl_vs_nug = df_series_predictions[
    (df_series_predictions['Higher_Seed'] == 'New York Knicks') &
    (df_series_predictions['Lower_Seed'] == 'Detroit Pistons')
]
print("\nSeries between the Nuggets and the Clippers:")
display(cl_vs_nug)


Unnamed: 0,Higher_Seed,Higher_Seed_SeedNum,Lower_Seed,Lower_Seed_SeedNum,Prob_HigherSeed_SeriesWin,Prob_HigherSeed_WinIn4,Prob_HigherSeed_WinIn5,Prob_HigherSeed_WinIn6,Prob_HigherSeed_WinIn7,Prob_LowerSeed_SeriesWin,Prob_LowerSeed_WinIn4,Prob_LowerSeed_WinIn5,Prob_LowerSeed_WinIn6,Prob_LowerSeed_WinIn7
0,Boston Celtics,2,Atlanta Hawks,8,0.943741,0.320608,0.349175,0.171112,0.102847,0.056259,0.003161,0.007783,0.022226,0.023088
1,Atlanta Hawks,8,Chicago Bulls,9,0.503615,0.047335,0.141086,0.117394,0.197801,0.496385,0.062951,0.104022,0.202949,0.126463
2,Cleveland Cavaliers,1,Atlanta Hawks,8,0.926913,0.307803,0.320790,0.194124,0.104196,0.073087,0.004199,0.011832,0.024152,0.032904
3,Atlanta Hawks,8,Dallas Mavericks,10,0.264686,0.020794,0.057528,0.075641,0.110724,0.735314,0.145059,0.209828,0.227521,0.152905
4,Denver Nuggets,4,Atlanta Hawks,8,0.918194,0.270909,0.339207,0.181832,0.126246,0.081806,0.004969,0.011725,0.032890,0.032222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,New York Knicks,3,Orlando Magic,7,0.624755,0.077705,0.190312,0.150073,0.206666,0.375245,0.039504,0.072001,0.154081,0.109659
167,New York Knicks,3,Sacramento Kings,9,0.817837,0.189709,0.260688,0.216767,0.150673,0.182163,0.013353,0.034581,0.058893,0.075336
168,Oklahoma City Thunder,1,Orlando Magic,7,0.792772,0.145415,0.274424,0.180925,0.192008,0.207228,0.016384,0.033496,0.087527,0.069821
169,Oklahoma City Thunder,1,Sacramento Kings,9,0.970240,0.411993,0.340533,0.151252,0.066461,0.029760,0.001485,0.004187,0.010476,0.013613


Saved series predictions to /Users/tristanpoul/Desktop/playoff_series_probs.xlsx

Series between the Lakers and the Timberwolves:


Unnamed: 0,Higher_Seed,Higher_Seed_SeedNum,Lower_Seed,Lower_Seed_SeedNum,Prob_HigherSeed_SeriesWin,Prob_HigherSeed_WinIn4,Prob_HigherSeed_WinIn5,Prob_HigherSeed_WinIn6,Prob_HigherSeed_WinIn7,Prob_LowerSeed_SeriesWin,Prob_LowerSeed_WinIn4,Prob_LowerSeed_WinIn5,Prob_LowerSeed_WinIn6,Prob_LowerSeed_WinIn7
138,Los Angeles Lakers,3,Minnesota Timberwolves,6,0.491656,0.057953,0.126454,0.143449,0.1638,0.508344,0.066255,0.123152,0.169744,0.149194



Series between the Nuggets and the Clippers:


Unnamed: 0,Higher_Seed,Higher_Seed_SeedNum,Lower_Seed,Lower_Seed_SeedNum,Prob_HigherSeed_SeriesWin,Prob_HigherSeed_WinIn4,Prob_HigherSeed_WinIn5,Prob_HigherSeed_WinIn6,Prob_HigherSeed_WinIn7,Prob_LowerSeed_SeriesWin,Prob_LowerSeed_WinIn4,Prob_LowerSeed_WinIn5,Prob_LowerSeed_WinIn6,Prob_LowerSeed_WinIn7
101,New York Knicks,3,Detroit Pistons,6,0.713494,0.07402,0.242675,0.138902,0.257897,0.286506,0.022122,0.039627,0.147723,0.077034
