In [1]:
import pandas as pd

pd.set_option('display.max_columns', None)  # Show all columns in the DataFrame
df = pd.read_csv('C:/Users/admin/OneDrive/Desktop/Web Design/Portfolio/Player Analysis/wnba_stats_23June25.csv')

df.head(1)

Unnamed: 0,TEAM,GP,W,L,WIN%,MIN,PTS,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,+/-,MIN.1,OffRtg,DefRtg,NetRtg,AST%,AST/TO,ASTRATIO,OREB%,DREB%,REB%,TOV%,eFG%,TS%,PACE,PACE/40,PIE,Opp_FGM,Opp_FGA,Opp_FG%,Opp_3PM,Opp_3PA,Opp_3P%,Opp_FTM,Opp_FTA,Opp_FT%,Opp_OREB,Opp_DREB,Opp_REB,Opp_AST,Opp_TOV,Opp_STL,Opp_BLK,Opp_BLKA,Opp_PF,Opp_PFD,Opp_PTS,Opp_+/-
0,Washington Mystics,14.0,6.0,8.0,0.429,40.4,79.5,27.6,63.0,43.8,5.6,16.6,33.6,18.8,25.6,73.5,8.8,27.1,35.9,18.4,16.1,6.6,3.1,4.1,21.4,22.9,-1.6,565.0,98.5,101.0,-2.5,66.8,1.14,16.5,31.9,71.6,52.4,20.0,48.2,53.5,95.75,79.79,50.0,27.3,66.2,41.2,8.6,28.0,30.9,17.9,22.4,79.6,8.1,24.4,32.5,18.6,14.4,8.3,4.1,3.1,22.9,21.4,81.1,1.6


In [2]:
#Drop missing values
df.dropna(inplace=True)

df["Win"] = df['PTS'] > df['Opp_PTS']
df['Win'] = df['Win'].astype(int)

determinants = ['TOV', 'FG%', '3P%', 'FT%', 'REB', 'NetRtg', 'Opp_FG%', 'Opp_3P%', 'Opp_FTM', 'Opp_REB', 'Opp_TOV']
x = df[determinants]
y = df['Win']

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 0.75


In [4]:
def predict_matchup(team_a, team_b):
    # Get both team stats
    team_a_stats = df[df['TEAM'] == team_a]
    team_b_stats = df[df['TEAM'] == team_b]
    
    if team_a_stats.empty or team_b_stats.empty:
        return "One or both team names not found."

    # Create a synthetic matchup: team A vs. team B
    # We treat team A as the team being predicted, and team B as the opponent
    # Prepare input data for prediction using the determinants/features
    input_data = pd.DataFrame([{
        'TOV': team_a_stats['TOV'].values[0],
        'FG%': team_a_stats['FG%'].values[0],
        '3P%': team_a_stats['3P%'].values[0],
        'FT%': team_a_stats['FT%'].values[0],
        'REB': team_a_stats['REB'].values[0],
        'NetRtg': team_a_stats['NetRtg'].values[0],
        'Opp_FG%': team_b_stats['FG%'].values[0],
        'Opp_3P%': team_b_stats['3P%'].values[0],
        'Opp_FTM': team_b_stats['FTM'].values[0],
        'Opp_REB': team_b_stats['REB'].values[0],
        'Opp_TOV': team_b_stats['TOV'].values[0],
    }])
    
    prediction = model.predict(input_data)[0]
    winner = team_a if prediction == 1 else team_b
    loser = team_b if prediction == 1 else team_a
    return f"{winner} is more likely to beat {loser}."

In [5]:
predict_matchup("Golden State Valkyries", "New York Liberty")



'New York Liberty is more likely to beat Golden State Valkyries.'

In [None]:
predict_matchup("Washington Mystics", "Minnesota Lynx")

In [None]:
predict_matchup("Indiana Fever", "Seattle Storm")

In [7]:

bucket_determinants = ['TOV', 'FG%', 'FGM', '3P%', 'FT%', 'REB', 'AST', 'Opp_REB', 'Opp_TOV', 'Opp_BLK', 'Opp_STL']
xx = df[bucket_determinants]
yy = df['PTS'].astype(int)

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score

x_train, x_test, y_train, y_test = train_test_split(xx, yy, test_size=0.3, random_state=41)
pointsmodel = RandomForestRegressor()
pointsmodel.fit(x_train, y_train)

accuracy = pointsmodel.score(x_test, y_test)
print(f"Points Prediction Model Accuracy: {accuracy:.2f}")

Points Prediction Model Accuracy: 0.12


In [13]:
def point_difference(team_a, team_b):
    def predict_points(team_a, team_b):
    # Get both team stats
        team_a_stats = df[df['TEAM'] == team_a]
        team_b_stats = df[df['TEAM'] == team_b]
    
        if team_a_stats.empty or team_b_stats.empty:
            return "One or both team names not found."

    # Create a synthetic matchup: team A vs. team B
    # We treat team A as the team being predicted, and team B as the opponent
    # Prepare input data for prediction using the determinants/features
        input_data = pd.DataFrame([{
            'TOV': team_a_stats['TOV'].values[0],
            'FG%': team_a_stats['FG%'].values[0],
            'FGM': team_a_stats['FGM'].values[0],
            '3P%': team_a_stats['3P%'].values[0],
            'FT%': team_a_stats['FT%'].values[0],
            'REB': team_a_stats['REB'].values[0],
            'AST': team_a_stats['AST'].values[0],
            'Opp_REB': team_b_stats['REB'].values[0],
            'Opp_TOV': team_b_stats['TOV'].values[0],
            'Opp_BLK': team_b_stats['BLK'].values[0],
            'Opp_STL': team_b_stats['STL'].values[0],
        }])
    
        prediction = pointsmodel.predict(input_data)[0]
        return prediction
        #return f"{team_a} is predicted to score {prediction} points against {team_b}."
    team_a_points = predict_points(team_a, team_b)
    team_b_points = predict_points(team_b, team_a)
    return f"{team_a} will score {team_a_points} and {team_b} will score {team_b_points} \n The point difference is {team_a_points - team_b_points} points."

In [22]:
point_difference("Connecticut Sun", "Las Vegas Aces")

'Connecticut Sun will score 75.89 and Las Vegas Aces will score 79.91 \n The point difference is -4.019999999999996 points.'