In [1]:
import pandas as pd
import numpy as np



In [2]:
df_analytics =  pd.read_csv('past_rotowire_analytics.csv')

In [3]:
import pyodbc 
server = 'localhost\SQLEXPRESS'
database = 'nba_game_data'

cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER='+server+';DATABASE='+database+';')
cursor = cnxn.cursor()
sql = """
SELECT 
      [PLAYER_NAME]
      ,[GAME_DATE]
      ,[REB]
      ,[AST]
      ,[TOV]
      ,[STL]
      ,[BLK]
      ,[PTS]
      ,FG3M

  FROM [nba_game_data].[dbo].[PlayerGameLogs]
  where yearSeason = 2024
  """
df_actuals = pd.read_sql(sql,cnxn)

  df_actuals = pd.read_sql(sql,cnxn)


In [4]:
from slugify import slugify

In [5]:
df_analytics['Date'] = pd.to_datetime(df_analytics['Date'])
df_actuals['GAME_DATE'] = pd.to_datetime(df_actuals['GAME_DATE'])

In [6]:
# Slugify player names for both DataFrames
df_analytics['Player_Slug'] = df_analytics['Player'].apply(slugify)
df_actuals['Player_Slug'] = df_actuals['PLAYER_NAME'].apply(slugify)

In [7]:
# Join the dataframes on the slugified player names
df_combined = pd.merge(df_analytics, df_actuals, how='left', left_on = ['Player_Slug','Date'],right_on=['Player_Slug','GAME_DATE'])

# Function to calculate the score based on Market Name
def calculate_score(row):
    if row['Market Name'] == 'Points':
        return row['PTS']
    elif row['Market Name'] == 'PTS+REB+AST':
        return row['PTS'] + row['REB'] + row['AST']
    elif row['Market Name'] == 'Rebounds':
        return row['REB']
    elif row['Market Name'] == '3PT Made':
        return row['FG3M']
    elif row['Market Name'] == 'Steals':
        return row['STL']
    elif row['Market Name'] == 'Blocks':
        return row['BLK']
    elif row['Market Name'] == 'Assists':
        return row['AST']
    elif row['Market Name'] == 'Turnovers':
        return row['TOV']
    else:
        return None

# Apply the score calculation function
df_combined['Actual Score'] = df_combined.apply(calculate_score, axis=1)

# Select relevant columns to display the final result
# df_final = df_combined[['Player', 'Market Name', 'Line', 'Actual Score']]

# df_combined

In [8]:
df_combined = df_combined.dropna()
df_combined

Unnamed: 0,Player,Position,Team,Opponent,Date,Time,Market Name,Line,Site Less,Site More,...,PLAYER_NAME,GAME_DATE,REB,AST,TOV,STL,BLK,PTS,FG3M,Actual Score
0,Kyrie Irving,G,DAL,@CLE,2024-02-27,6:00 PM,Rebounds,4.5,-108,-157,...,Kyrie Irving,2024-02-27,6.0,3.0,3.0,1.0,1.0,30.0,5.0,6.0
1,Jaden Ivey,G,DET,@CHI,2024-02-27,7:00 PM,PTS+REB+AST,20.5,-122,-139,...,Jaden Ivey,2024-02-27,2.0,5.0,4.0,1.0,1.0,15.0,1.0,22.0
2,Jaden Ivey,G,DET,@CHI,2024-02-27,7:00 PM,Points,13.5,-120,-141,...,Jaden Ivey,2024-02-27,2.0,5.0,4.0,1.0,1.0,15.0,1.0,15.0
3,Anthony Edwards,G,MIN,SAS,2024-02-27,7:00 PM,Points,32.5,-132,-129,...,Anthony Edwards,2024-02-27,5.0,5.0,4.0,4.0,1.0,34.0,4.0,34.0
4,Dejounte Murray,G,ATL,UTA,2024-02-27,6:30 PM,PTS+REB+AST,40.5,-141,-120,...,Dejounte Murray,2024-02-27,4.0,10.0,5.0,1.0,1.0,17.0,1.0,31.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1139,Andrew Nembhard,G,IND,NOP,2024-02-28,6:30 PM,PTS+REB+AST,17.5,-130,-129,...,Andrew Nembhard,2024-02-28,4.0,1.0,1.0,1.0,0.0,8.0,2.0,13.0
1140,Andrew Nembhard,G,IND,NOP,2024-02-28,6:30 PM,Points,10.5,-112,-152,...,Andrew Nembhard,2024-02-28,4.0,1.0,1.0,1.0,0.0,8.0,2.0,8.0
1141,Immanuel Quickley,G,TOR,DAL,2024-02-28,6:30 PM,Turnovers,1.5,-127,-132,...,Immanuel Quickley,2024-02-28,5.0,9.0,1.0,2.0,0.0,28.0,5.0,1.0
1142,Jarrett Allen,C,CLE,@CHI,2024-02-28,7:00 PM,Rebounds,10.5,-117,-145,...,Jarrett Allen,2024-02-28,4.0,4.0,0.0,1.0,2.0,11.0,0.0,4.0


In [9]:
df_combined['Over_Under'] = (df_combined['Actual Score']>df_combined['Line']).astype(int)

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import numpy as np

# Create the target variable (1 if Actual Score > Line, 0 otherwise)
df_combined['Over_Under'] = np.where(df_combined['Actual Score'] > df_combined['Line'], 1, 0)
df_combined = df_combined.apply(pd.to_numeric, errors='coerce')
# df_combined = df_combined.dropna()

# Selecting the features and the target
features = ['Lean', 'DFS Pick\'em Sites Factor', 'Sportsbooks Factor', 'RotoWire Projection Factor', 'Hit Rate Factor', 'RotoWire Projection', 'Weighted Hit Rate']
X = df_combined[features].fillna(0)  # Fill missing values with 0 for simplicity
y = df_combined['Over_Under']
X['Lean'] = X['Lean'].apply(lambda x : 1 if x == 'More' else 0)

In [11]:
# X['Sportsbooks Factor'] = X['Sportsbooks Factor'].astype(float)
# X['RotoWire Projection Factor'] = X['RotoWire Projection Factor'].astype(float)
# X['Hit Rate Factor'] = X['Hit Rate Factor'].astype(float)
# X['Hit Rate Factor'] = X['Hit Rate Factor'].astype(float)
X.iloc[5:65]
# Convert the entire DataFrame to numeric, errors='coerce' will turn the failed conversions into NaN
X = X.apply(pd.to_numeric, errors='coerce')

In [12]:
from sklearn.linear_model import LogisticRegression

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X)

# Initialize the Logistic Regression model
logistic_model = LogisticRegression(random_state=42)

# Fit the model
logistic_model.fit(X_train_scaled, y)

# Getting the probability predictions for the test set
y_prob = logistic_model.predict_proba(X_test_scaled)[:, 1]  # Probability of the positive class ('Over')


# Coefficients
coefficients = logistic_model.coef_[0]

# Output the coefficients for interpretation
coefficients = dict(zip(features, coefficients))

In [13]:
coefficients

{'Lean': 0.0,
 "DFS Pick'em Sites Factor": 0.2182700560820932,
 'Sportsbooks Factor': -0.04611188181454682,
 'RotoWire Projection Factor': -0.1408374774387164,
 'Hit Rate Factor': 0.03682446264323377,
 'RotoWire Projection': -0.030719882800393738,
 'Weighted Hit Rate': 0.03669044317077059}

In [14]:
new_preds = pd.read_csv(r'C:\Users\bobby\Downloads\rw-sleeper-predictions-2024-02-29.csv')
new_preds['Lean'] = new_preds['Lean'].apply(lambda x : 1 if x == 'More' else 0)
new_preds.replace("-","")
new_preds = new_preds.fillna(0)
features_out = ['Player','Market Name','Line','Site Less','Site More','Lean', 'DFS Pick\'em Sites Factor', 'Sportsbooks Factor', 'RotoWire Projection Factor', 'Hit Rate Factor', 'RotoWire Projection', 'Weighted Hit Rate']


In [15]:

features = ['Lean', "DFS Pick'em Sites Factor", 'Sportsbooks Factor', 'RotoWire Projection Factor', 
            'Hit Rate Factor', 'RotoWire Projection', 'Weighted Hit Rate']

# Ensure you're selecting only the features required for prediction
X_new = new_preds[features]
X_new = X_new.apply(pd.to_numeric, errors='coerce')
X_new = X_new.fillna(0)

# Standardize the new data using the previously defined scaler
X_new_scaled = scaler.transform(X_new)

# Predict the probabilities
y_prob = logistic_model.predict_proba(X_new_scaled)[:, 1]  # Probability of the positive class ('Over')

# Add a prediction column
new_preds['Prediction'] = (y_prob > 0.5).astype(int)  # 1 for 'Over', 0 for 'Under'

# Correctly assign the probability column based on the prediction
new_preds['Probability'] = new_preds['Prediction'].apply(lambda x: y_prob[x] if x == 1 else 1 - y_prob[x])

# Since the above approach will not work as intended because it misuses the indexing, let's correct it further:
new_preds['Probability'] = [prob if pred == 1 else 1-prob for pred, prob in zip(new_preds['Prediction'], y_prob)]


# Create a DataFrame with the necessary columns
result_df = new_preds[['Player', 'Market Name', 'Line', 'Site Less', 'Site More', 'Prediction', 'Probability']]




In [16]:
# Add a new column for American Money Line Odds based on the Prediction
result_df['American Money Line Odds'] = result_df.apply(lambda x: x['Site Less'] if x['Prediction'] == 0 else x['Site More'], axis=1)
result_df = result_df[['Player', 'Market Name', 'Line','American Money Line Odds', 'Prediction', 'Probability']]
result_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_df['American Money Line Odds'] = result_df.apply(lambda x: x['Site Less'] if x['Prediction'] == 0 else x['Site More'], axis=1)


Unnamed: 0,Player,Market Name,Line,American Money Line Odds,Prediction,Probability
0,Devin Booker,PTS+REB+AST,38.5,-122,1,0.545327
1,Jusuf Nurkic,PTS+REB+AST,24.5,-121,0,0.580915
2,Franz Wagner,Points,21.5,-132,1,0.532039
3,Draymond Green,PTS+REB+AST,20.5,-125,0,0.534398
4,Damian Lillard,Assists,6.5,-145,1,0.723029
...,...,...,...,...,...,...
200,Jalen Green,PTS+REB+AST,23.5,-134,0,0.501446
201,Jimmy Butler,Points,19.5,-129,1,0.594630
202,Devin Vassell,PTS+REB+AST,30.5,-130,1,0.568544
203,Luguentz Dort,Points,9.5,-129,1,0.507426


In [17]:
def american_to_decimal(odds):
    """Convert American odds to decimal odds."""
    if odds > 0:
        return 1 + odds / 100
    else:
        return 1 - 100 / odds

def kelly_criterion(probability, odds, convervative_factor =.1):
    """Calculate the Kelly Criterion bet fraction."""
    decimal_odds = american_to_decimal(odds)
    b = decimal_odds - 1
    q = 1 - probability
    return (((b * probability) - q) / b)*convervative_factor

# Apply the Kelly Criterion to each row in your DataFrame
result_df['Bet Fraction'] = result_df.apply(lambda x: kelly_criterion(x['Probability'], x['American Money Line Odds'],.025), axis=1)

# To ensure no negative betting suggestions, we can clamp the values at 0
result_df['Bet Fraction'] = result_df['Bet Fraction'].clip(lower=0)

# If you want to calculate the actual amount to bet based on a bankroll, you can do:
bankroll = 1000  # Example bankroll amount
result_df['Bet Amount'] = round(result_df['Bet Fraction'] * bankroll)
result_df

Unnamed: 0,Player,Market Name,Line,American Money Line Odds,Prediction,Probability,Bet Fraction,Bet Amount
0,Devin Booker,PTS+REB+AST,38.5,-122,1,0.545327,0.000000,0.0
1,Jusuf Nurkic,PTS+REB+AST,24.5,-121,0,0.580915,0.001846,2.0
2,Franz Wagner,Points,21.5,-132,1,0.532039,0.000000,0.0
3,Draymond Green,PTS+REB+AST,20.5,-125,0,0.534398,0.000000,0.0
4,Damian Lillard,Assists,6.5,-145,1,0.723029,0.008036,8.0
...,...,...,...,...,...,...,...,...
200,Jalen Green,PTS+REB+AST,23.5,-134,0,0.501446,0.000000,0.0
201,Jimmy Butler,Points,19.5,-129,1,0.594630,0.001793,2.0
202,Devin Vassell,PTS+REB+AST,30.5,-130,1,0.568544,0.000191,0.0
203,Luguentz Dort,Points,9.5,-129,1,0.507426,0.000000,0.0


In [18]:
result_df.sort_values(by='Bet Amount',ascending=False).iloc[:60]

Unnamed: 0,Player,Market Name,Line,American Money Line Odds,Prediction,Probability,Bet Fraction,Bet Amount
50,Dejounte Murray,PTS+REB+AST,35.5,-139,1,0.853444,0.016243,16.0
7,Stephen Curry,Assists,4.5,-152,1,0.810445,0.013058,13.0
73,Shai Gilgeous-Alexander,Points,30.5,-139,1,0.761082,0.010725,11.0
60,Chet Holmgren,PTS+REB+AST,28.5,-143,0,0.735338,0.008922,9.0
4,Damian Lillard,Assists,6.5,-145,1,0.723029,0.008036,8.0
19,Dejounte Murray,Points,21.5,-134,1,0.697526,0.007305,7.0
28,Victor Wembanyama,Points,22.5,-137,0,0.691963,0.006749,7.0
8,Victor Wembanyama,Rebounds,10.5,-134,1,0.659679,0.005091,5.0
43,Jalen Brunson,Points,31.5,-122,1,0.613911,0.003572,4.0
80,Collin Sexton,3PT Made,1.5,-112,1,0.581828,0.002837,3.0
