# Predictions
This notebook will generate predictions for FanDuel or DraftKings and store them in our database.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import os
import requests
import sqlite3
import re
import matplotlib.pyplot as plt
import pickle

# Get the parent directory where config.py is located
#sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# Placeholders only; the real credentials are imported from config in the next cell.
API_KEY = API_HOST = None

In [2]:
from config import API_KEY, API_HOST

# RapidAPI authentication headers, built from the credentials imported from config.
headers = {"x-rapidapi-key": API_KEY, "x-rapidapi-host": API_HOST}

In [3]:
# Current local date as a compact YYYYMMDD string.
today = f"{datetime.now():%Y%m%d}"

In [4]:
# Load the player table to predict on.
# NOTE(review): presumably written by the upstream EDA notebook — confirm.
main_df = pd.read_csv('ready_for_pred.csv')

In [5]:
# Inspect the available columns; the presence of 'UTIL' is used below to detect the site.
main_df.columns

Index(['longName', 'game_id', 'player_id', 'team_id', 'team', 'teamAbv', 'fga',
       'ast', 'tptfgm', 'fgm', 'fta', 'tptfga', 'OffReb', 'ftm', 'blk',
       'DefReb', 'plusMinus', 'stl', 'pts', 'fouls', 'TOV', 'usage',
       'mins_share', 'mins', 'mins_proj', 'salary', 'date', 'prim_pos', 'PG',
       'SG', 'SF', 'PF', 'C'],
      dtype='object')

In [6]:
# Site detection: a 'UTIL' column means the input is for DraftKings,
# its absence means FanDuel. site_bool is therefore True for FanDuel.
site_bool = 'UTIL' not in main_df.columns

In [7]:
# Per-site model RMSE; the prediction cell adds/subtracts it to build the
# ceiling/floor columns.
RMSE_FD, RMSE_DK = 9.683, 9.668

# Scaling and predicting

In [8]:
# Pick the site-specific column prefix, model artifacts and error band.
if site_bool:
    site = 'FD_'
    model_path = "../../best_XGB_FD.pkl"
    scaler_path = "../../nba_scaler_fd.pkl"
    rmse = RMSE_FD
else:
    site = 'DK_'
    model_path = "../../best_XGB_DK.pkl"
    scaler_path = "../../nba_scaler_dk.pkl"
    rmse = RMSE_DK

# Load model and scaler.
# NOTE: pickle.load can execute arbitrary code; only load artifacts you produced yourself.
with open(model_path, "rb") as model_file, open(scaler_path, "rb") as scaler_file:
    model = pickle.load(model_file)
    scaler = pickle.load(scaler_file)

# Feature names in the order passed to the scaler and model. Here 'PF' means
# personal fouls (the API's name for it), which main_df stores as 'fouls';
# main_df's own 'PF' column is the power-forward position flag.
expected_feature_order = [
    'fga', 'ast', 'tptfgm', 'fgm', 'fta', 'tptfga', 'OffReb', 'ftm', 'blk',
    'DefReb', 'plusMinus', 'stl', 'pts', 'PF', 'TOV', 'usage', 'mins_share',
    'mins', 'mins_proj'
]

# Select the features by main_df's own names first, then rename only the
# feature frame. main_df is never renamed, so a failure while scaling or
# predicting cannot leave it with a temporary 'PF_pos' column.
source_feature_cols = ['fouls' if col == 'PF' else col for col in expected_feature_order]
X = main_df[source_feature_cols].rename(columns={'fouls': 'PF'})
X_scaled = scaler.transform(X)

# Predict with the XGBoost model
predictions = model.predict(X_scaled)

# Store predictions, a +/- RMSE band, and predicted points per $1000 of salary.
main_df[site + "Pred"] = predictions
main_df[site + "Floor"] = predictions - rmse
main_df[site + "Ceiling"] = predictions + rmse
main_df[site + "Value"] = (main_df[site + 'Pred'] / main_df['salary']) * 1000

In [9]:
# Drop helper columns that are not part of the stored predictions schema.
# errors='ignore' keeps this cell re-runnable: without it, a second run raises
# KeyError because the columns have already been removed.
main_df = main_df.drop(columns=['teamAbv', 'date', 'prim_pos'], errors='ignore')

In [10]:
# Check dtypes and non-null counts now that the prediction columns are attached.
main_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 292 entries, 0 to 291
Data columns (total 34 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   longName    292 non-null    object 
 1   game_id     292 non-null    object 
 2   player_id   292 non-null    int64  
 3   team_id     292 non-null    int64  
 4   team        292 non-null    object 
 5   fga         292 non-null    float64
 6   ast         292 non-null    float64
 7   tptfgm      292 non-null    float64
 8   fgm         292 non-null    float64
 9   fta         292 non-null    float64
 10  tptfga      292 non-null    float64
 11  OffReb      292 non-null    float64
 12  ftm         292 non-null    float64
 13  blk         292 non-null    float64
 14  DefReb      292 non-null    float64
 15  plusMinus   292 non-null    float64
 16  stl         292 non-null    float64
 17  pts         292 non-null    float64
 18  fouls       292 non-null    float64
 19  TOV         292 non-null    f

In [11]:
# Preview the first rows, including the new prediction columns.
main_df.head()

Unnamed: 0,longName,game_id,player_id,team_id,team,fga,ast,tptfgm,fgm,fta,...,salary,PG,SG,SF,PF,C,FD_Pred,FD_Floor,FD_Ceiling,FD_Value
0,Brook Lopez,20250210_GS@MIL,28006619932,17,MIL,10.0,1.866667,1.733333,4.4,1.533333,...,5900,False,False,False,False,True,21.65033,11.96733,31.333328,3.669547
1,Shake Milton,20250210_UTA@LAL,28008397499,14,LAL,3.6,2.266667,0.666667,1.8,1.333333,...,3700,True,False,False,False,False,8.60396,-1.07904,18.286961,2.325395
2,Gary Trent Jr.,20250210_GS@MIL,28038899869,17,MIL,9.066667,1.666667,3.0,4.133333,0.6,...,4200,False,True,True,False,False,14.739407,5.056407,24.422405,3.509383
3,Gary Payton II,20250210_GS@MIL,28038983399,10,GS,4.333333,1.4,0.4,2.333333,1.2,...,3700,True,True,False,False,False,8.808807,-0.874192,18.491806,2.380759
4,Buddy Hield,20250210_GS@MIL,28038998249,10,GS,9.8,2.333333,2.2,3.666667,1.133333,...,4900,True,True,False,False,False,16.359264,6.676265,26.042263,3.338625


# This is what goes to the Streamlit app
We only want to save a given day's predictions to the database once. We don't want to keep appending the same day's predictions over and over after possibly taking players out of the list and/or putting them back in. The CSV written here, by contrast, is overwritten on every run, so the EDA and Prediction notebooks can be re-run as many times as needed.

In [12]:
# Only a FanDuel run writes the CSV; to_csv replaces any existing file of that name.
if site == 'FD_':
    csv_path = f"{site}predictions.csv"
    main_df.to_csv(csv_path, index=False)

In [13]:
# Define database file path
db_path = "../nba_dfs_model.db"

# Columns written to the `predictions` table. A single run only produces one
# site's prediction columns; the rest are added as nulls below.
expected_cols = [
    "longName", "game_id", "player_id", "team_id", "team",
    "fga", "ast", "tptfgm", "fgm", "fta", "tptfga", "OffReb", "ftm", "blk",
    "DefReb", "plusMinus", "stl", "pts", "fouls", "TOV", "usage",
    "mins_share", "mins", "mins_proj", "salary", "PG", "SG", "SF", "PF", "C", 
    "G", "F", "UTIL", "FD_Pred", "FD_Floor", "FD_Ceiling", "FD_Value",
    "DK_Pred", "DK_Floor", "DK_Ceiling", "DK_Value"
]

# Add any columns this run did not produce (e.g. the DK_* columns on a FanDuel run)
for col in expected_cols:
    if col not in main_df.columns:
        main_df[col] = None

# Convert to match database types; nullable Int64 lets missing flags stay null
main_df = main_df.astype({
    "salary": "Int64",
    "PG": "Int64", "SG": "Int64", "SF": "Int64", "PF": "Int64", "C": "Int64",
    "G": "Int64", "F": "Int64", "UTIL": "Int64"
})

# Append predictions to database.
# NOTE(review): this appends on every run, so re-running the cell stores the
# same rows again.
# `with sqlite3.connect(...)` only manages the transaction and never closes the
# connection, so open and close it explicitly instead.
conn = sqlite3.connect(db_path)
try:
    # Write only the schema columns so a stray extra column cannot break the append.
    main_df[expected_cols].to_sql("predictions", conn, if_exists="append", index=False)
    conn.commit()  # Ensure write is committed
    print("✅ Predictions successfully written!")

    # Check if rows were actually written
    cursor = conn.cursor()
    cursor.execute("SELECT COUNT(*) FROM predictions;")
    count = cursor.fetchone()[0]
    print(f"✅ Total rows in predictions table after insert: {count}")

except Exception as e:
    # Discard anything left uncommitted by the failed write before reporting.
    conn.rollback()
    print(f"❌ ERROR while writing to database: {e}")

finally:
    conn.close()


✅ Predictions successfully written!
✅ Total rows in predictions table after insert: 553


Adding to database

In [14]:
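# NOTE: this whole cell is disabled (commented out). It is a variant of the
# database write that also creates the `predictions` table if it is missing;
# the active write is the cell above.
# # Define database file path
# db_path = "../nba_dfs_model.db"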

# # Connect to the SQLite database
# with sqlite3.connect(db_path) as conn:
#     cursor = conn.cursor()

#     # Create the table if it does not exist
#     cursor.execute("""
#     CREATE TABLE IF NOT EXISTS predictions (
#         longName TEXT,
#         game_id TEXT,
#         player_id TEXT,
#         team_id TEXT,
#         team TEXT,
#         fga REAL,
#         ast REAL,
#         tptfgm REAL,
#         fgm REAL,
#         fta REAL,
#         tptfga REAL,
#         OffReb REAL,
    #     ftm REAL,
    #     blk REAL,
    #     DefReb REAL,
    #     plusMinus REAL,
    #     stl REAL,
    #     pts REAL,
    #     fouls REAL,
    #     TOV REAL,
    #     usage REAL,
    #     mins_share REAL,
    #     mins REAL,
    #     mins_proj REAL,
    #     salary INTEGER,
    #     PG INTEGER,
    #     SG INTEGER,
    #     SF INTEGER,
    #     PF INTEGER,
    #     C INTEGER,
    #     G INTEGER, 
    #     F INTEGER, 
    #     UTIL INTEGER,
    #     FD_Pred REAL,
    #     FD_Floor REAL,
    #     FD_Ceiling REAL,
    #     FD_Value REAL,
    #     DK_Pred REAL,
    #     DK_Floor REAL,
    #     DK_Ceiling REAL,
    #     DK_Value REAL
    # );
#     """)

#     # Commit changes
#     conn.commit()

# print("Table 'predictions' is ready.")

# # --- Load Final Processed DataFrame (Ensure this is the FINAL `main_df`) ---
# # If you've already run your notebook and have `main_df` in memory, use it directly.
# # Otherwise, you can reload from a CSV:
# # main_df = pd.read_csv("final_predictions.csv")

# # Ensure column consistency for missing DK/FD values
# site = "FD_" if "UTIL" not in main_df.columns else "DK_"

# if site == "FD_":
#     main_df[["G", "F", "UTIL", "DK_Pred", "DK_Floor", "DK_Ceiling", "DK_Value"]] = None
# else:
#     main_df[["FD_Pred", "FD_Floor", "FD_Ceiling", "FD_Value"]] = None

# # Convert column types to match SQLite schema
# main_df = main_df.astype({
#     "salary": "Int64",
#     "PG": "Int64", "SG": "Int64", "SF": "Int64", "PF": "Int64", "C": "Int64",
#     "G": "Int64", "F": "Int64", "UTIL": "Int64"
# })

# # Append new predictions to the database
# with sqlite3.connect(db_path) as conn:
#     main_df.to_sql("predictions", conn, if_exists="append", index=False)

# print("Predictions appended successfully!")


In [15]:
# Preview again: the columns added for the database schema are present but empty.
main_df.head()

Unnamed: 0,longName,game_id,player_id,team_id,team,fga,ast,tptfgm,fgm,fta,...,FD_Floor,FD_Ceiling,FD_Value,G,F,UTIL,DK_Pred,DK_Floor,DK_Ceiling,DK_Value
0,Brook Lopez,20250210_GS@MIL,28006619932,17,MIL,10.0,1.866667,1.733333,4.4,1.533333,...,11.96733,31.333328,3.669547,,,,,,,
1,Shake Milton,20250210_UTA@LAL,28008397499,14,LAL,3.6,2.266667,0.666667,1.8,1.333333,...,-1.07904,18.286961,2.325395,,,,,,,
2,Gary Trent Jr.,20250210_GS@MIL,28038899869,17,MIL,9.066667,1.666667,3.0,4.133333,0.6,...,5.056407,24.422405,3.509383,,,,,,,
3,Gary Payton II,20250210_GS@MIL,28038983399,10,GS,4.333333,1.4,0.4,2.333333,1.2,...,-0.874192,18.491806,2.380759,,,,,,,
4,Buddy Hield,20250210_GS@MIL,28038998249,10,GS,9.8,2.333333,2.2,3.666667,1.133333,...,6.676265,26.042263,3.338625,,,,,,,


In [28]:
# with sqlite3.connect("../nba_dfs_model.db") as conn:
#     df_check = pd.read_sql_query("SELECT * FROM predictions LIMIT 5", conn)
#     print(df_check)


         longName           game_id    player_id team_id team        fga  \
0     Brook Lopez  20250209_PHI@MIL  28006619932      17  MIL  10.200000   
1    Steven Adams  20250209_TOR@HOU  28018735349      11  HOU   4.066667   
2  Garrett Temple  20250209_TOR@HOU  28026396452      28  TOR   0.866667   
3      Kyle Lowry  20250209_PHI@MIL  28036327222      23  PHI   2.866667   
4  Gary Trent Jr.  20250209_PHI@MIL  28038899869      17  MIL   8.066667   

        ast    tptfgm       fgm       fta  ...     F  UTIL  FD_Pred  FD_Floor  \
0  1.933333  1.933333  4.533333  1.533333  ...  None  None     None      None   
1  1.133333  0.000000  2.000000  2.200000  ...  None  None     None      None   
2  0.600000  0.066667  0.133333  0.266667  ...  None  None     None      None   
3  2.933333  0.666667  0.933333  0.666667  ...  None  None     None      None   
4  1.466667  2.600000  3.800000  0.400000  ...  None  None     None      None   

   FD_Ceiling  FD_Value  DK_Pred  DK_Floor  DK_Ceiling  

In [29]:
# with sqlite3.connect("../nba_dfs_model.db") as conn:
#     try:
#         main_df.to_sql("predictions", conn, if_exists="append", index=False)
#         print("✅ Predictions successfully written!")
#     except Exception as e:
#         print(f"❌ ERROR while writing to database: {e}")

✅ Predictions successfully written!


In [30]:
# with sqlite3.connect("../nba_dfs_model.db") as conn:
#     cursor = conn.cursor()
#     cursor.execute("PRAGMA table_info(predictions);")
#     db_columns = [row[1] for row in cursor.fetchall()]
#     print("DB Columns:", db_columns)

# print("DataFrame Columns:", main_df.columns.tolist())

# # Find mismatches
# missing_in_df = set(db_columns) - set(main_df.columns)
# extra_in_df = set(main_df.columns) - set(db_columns)

# print(f"Columns missing in main_df: {missing_in_df}")
# print(f"Extra columns in main_df not in DB: {extra_in_df}")


DB Columns: ['longName', 'game_id', 'player_id', 'team_id', 'team', 'fga', 'ast', 'tptfgm', 'fgm', 'fta', 'tptfga', 'OffReb', 'ftm', 'blk', 'DefReb', 'plusMinus', 'stl', 'pts', 'fouls', 'TOV', 'usage', 'mins_share', 'mins', 'mins_proj', 'salary', 'PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL', 'FD_Pred', 'FD_Floor', 'FD_Ceiling', 'FD_Value', 'DK_Pred', 'DK_Floor', 'DK_Ceiling', 'DK_Value']
DataFrame Columns: ['longName', 'game_id', 'player_id', 'team_id', 'team', 'fga', 'ast', 'tptfgm', 'fgm', 'fta', 'tptfga', 'OffReb', 'ftm', 'blk', 'DefReb', 'plusMinus', 'stl', 'pts', 'fouls', 'TOV', 'usage', 'mins_share', 'mins', 'mins_proj', 'salary', 'PG', 'SG', 'SF', 'PF', 'C', 'FD_Pred', 'FD_Floor', 'FD_Ceiling', 'FD_Value']
Columns missing in main_df: {'G', 'F', 'DK_Floor', 'UTIL', 'DK_Pred', 'DK_Value', 'DK_Ceiling'}
Extra columns in main_df not in DB: set()


In [31]:
# with sqlite3.connect("../nba_dfs_model.db") as conn:
#     cursor = conn.cursor()
#     cursor.execute("SELECT COUNT(*) FROM predictions;")
#     count = cursor.fetchone()[0]
#     print(f"✅ Total rows in predictions table: {count}")


✅ Total rows in predictions table: 174
