# Predictions
This notebook will generate predictions for FanDuel or DraftKings and store them in our database.

In [27]:
import pandas as pd
import numpy as np
from datetime import datetime
import os
import requests
import sqlite3
import re
import matplotlib.pyplot as plt
import pickle

# Get the parent directory where config.py is located
#sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

API_KEY = None
API_HOST = None

In [28]:
from config import API_KEY, API_HOST

headers = {
    "x-rapidapi-key": API_KEY,
    "x-rapidapi-host": API_HOST
}

In [29]:
today = (datetime.now()).strftime('%Y%m%d')

In [30]:
main_df = pd.read_csv('ready_for_pred.csv')

In [31]:
main_df.columns

Index(['longName', 'game_id', 'player_id', 'team_id', 'team', 'teamAbv', 'fga',
       'ast', 'tptfgm', 'fgm', 'fta', 'tptfga', 'OffReb', 'ftm', 'blk',
       'DefReb', 'plusMinus', 'stl', 'pts', 'fouls', 'TOV', 'usage',
       'mins_share', 'mins', 'mins_proj', 'salary', 'date', 'prim_pos', 'PG',
       'SG', 'SF', 'PF', 'C'],
      dtype='object')

In [32]:
#We can determine the site we're playing by seeing if there's a 'UTIL' column
#If there is, we're playing DraftKings. If not, we're playing FanDuel
site_bool = 'UTIL' not in list(main_df.columns)

In [33]:
# Loss functions

RMSE_FD = 9.683
RMSE_DK = 9.668

#pos_cols = []

# Scaling and predicting

In [34]:
if site_bool: 
    site = 'FD_'
    model_path = "../../best_XGB_FD.pkl"
    scaler_path = "../../nba_scaler_fd.pkl"
    rmse = RMSE_FD
    #pos_cols = ["PG", "SG", "SF", "PF", "C"]
else:
    site = 'DK_'
    model_path = "../../best_XGB_DK.pkl"
    scaler_path = "../../nba_scaler_dk.pkl"
    rmse = RMSE_DK
    #pos_cols = ["PG", "SG", "SF", "PF", "C", "G", "F", "UTIL"]

# Load model and scaler
with open(model_path, "rb") as model_file, open(scaler_path, "rb") as scaler_file:
    model = pickle.load(model_file)
    scaler = pickle.load(scaler_file)
    
# Rename for model compatibility
main_df = main_df.rename(columns={'PF': 'PF_pos'})
X = main_df.rename(columns={'fouls': 'PF'})  # Rename fouls to PF because in the api 'personal fouls' is PF

# Select features and scale
expected_feature_order = [
    'fga', 'ast', 'tptfgm', 'fgm', 'fta', 'tptfga', 'OffReb', 'ftm', 'blk',
    'DefReb', 'plusMinus', 'stl', 'pts', 'PF', 'TOV', 'usage', 'mins_share',
    'mins', 'mins_proj'
]
        
X = X[expected_feature_order]
X_scaled = scaler.transform(X)

# Predict with the XGBoost model
predictions = model.predict(X_scaled)

# Store predictions
main_df[site + "Pred"] = predictions
main_df[site + "Floor"] = predictions - rmse
main_df[site + "Ceiling"] = predictions + rmse
main_df[site + "Value"] = (main_df[site + 'Pred']/main_df['salary']) * 1000

# Restore PF position column
main_df = main_df.rename(columns={'PF_pos': 'PF'})


#         # Store predictions in session state
#         # st.session_state["main_df_sorted"] = main_df_sorted
#         # ✅ Calculate "Value" column
#         st.session_state["main_df"]["Value"] = (st.session_state["main_df"]["Pred"] / 
#                                                        st.session_state["main_df"]["salary"]) * 1000
#         st.success("Predictions generated!")

In [42]:
main_df = main_df.drop(columns = ['teamAbv', 'date'])

KeyError: "['teamAbv'] not found in axis"

Adding to database

In [None]:
# Define database file path
db_path = "../nba_dfs_model.db"

# Connect to the SQLite database
with sqlite3.connect(db_path) as conn:
    cursor = conn.cursor()

    # Create the table if it does not exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS predictions (
        longName TEXT,
        game_id TEXT,
        player_id TEXT,
        team_id TEXT,
        team TEXT,
        fga REAL,
        ast REAL,
        tptfgm REAL,
        fgm REAL,
        fta REAL,
        tptfga REAL,
        OffReb REAL,
        ftm REAL,
        blk REAL,
        DefReb REAL,
        plusMinus REAL,
        stl REAL,
        pts REAL,
        fouls REAL,
        TOV REAL,
        usage REAL,
        mins_share REAL,
        mins REAL,
        mins_proj REAL,
        salary INTEGER,
        PG INTEGER,
        SG INTEGER,
        SF INTEGER,
        PF INTEGER,
        C INTEGER,
        G INTEGER, 
        F INTEGER, 
        UTIL INTEGER,
        FD_Pred REAL,
        FD_Floor REAL,
        FD_Ceiling REAL,
        FD_Value REAL,
        DK_Pred REAL,
        DK_Floor REAL,
        DK_Ceiling REAL,
        DK_Value REAL
    );
    """)

    # Commit changes
    conn.commit()

print("Table 'predictions' is ready.")

# --- Load Final Processed DataFrame (Ensure this is the FINAL `main_df`) ---
# If you've already run your notebook and have `main_df` in memory, use it directly.
# Otherwise, you can reload from a CSV:
# main_df = pd.read_csv("final_predictions.csv")

# Ensure column consistency for missing DK/FD values
site = "FD_" if "UTIL" not in main_df.columns else "DK_"

if site == "FD_":
    main_df[["G", "F", "UTIL", "DK_Pred", "DK_Floor", "DK_Ceiling", "DK_Value"]] = None
else:
    main_df[["FD_Pred", "FD_Floor", "FD_Ceiling", "FD_Value"]] = None

# Convert column types to match SQLite schema
main_df = main_df.astype({
    "salary": "Int64",
    "PG": "Int64", "SG": "Int64", "SF": "Int64", "PF": "Int64", "C": "Int64",
    "G": "Int64", "F": "Int64", "UTIL": "Int64"
})

# Append new predictions to the database
with sqlite3.connect(db_path) as conn:
    main_df.to_sql("predictions", conn, if_exists="append", index=False)

print("Predictions appended successfully!")
