In [9]:
#QB ML MODEL
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler

pd.options.mode.chained_assignment = None

import sklearn
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

from sklearn.model_selection import train_test_split
import joblib


#scaler to scale data
scaler = MinMaxScaler()

#read csv files into pandas
dfFantasy = pd.read_csv("/Users/kmaran3/Dropbox/Darkhorse/final position group data/final_qb_data.csv")
dfFantasy.replace([np.inf, -np.inf], np.nan, inplace=True)
numeric_cols = dfFantasy.select_dtypes(include=[np.number]).columns
for column in numeric_cols:
    dfFantasy[column].fillna(dfFantasy[column].mean(), inplace=True)
dfGrades = pd.read_csv("/Users/kmaran3/Dropbox/Darkhorse/approximate value data/AVbyPositionGroup.csv")

def correctData(df, pprTF):
  #cols to make per game
  cols = ['completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions', 'sacks',
       'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch',
       'passing_first_downs', 'passing_2pt_conversions',
       'carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs',
       'rushing_2pt_conversions', 'fantasy_points', 'age']

  #basing data if ppr or not
  if pprTF == 2:
    pass
  elif pprTF == 0:
    pass
  elif pprTF == 1:
    pass

    
  #adding ppg column
  df.loc[:, 'PPG'] = df['fantasy_points'] / df['GP']


  #make all columns in a per game basis
  for col in cols:
    df.loc[:, col] = df[col] / df['GP'] 


  #only players with more than 7 games.
  df = df[df.GP > 7]
  df = df[df.fantasy_points >= 0]

  df = df[df.PPG > 5]
  

  return df

#removes unneccesary stats
def removeUnwanted(dfPos, pos):
  dfPos = dfPos.drop(columns=['season',"GP", "season_type", "fantasy_points", "player_display_name", "player_id", "team", "position"])
  return dfPos

#shifts data forward one year
def makeCorrectShift(df):
  shifters = ['PPG','season','GP','season_type','age','fantasy_points','completions','attempts','passing_yards','passing_tds','interceptions','sacks','sack_fumbles_lost','passing_air_yards','passing_yards_after_catch','passing_first_downs','passing_2pt_conversions','carries','rushing_yards','rushing_tds','rushing_fumbles_lost','rushing_first_downs','rushing_2pt_conversions']
  
  #adds target variable
  df["PPG"] = df["PPG"]
  
  #shifts it forward a year (for example 2011 goes to 2012)
  df[shifters] = df.groupby('player_display_name')[shifters].shift(1)
  df = df.dropna()

  return df

#where machine learning is done. returns the model and score.
from sklearn.inspection import permutation_importance

def machineLearning(df, arr, dictParam):
    # Define predictors excluding the target variable
    predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']

    # Split the data
    x = df[predictors].values
    y = df["PPG"].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

    # Initialize and train GradientBoostingRegressor
    gbr = GradientBoostingRegressor(**dictParam)
    gbr.fit(x_train, y_train)

    # Evaluate the model
    predict_test = gbr.predict(x_test)
    mae = mean_absolute_error(y_test, predict_test)

    predict_test_unscaled = predict_test * (arr[1] - arr[0]) + arr[0]
    y_test_unscaled = y_test * (arr[1] - arr[0]) + arr[0]

    # Calculate permutation importance
    r = permutation_importance(gbr, x_test, y_test, n_repeats=100, random_state=0)

    # Organize importances
    importance_dict = {name: score for name, score in zip(predictors, r.importances_mean)}
    sorted_importances = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)
    
    for feature, importance in sorted_importances:
      print(f"{feature}: {importance}")


    return [mae, gbr]

# Example usage of the modified function


def getBestParams(df, arr):

  #make the predictors and data and test sets correctly
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  
  #make the parameters to search over. for hidden_layer_sizes, I experimented with alot and the ones listed now is just final set of experiment.
  
  grid = {
      'n_estimators': [100, 200, 300],
      'learning_rate': [0.01, 0.1, 0.2],
      'max_depth': [3, 4, 5],
      'min_samples_split': [2, 3, 4]
  }

  #create an MLPRegressor object
  gbr = GradientBoostingRegressor()

  #create a GridSearchCV object and fit it to the training data
  grid_search = GridSearchCV(gbr, param_grid=grid, cv=5, n_jobs=-1)
  grid_search.fit(x_train, y_train)

  #the best model to make predictions on the test data and evaluate performance
  y_pred = grid_search.predict(x_test)

  #inverse transform the scaled predictions to get the original scale, uses a reverse of original formula
  for i in range(len(y_pred)):
    y_pred[i] = (y_pred[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]


  print(mean_absolute_error(y_test, y_pred))

  return grid_search.best_params_

#gets original value for fantasy points for predictions.
def getScaleBack(df):
  #index of column
  column_index = df.columns.get_loc("PPG")

  #min value of column:
  min_value = df["PPG"].min()

  #scaling valye of column
  #scaling_factor = scaler.scale_[column_index]
  max_value = df["PPG"].max()

  #array to be used later to scale each data
  arr = [min_value, max_value]

  return arr

def test(df, model, arr):
  #make columns everything but target
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']


  #make train and test sets
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  #make the predictions
  predict_test = model.predict(x_test)

  #inverse transform the scaled predictions to get the original scale by reversing formula
  for i in range(len(predict_test)):
    predict_test[i] = (predict_test[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]

  #average error 
  mae = mean_absolute_error(y_test, predict_test)
  print("test ", mae)

#if ppr is 0, than it is non ppr. if 1, then it is half ppr. if 2, full ppr. loops through each.
for ppr in [0,1,2]:

  dfFantasyCopy = dfFantasy.copy()

  dfFantasyCopy = correctData(dfFantasyCopy, ppr)

  dfFantasyCopy = makeCorrectShift(dfFantasyCopy)

  dfFantasyCopy = dfFantasyCopy.loc[dfFantasyCopy["season"] != 2012]

  dfFantasyCopy = removeUnwanted(dfFantasyCopy, "QB")

  dfFantasyCopy = dfFantasyCopy.reset_index(drop=True)

  #gets fantasy_points_ppr scale per each position
  scaleQB = getScaleBack(dfFantasyCopy)

  dfFantasyCopy[dfFantasyCopy.columns] = scaler.fit_transform(dfFantasyCopy[dfFantasyCopy.columns])

  #obtained by running the getBestParams function per each respective position
  paramQB = getBestParams(dfFantasyCopy, scaleQB)

  #makes array of model and score, then prints it
  qbArray = machineLearning(dfFantasyCopy, scaleQB, paramQB)
  num = qbArray[0]
  qbModel = qbArray[1]
  print("qb score(ppg off on average per player): ", num)

  if ppr == 0:
      joblib.dump(qbModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/qb models/qbModelNonPPR.joblib")
  elif ppr == 1:
      joblib.dump(qbModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/qb models/qbModelHalfPPR.joblib")
  elif ppr == 2:
      joblib.dump(qbModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/qb models/qbModelPPR.joblib")



0.8207726314870332
passing_tds: 0.6126532669158066
rushing_yards: 0.10716838168748449
passing_yards: 0.09925729633327889
carries: 0.07587437312442773
rushing_tds: 0.06973244531957129
rushing_first_downs: 0.040666698557862896
passing_first_downs: 0.02350960043469319
interceptions: 0.02022502921456087
passing_yards_after_catch: 0.004855832984022546
passing_air_yards: 0.003603952908837854
qb: 0.0032053219751899563
sack_fumbles_lost: 0.001289617938883313
passing_2pt_conversions: 0.0011238619141196214
rb: 0.0005816515189396254
rushing_fumbles_lost: 0.000495423908193593
dst: 0.00013568762290885327
rushing_2pt_conversions: 3.847197681366099e-05
wrte: -0.00016813435144822698
oline: -0.0002236699855607649
completions: -0.00041897616600684363
age: -0.0006641459198772892
attempts: -0.002329899884755935
sacks: -0.003714440038306438
qb score(ppg off on average per player):  0.03332744464062
0.825445559728367
passing_tds: 0.6146860495760926
rushing_yards: 0.10565737150554728
passing_yards: 0.0986668

In [11]:
#RB ML MODEL
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler

pd.options.mode.chained_assignment = None

import sklearn
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

from sklearn.model_selection import train_test_split
import joblib


#scaler to scale data
scaler = MinMaxScaler()

#read csv files into pandas
dfFantasy = pd.read_csv("/Users/kmaran3/Dropbox/Darkhorse/final position group data/final_rb_data.csv")
dfFantasy.replace([np.inf, -np.inf], np.nan, inplace=True)
numeric_cols = dfFantasy.select_dtypes(include=[np.number]).columns
for column in numeric_cols:
    dfFantasy[column].fillna(dfFantasy[column].mean(), inplace=True)
dfGrades = pd.read_csv("/Users/kmaran3/Dropbox/Darkhorse/approximate value data/AVbyPositionGroup.csv")

def correctData(df, pprTF):
  #cols to make per game
  cols = ['carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points', 'rrtd', 'age']

  #basing data if ppr or not
  if pprTF == 2:
    pass
  elif pprTF == 0:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - df["receptions"]
  elif pprTF == 1:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - (df["receptions"]/2)

    
  #adding ppg column
  df.loc[:, 'PPG'] = df['fantasy_points'] / df['GP']


  #make all columns in a per game basis
  for col in cols:
    df.loc[:, col] = df[col] / df['GP'] 


  #only players with more than 7 games.
  df = df[df.GP > 7]
  df = df[df.fantasy_points >= 0]

  df = df[df.PPG > 2]
  

  return df

#removes unneccesary stats
def removeUnwanted(dfPos, pos):
  dfPos = dfPos.drop(columns=['season',"GP", "season_type", "fantasy_points", "player_display_name", "player_id", "team", "position"])
  return dfPos

#shifts data forward one year
def makeCorrectShift(df):
  shifters = ['player_id', 'season', 'player_display_name', 'team', 'GP', 'position',
       'age','season_type', 'carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs',
       'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points',
       'rrtd']
  
  #adds target variable
  df["PPG"] = df["PPG"]
  
  #shifts it forward a year (for example 2011 goes to 2012)
  df[shifters] = df.groupby('player_display_name')[shifters].shift(1)
  df = df.dropna()

  return df

#where machine learning is done. returns the model and score.
from sklearn.inspection import permutation_importance

def machineLearning(df, arr, dictParam):
    # Define predictors excluding the target variable
    predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']

    # Split the data
    x = df[predictors].values
    y = df["PPG"].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

    # Initialize and train GradientBoostingRegressor
    gbr = GradientBoostingRegressor(**dictParam)
    gbr.fit(x_train, y_train)

    # Evaluate the model
    predict_test = gbr.predict(x_test)
    mae = mean_absolute_error(y_test, predict_test)

    predict_test_unscaled = predict_test * (arr[1] - arr[0]) + arr[0]
    y_test_unscaled = y_test * (arr[1] - arr[0]) + arr[0]

    # print("Predicted vs Actual PPG (unscaled):")
    # for pred, actual in zip(predict_test_unscaled, y_test_unscaled):
    #     print(f"Predicted: {pred:.2f}, Actual: {actual:.2f}")

    # Calculate permutation importance
    r = permutation_importance(gbr, x_test, y_test, n_repeats=100, random_state=0)

    # Organize importances
    importance_dict = {name: score for name, score in zip(predictors, r.importances_mean)}
    sorted_importances = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

    for feature, importance in sorted_importances:
        print(f"{feature}: {importance}")
    


    return [mae, gbr, sorted_importances]

# Example usage of the modified function


def getBestParams(df, arr):

  #make the predictors and data and test sets correctly
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  
  #make the parameters to search over. for hidden_layer_sizes, I experimented with alot and the ones listed now is just final set of experiment.
  
  grid = {
      'n_estimators': [100, 200, 300],
      'learning_rate': [0.01, 0.1, 0.2],
      'max_depth': [3, 4, 5],
      'min_samples_split': [2, 3, 4]
  }

  #create an MLPRegressor object
  gbr = GradientBoostingRegressor()

  #create a GridSearchCV object and fit it to the training data
  grid_search = GridSearchCV(gbr, param_grid=grid, cv=5, n_jobs=-1)
  grid_search.fit(x_train, y_train)

  #the best model to make predictions on the test data and evaluate performance
  y_pred = grid_search.predict(x_test)

  #inverse transform the scaled predictions to get the original scale, uses a reverse of original formula
  for i in range(len(y_pred)):
    y_pred[i] = (y_pred[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]


  # print(mean_absolute_error(y_test, y_pred))

  return grid_search.best_params_

#gets original value for fantasy points for predictions.
def getScaleBack(df):
  #index of column
  column_index = df.columns.get_loc("PPG")

  #min value of column:
  min_value = df["PPG"].min()

  #scaling valye of column
  #scaling_factor = scaler.scale_[column_index]
  max_value = df["PPG"].max()

  #array to be used later to scale each data
  arr = [min_value, max_value]

  return arr

def test(df, model, arr):
  #make columns everything but target
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']


  #make train and test sets
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  #make the predictions
  predict_test = model.predict(x_test)

  #inverse transform the scaled predictions to get the original scale by reversing formula
  for i in range(len(predict_test)):
    predict_test[i] = (predict_test[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]

  #average error 
  mae = mean_absolute_error(y_test, predict_test)
  print("test ", mae)

#if ppr is 0, than it is non ppr. if 1, then it is half ppr. if 2, full ppr. loops through each.
for ppr in [0,1,2]:

  dfFantasyCopy = dfFantasy.copy()

  dfFantasyCopy = correctData(dfFantasyCopy, ppr)

  dfFantasyCopy = makeCorrectShift(dfFantasyCopy)

  dfFantasyCopy = dfFantasyCopy.loc[dfFantasyCopy["season"] != 2012]

  dfFantasyCopy = removeUnwanted(dfFantasyCopy, "RB")

  dfFantasyCopy = dfFantasyCopy.reset_index(drop=True)

  #gets fantasy_points_ppr scale per each position
  scaleRB = getScaleBack(dfFantasyCopy)

  dfFantasyCopy[dfFantasyCopy.columns] = scaler.fit_transform(dfFantasyCopy[dfFantasyCopy.columns])

  #obtained by running the getBestParams function per each respective position
  paramRB = getBestParams(dfFantasyCopy, scaleRB)

  #makes array of model and score, then prints it
  rbArray = machineLearning(dfFantasyCopy, scaleRB, paramRB)
  num = rbArray[0]
  rbModel = rbArray[1]

  print("rb score(ppg off on average per player): ", num)
  if ppr == 0:
      joblib.dump(rbModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/rb models/rbModelNonPPR.joblib")
  elif ppr == 1:
      joblib.dump(rbModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/rb models/rbModelHalfPPR.joblib")
  elif ppr == 2:
      joblib.dump(rbModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/rb models/rbModelPPR.joblib")
#print(dfFantasyRB.columns)

rushing_yards: 0.19705952635662471
rb: 0.08895611684001857
carries: 0.04064098182302581
rushing_first_downs: 0.018946216681800215
rushing_fumbles_lost: 0.004197501934881821
dst: 0.0032849529774148212
rushing_tds: 0.0015565643158663512
rrtd: 0.0008154688750638272
wrte: 0.0006088535797573702
qb: 0.0005142626594790334
special_teams_tds: 0.00014294052556384828
receiving_yards: 9.243761813319096e-05
rushing_2pt_conversions: 0.0
receiving_fumbles_lost: 0.0
receiving_2pt_conversions: 0.0
receiving_tds: -0.0012080405704691843
oline: -0.0023963298329282667
receiving_air_yards: -0.007319873034463129
targets: -0.008546492663423687
receiving_yards_after_catch: -0.010417438677411638
receptions: -0.013354227285292904
receiving_first_downs: -0.02914880983608337
age: -0.03261999042803542
rb score(ppg off on average per player):  0.13963064583145904
rushing_yards: 0.21915171846215478
rb: 0.038677789031732114
age: 0.03495162901177484
rushing_first_downs: 0.024709998703693975
rrtd: 0.018266623267472783
r

In [12]:
#WRTE ML MODEL
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler

pd.options.mode.chained_assignment = None

import sklearn
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

from sklearn.model_selection import train_test_split
import joblib


#scaler to scale data
scaler = MinMaxScaler()

#read csv files into pandas
dfFantasy = pd.read_csv("/Users/kmaran3/Dropbox/Darkhorse/final position group data/final_wrte_data.csv")
dfFantasy.replace([np.inf, -np.inf], np.nan, inplace=True)
numeric_cols = dfFantasy.select_dtypes(include=[np.number]).columns
for column in numeric_cols:
    dfFantasy[column].fillna(dfFantasy[column].mean(), inplace=True)
dfGrades = pd.read_csv("/Users/kmaran3/Dropbox/Darkhorse/approximate value data/AVbyPositionGroup.csv")

def correctData(df, pprTF):
  #cols to make per game
  cols = ['carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points', 'rrtd', 'age']

  #basing data if ppr or not
  if pprTF == 2:
    pass
  elif pprTF == 0:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - df["receptions"]
  elif pprTF == 1:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - (df["receptions"]/2)

    
  #adding ppg column
  df.loc[:, 'PPG'] = df['fantasy_points'] / df['GP']


  #make all columns in a per game basis
  for col in cols:
    df.loc[:, col] = df[col] / df['GP'] 


  #only players with more than 7 games.
  df = df[df.GP > 7]
  df = df[df.fantasy_points >= 0]

  df = df[df.PPG > 2]
  

  return df

#removes unneccesary stats
def removeUnwanted(dfPos, pos):
  dfPos = dfPos.drop(columns=['season',"GP", "season_type", "fantasy_points", "player_display_name", "player_id", "team", "position"])
  return dfPos

#shifts data forward one year
def makeCorrectShift(df):
  shifters = ['player_id', 'season', 'player_display_name', 'team', 'GP', 'position',
       'age', 'season_type', 'carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs',
       'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points',
       'rrtd']
  
  #adds target variable
  df["PPG"] = df["PPG"]
  
  #shifts it forward a year (for example 2011 goes to 2012)
  df[shifters] = df.groupby('player_display_name')[shifters].shift(1)
  df = df.dropna()

  return df

#where machine learning is done. returns the model and score.
from sklearn.inspection import permutation_importance

def machineLearning(df, arr, dictParam):
    # Define predictors excluding the target variable
    predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']

    # Split the data
    x = df[predictors].values
    y = df["PPG"].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

    # Initialize and train MLPRegressor
    gbr = GradientBoostingRegressor(**dictParam)
    gbr.fit(x_train, y_train)

    # Evaluate the model
    predict_test = gbr.predict(x_test)
    mae = mean_absolute_error(y_test, predict_test)

    predict_test_unscaled = predict_test * (arr[1] - arr[0]) + arr[0]
    y_test_unscaled = y_test * (arr[1] - arr[0]) + arr[0]

    # print("Predicted vs Actual PPG (unscaled):")
    # for pred, actual in zip(predict_test_unscaled, y_test_unscaled):
    #     print(f"Predicted: {pred:.2f}, Actual: {actual:.2f}")

    # Calculate permutation importance
    r = permutation_importance(gbr, x_test, y_test, n_repeats=100, random_state=0)

    # Organize importances
    importance_dict = {name: score for name, score in zip(predictors, r.importances_mean)}
    sorted_importances = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

    # Print sorted importances
    for feature, importance in sorted_importances:
        print(f"{feature}: {importance}")
    


    return [mae, gbr, sorted_importances]

# Example usage of the modified function


def getBestParams(df, arr):

  #make the predictors and data and test sets correctly
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  
  #make the parameters to search over. for hidden_layer_sizes, I experimented with alot and the ones listed now is just final set of experiment.
  
  grid = {
      'n_estimators': [100, 200, 300],
      'learning_rate': [0.01, 0.1, 0.2],
      'max_depth': [3, 4, 5],
      'min_samples_split': [2, 3, 4]
  }

  #create an MLPRegressor object
  gbr = GradientBoostingRegressor()

  #create a GridSearchCV object and fit it to the training data
  grid_search = GridSearchCV(gbr, param_grid=grid, cv=5, n_jobs=-1)
  grid_search.fit(x_train, y_train)

  #the best model to make predictions on the test data and evaluate performance
  y_pred = grid_search.predict(x_test)

  #inverse transform the scaled predictions to get the original scale, uses a reverse of original formula
  for i in range(len(y_pred)):
    y_pred[i] = (y_pred[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]


  # print(mean_absolute_error(y_test, y_pred))

  return grid_search.best_params_

#gets original value for fantasy points for predictions.
def getScaleBack(df):
  #index of column
  column_index = df.columns.get_loc("PPG")

  #min value of column:
  min_value = df["PPG"].min()

  #scaling valye of column
  #scaling_factor = scaler.scale_[column_index]
  max_value = df["PPG"].max()

  #array to be used later to scale each data
  arr = [min_value, max_value]

  return arr

def test(df, model, arr):
  #make columns everything but target
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']


  #make train and test sets
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  #make the predictions
  predict_test = model.predict(x_test)

  #inverse transform the scaled predictions to get the original scale by reversing formula
  for i in range(len(predict_test)):
    predict_test[i] = (predict_test[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]

  #average error 
  mae = mean_absolute_error(y_test, predict_test)
  # print("test ", mae)

#if ppr is 0, than it is non ppr. if 1, then it is half ppr. if 2, full ppr. loops through each.
for ppr in [0,1,2]:

  dfFantasyCopy = dfFantasy.copy()

  dfFantasyCopy = correctData(dfFantasyCopy, ppr)
  
  dfFantasyCopy = makeCorrectShift(dfFantasyCopy)

  dfFantasyCopy = dfFantasyCopy.loc[dfFantasyCopy["season"] != 2012]

  dfFantasyCopy = removeUnwanted(dfFantasyCopy, "WRTE")

  dfFantasyCopy = dfFantasyCopy.reset_index(drop=True)

  #gets fantasy_points_ppr scale per each position
  scaleWRTE = getScaleBack(dfFantasyCopy)

  dfFantasyCopy[dfFantasyCopy.columns] = scaler.fit_transform(dfFantasyCopy[dfFantasyCopy.columns])

  #obtained by running the getBestParams function per each respective position
  paramWRTE = getBestParams(dfFantasyCopy, scaleWRTE)

  #makes array of model and score, then prints it
  wrteArray = machineLearning(dfFantasyCopy, scaleWRTE, paramWRTE)
  num = wrteArray[0]
  wrteModel = wrteArray[1]

  print("wrte score(ppg off on average per player): ", num)
  if ppr == 0:
      joblib.dump(wrteModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/wrte models/wrteModelNonPPR.joblib")
  elif ppr == 1:
      joblib.dump(wrteModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/wrte models/wrteModelHalfPPR.joblib")
  elif ppr == 2:
      joblib.dump(wrteModel, "/Users/kmaran3/Dropbox/Darkhorse/Models/wrte models/wrteModelPPR.joblib")

receiving_air_yards: 0.1250707371682855
receiving_yards_after_catch: 0.11229775462159275
wrte: 0.03268371579316379
targets: 0.0300772544304533
qb: 0.029070338016393523
oline: 0.014732345111481065
receiving_fumbles_lost: 0.0072267401577955
receiving_first_downs: 0.002779057502834823
receiving_2pt_conversions: 0.0007166515895771075
rushing_fumbles_lost: 0.0
rushing_2pt_conversions: 0.0
special_teams_tds: 0.0
carries: -0.0003525119852836711
rushing_first_downs: -0.0009487166040506978
dst: -0.0010783858332679885
rrtd: -0.0019367915705777571
receiving_tds: -0.004036806736027051
receiving_yards: -0.005085659461043456
age: -0.0065045047019463235
rushing_yards: -0.007901671016131652
rushing_tds: -0.011327300845867137
rb: -0.011866773880060829
receptions: -0.015912024427763526
wrte score(ppg off on average per player):  0.11536116042403276
receiving_yards: 0.16060506506422306
receiving_air_yards: 0.14300933764884516
qb: 0.0238652059519602
receiving_yards_after_catch: 0.021119153265339002
wrte: 