In [1]:
#QB ML MODEL
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler

pd.options.mode.chained_assignment = None

import sklearn
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

from sklearn.model_selection import train_test_split
import joblib


#scaler to scale data
scaler = MinMaxScaler()

#read csv files into pandas
dfFantasy = pd.read_csv("final position group data/final_qb_data.csv")
dfFantasy.replace([np.inf, -np.inf], np.nan, inplace=True)
numeric_cols = dfFantasy.select_dtypes(include=[np.number]).columns
for column in numeric_cols:
    dfFantasy[column].fillna(dfFantasy[column].mean(), inplace=True)
dfGrades = pd.read_csv("approximate value data/AVbyPositionGroup.csv")

def correctData(df, pprTF):
  #cols to make per game
  cols = ['completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions', 'sacks',
       'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch',
       'passing_first_downs', 'passing_2pt_conversions',
       'carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs',
       'rushing_2pt_conversions', 'fantasy_points', 'age']

  #basing data if ppr or not
  if pprTF == 2:
    pass
  elif pprTF == 0:
    pass
  elif pprTF == 1:
    pass

    
  #adding ppg column
  df.loc[:, 'PPG'] = df['fantasy_points'] / df['GP']


  #make all columns in a per game basis
  for col in cols:
    df.loc[:, col] = df[col] / df['GP'] 


  #only players with more than 7 games.
  df = df[df.GP > 7]
  df = df[df.fantasy_points >= 0]

  df = df[df.PPG > 5]
  

  return df

#removes unneccesary stats
def removeUnwanted(dfPos, pos):
  dfPos = dfPos.drop(columns=['season',"GP", "season_type", "fantasy_points", "player_display_name", "player_id", "team", "position"])
  return dfPos

#shifts data forward one year
def makeCorrectShift(df):
  shifters = ['PPG','season','GP','season_type','age','fantasy_points','completions','attempts','passing_yards','passing_tds','interceptions','sacks','sack_fumbles_lost','passing_air_yards','passing_yards_after_catch','passing_first_downs','passing_2pt_conversions','carries','rushing_yards','rushing_tds','rushing_fumbles_lost','rushing_first_downs','rushing_2pt_conversions']
  
  #adds target variable
  df["PPG"] = df["PPG"]
  
  #shifts it forward a year (for example 2011 goes to 2012)
  df[shifters] = df.groupby('player_display_name')[shifters].shift(1)
  df = df.dropna()

  return df

#where machine learning is done. returns the model and score.
from sklearn.inspection import permutation_importance

def machineLearning(df, arr, dictParam):
    # Define predictors excluding the target variable
    predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']

    # Split the data
    x = df[predictors].values
    y = df["PPG"].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

    # Initialize and train GradientBoostingRegressor
    gbr = GradientBoostingRegressor(**dictParam)
    gbr.fit(x_train, y_train)

    # Evaluate the model
    predict_test = gbr.predict(x_test)
    mae = mean_absolute_error(y_test, predict_test)

    predict_test_unscaled = predict_test * (arr[1] - arr[0]) + arr[0]
    y_test_unscaled = y_test * (arr[1] - arr[0]) + arr[0]

    # Calculate permutation importance
    r = permutation_importance(gbr, x_test, y_test, n_repeats=100, random_state=0)

    # Organize importances
    importance_dict = {name: score for name, score in zip(predictors, r.importances_mean)}
    sorted_importances = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)
    
    for feature, importance in sorted_importances:
      print(f"{feature}: {importance}")


    return [mae, gbr]

# Example usage of the modified function


def getBestParams(df, arr):

  #make the predictors and data and test sets correctly
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  
  #make the parameters to search over. for hidden_layer_sizes, I experimented with alot and the ones listed now is just final set of experiment.
  
  grid = {
      'n_estimators': [100, 200, 300],
      'learning_rate': [0.01, 0.1, 0.2],
      'max_depth': [3, 4, 5],
      'min_samples_split': [2, 3, 4]
  }

  #create an MLPRegressor object
  gbr = GradientBoostingRegressor()

  #create a GridSearchCV object and fit it to the training data
  grid_search = GridSearchCV(gbr, param_grid=grid, cv=5, n_jobs=-1)
  grid_search.fit(x_train, y_train)

  #the best model to make predictions on the test data and evaluate performance
  y_pred = grid_search.predict(x_test)

  #inverse transform the scaled predictions to get the original scale, uses a reverse of original formula
  for i in range(len(y_pred)):
    y_pred[i] = (y_pred[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]


  print(mean_absolute_error(y_test, y_pred))

  return grid_search.best_params_

#gets original value for fantasy points for predictions.
def getScaleBack(df):
  #index of column
  column_index = df.columns.get_loc("PPG")

  #min value of column:
  min_value = df["PPG"].min()

  #scaling valye of column
  #scaling_factor = scaler.scale_[column_index]
  max_value = df["PPG"].max()

  #array to be used later to scale each data
  arr = [min_value, max_value]

  return arr

def test(df, model, arr):
  #make columns everything but target
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']


  #make train and test sets
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  #make the predictions
  predict_test = model.predict(x_test)

  #inverse transform the scaled predictions to get the original scale by reversing formula
  for i in range(len(predict_test)):
    predict_test[i] = (predict_test[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]

  #average error 
  mae = mean_absolute_error(y_test, predict_test)
  print("test ", mae)

#if ppr is 0, than it is non ppr. if 1, then it is half ppr. if 2, full ppr. loops through each.
for ppr in [0,1,2]:

  dfFantasyCopy = dfFantasy.copy()

  dfFantasyCopy = correctData(dfFantasyCopy, ppr)

  dfFantasyCopy = makeCorrectShift(dfFantasyCopy)

  dfFantasyCopy = dfFantasyCopy.loc[dfFantasyCopy["season"] != 2012]

  dfFantasyCopy = removeUnwanted(dfFantasyCopy, "QB")

  dfFantasyCopy = dfFantasyCopy.reset_index(drop=True)

  #gets fantasy_points_ppr scale per each position
  scaleQB = getScaleBack(dfFantasyCopy)

  dfFantasyCopy[dfFantasyCopy.columns] = scaler.fit_transform(dfFantasyCopy[dfFantasyCopy.columns])

  #obtained by running the getBestParams function per each respective position
  paramQB = getBestParams(dfFantasyCopy, scaleQB)

  #makes array of model and score, then prints it
  qbArray = machineLearning(dfFantasyCopy, scaleQB, paramQB)
  num = qbArray[0]
  qbModel = qbArray[1]
  print("qb score(ppg off on average per player): ", num)

  if ppr == 0:
      joblib.dump(qbModel, "qb models/qbModelNonPPR.joblib")
  elif ppr == 1:
      joblib.dump(qbModel, "qb models/qbModelHalfPPR.joblib")
  elif ppr == 2:
      joblib.dump(qbModel, "qb models/qbModelPPR.joblib")



0.8249424962974933
passing_tds: 0.6266619665385622
passing_yards: 0.0988907952525303
rushing_yards: 0.09231895379768502
carries: 0.07935285058211289
rushing_tds: 0.0718703700638194
rushing_first_downs: 0.05195962132550578
passing_first_downs: 0.027516317260428993
interceptions: 0.019939035370580928
passing_yards_after_catch: 0.004137192384833498
passing_air_yards: 0.00202327312487259
passing_2pt_conversions: 0.0015879698827468414
sack_fumbles_lost: 0.0014632733827842526
qb: 0.001190850852694445
oline: 0.0007673859619806933
rushing_fumbles_lost: 0.0005723251541924956
wrte: 0.0004643831881447158
dst: 5.207888681413309e-07
rushing_2pt_conversions: -0.00028213546292275014
rb: -0.0005968737338998587
age: -0.0008649252020371978
completions: -0.0010677530583316553
attempts: -0.0014007366948538325
sacks: -0.002754858395207249
qb score(ppg off on average per player):  0.03342570050181966
0.7967349201234002
passing_tds: 0.6162275256462472
rushing_yards: 0.11796849697656267
passing_yards: 0.10431

In [2]:
#RB ML MODEL
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler

pd.options.mode.chained_assignment = None

import sklearn
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

from sklearn.model_selection import train_test_split
import joblib


#scaler to scale data
scaler = MinMaxScaler()

#read csv files into pandas
dfFantasy = pd.read_csv("final position group data/final_rb_data.csv")
dfFantasy.replace([np.inf, -np.inf], np.nan, inplace=True)
numeric_cols = dfFantasy.select_dtypes(include=[np.number]).columns
for column in numeric_cols:
    dfFantasy[column].fillna(dfFantasy[column].mean(), inplace=True)
dfGrades = pd.read_csv("approximate value data/AVbyPositionGroup.csv")

def correctData(df, pprTF):
  #cols to make per game
  cols = ['carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points', 'rrtd', 'age']

  #basing data if ppr or not
  if pprTF == 2:
    pass
  elif pprTF == 0:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - df["receptions"]
  elif pprTF == 1:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - (df["receptions"]/2)

    
  #adding ppg column
  df.loc[:, 'PPG'] = df['fantasy_points'] / df['GP']


  #make all columns in a per game basis
  for col in cols:
    df.loc[:, col] = df[col] / df['GP'] 


  #only players with more than 7 games.
  df = df[df.GP > 7]
  df = df[df.fantasy_points >= 0]

  df = df[df.PPG > 2]
  

  return df

#removes unneccesary stats
def removeUnwanted(dfPos, pos):
  dfPos = dfPos.drop(columns=['season',"GP", "season_type", "fantasy_points", "player_display_name", "player_id", "team", "position"])
  return dfPos

#shifts data forward one year
def makeCorrectShift(df):
  shifters = ['player_id', 'season', 'player_display_name', 'team', 'GP', 'position',
       'age','season_type', 'carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs',
       'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points',
       'rrtd']
  
  #adds target variable
  df["PPG"] = df["PPG"]
  
  #shifts it forward a year (for example 2011 goes to 2012)
  df[shifters] = df.groupby('player_display_name')[shifters].shift(1)
  df = df.dropna()

  return df

#where machine learning is done. returns the model and score.
from sklearn.inspection import permutation_importance

def machineLearning(df, arr, dictParam):
    # Define predictors excluding the target variable
    predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']

    # Split the data
    x = df[predictors].values
    y = df["PPG"].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

    # Initialize and train GradientBoostingRegressor
    gbr = GradientBoostingRegressor(**dictParam)
    gbr.fit(x_train, y_train)

    # Evaluate the model
    predict_test = gbr.predict(x_test)
    mae = mean_absolute_error(y_test, predict_test)

    predict_test_unscaled = predict_test * (arr[1] - arr[0]) + arr[0]
    y_test_unscaled = y_test * (arr[1] - arr[0]) + arr[0]

    # print("Predicted vs Actual PPG (unscaled):")
    # for pred, actual in zip(predict_test_unscaled, y_test_unscaled):
    #     print(f"Predicted: {pred:.2f}, Actual: {actual:.2f}")

    # Calculate permutation importance
    r = permutation_importance(gbr, x_test, y_test, n_repeats=100, random_state=0)

    # Organize importances
    importance_dict = {name: score for name, score in zip(predictors, r.importances_mean)}
    sorted_importances = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

    for feature, importance in sorted_importances:
        print(f"{feature}: {importance}")
    


    return [mae, gbr, sorted_importances]

# Example usage of the modified function


def getBestParams(df, arr):

  #make the predictors and data and test sets correctly
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  
  #make the parameters to search over. for hidden_layer_sizes, I experimented with alot and the ones listed now is just final set of experiment.
  
  grid = {
      'n_estimators': [100, 200, 300],
      'learning_rate': [0.01, 0.1, 0.2],
      'max_depth': [3, 4, 5],
      'min_samples_split': [2, 3, 4]
  }

  #create an MLPRegressor object
  gbr = GradientBoostingRegressor()

  #create a GridSearchCV object and fit it to the training data
  grid_search = GridSearchCV(gbr, param_grid=grid, cv=5, n_jobs=-1)
  grid_search.fit(x_train, y_train)

  #the best model to make predictions on the test data and evaluate performance
  y_pred = grid_search.predict(x_test)

  #inverse transform the scaled predictions to get the original scale, uses a reverse of original formula
  for i in range(len(y_pred)):
    y_pred[i] = (y_pred[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]


  # print(mean_absolute_error(y_test, y_pred))

  return grid_search.best_params_

#gets original value for fantasy points for predictions.
def getScaleBack(df):
  #index of column
  column_index = df.columns.get_loc("PPG")

  #min value of column:
  min_value = df["PPG"].min()

  #scaling valye of column
  #scaling_factor = scaler.scale_[column_index]
  max_value = df["PPG"].max()

  #array to be used later to scale each data
  arr = [min_value, max_value]

  return arr

def test(df, model, arr):
  #make columns everything but target
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']


  #make train and test sets
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  #make the predictions
  predict_test = model.predict(x_test)

  #inverse transform the scaled predictions to get the original scale by reversing formula
  for i in range(len(predict_test)):
    predict_test[i] = (predict_test[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]

  #average error 
  mae = mean_absolute_error(y_test, predict_test)
  print("test ", mae)

#if ppr is 0, than it is non ppr. if 1, then it is half ppr. if 2, full ppr. loops through each.
for ppr in [0,1,2]:

  dfFantasyCopy = dfFantasy.copy()

  dfFantasyCopy = correctData(dfFantasyCopy, ppr)

  dfFantasyCopy = makeCorrectShift(dfFantasyCopy)

  dfFantasyCopy = dfFantasyCopy.loc[dfFantasyCopy["season"] != 2012]

  dfFantasyCopy = removeUnwanted(dfFantasyCopy, "RB")

  dfFantasyCopy = dfFantasyCopy.reset_index(drop=True)

  #gets fantasy_points_ppr scale per each position
  scaleRB = getScaleBack(dfFantasyCopy)

  dfFantasyCopy[dfFantasyCopy.columns] = scaler.fit_transform(dfFantasyCopy[dfFantasyCopy.columns])

  #obtained by running the getBestParams function per each respective position
  paramRB = getBestParams(dfFantasyCopy, scaleRB)

  #makes array of model and score, then prints it
  rbArray = machineLearning(dfFantasyCopy, scaleRB, paramRB)
  num = rbArray[0]
  rbModel = rbArray[1]

  print("rb score(ppg off on average per player): ", num)
  if ppr == 0:
      joblib.dump(rbModel, "rb models/rbModelNonPPR.joblib")
  elif ppr == 1:
      joblib.dump(rbModel, "rb models/rbModelHalfPPR.joblib")
  elif ppr == 2:
      joblib.dump(rbModel, "rb models/rbModelPPR.joblib")
#print(dfFantasyRB.columns)

rushing_yards: 0.20281412036782837
rb: 0.08913783341599844
carries: 0.040875710974054416
rushing_first_downs: 0.017864461816411457
rushing_fumbles_lost: 0.00420155980055692
dst: 0.0029457834557763507
wrte: 0.0014086212737451798
rrtd: 0.0008177730913153081
rushing_tds: 0.00015068693383714306
special_teams_tds: 0.00014294052556384162
receiving_yards: 8.044928364969617e-05
rushing_2pt_conversions: 0.0
receiving_fumbles_lost: 0.0
receiving_2pt_conversions: 0.0
qb: -0.00016302263892763568
receiving_tds: -0.0014143564840335798
oline: -0.001982430443263473
targets: -0.007451721407986442
receiving_air_yards: -0.0076191445454837315
receptions: -0.010498652240841597
receiving_yards_after_catch: -0.012634756744631149
receiving_first_downs: -0.028722071952736076
age: -0.032580616170340464
rb score(ppg off on average per player):  0.13962562375378734
rushing_yards: 0.21875678569388363
rb: 0.038127420233495586
age: 0.034651919024522845
rushing_first_downs: 0.025239581038271043
rrtd: 0.01771948414030

In [3]:
#WRTE ML MODEL
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler

pd.options.mode.chained_assignment = None

import sklearn
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

from sklearn.model_selection import train_test_split
import joblib


#scaler to scale data
scaler = MinMaxScaler()

#read csv files into pandas
dfFantasy = pd.read_csv("final position group data/final_wrte_data.csv")
dfFantasy.replace([np.inf, -np.inf], np.nan, inplace=True)
numeric_cols = dfFantasy.select_dtypes(include=[np.number]).columns
for column in numeric_cols:
    dfFantasy[column].fillna(dfFantasy[column].mean(), inplace=True)
dfGrades = pd.read_csv("approximate value data/AVbyPositionGroup.csv")

def correctData(df, pprTF):
  #cols to make per game
  cols = ['carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points', 'rrtd', 'age']

  #basing data if ppr or not
  if pprTF == 2:
    pass
  elif pprTF == 0:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - df["receptions"]
  elif pprTF == 1:
    df.loc[:, "fantasy_points"] = df["fantasy_points"] - (df["receptions"]/2)

    
  #adding ppg column
  df.loc[:, 'PPG'] = df['fantasy_points'] / df['GP']


  #make all columns in a per game basis
  for col in cols:
    df.loc[:, col] = df[col] / df['GP'] 


  #only players with more than 7 games.
  df = df[df.GP > 7]
  df = df[df.fantasy_points >= 0]

  df = df[df.PPG > 2]
  

  return df

#removes unneccesary stats
def removeUnwanted(dfPos, pos):
  dfPos = dfPos.drop(columns=['season',"GP", "season_type", "fantasy_points", "player_display_name", "player_id", "team", "position"])
  return dfPos

#shifts data forward one year
def makeCorrectShift(df):
  shifters = ['player_id', 'season', 'player_display_name', 'team', 'GP', 'position',
       'age', 'season_type', 'carries', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'rushing_first_downs',
       'rushing_2pt_conversions', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs',
       'receiving_2pt_conversions', 'special_teams_tds', 'fantasy_points',
       'rrtd']
  
  #adds target variable
  df["PPG"] = df["PPG"]
  
  #shifts it forward a year (for example 2011 goes to 2012)
  df[shifters] = df.groupby('player_display_name')[shifters].shift(1)
  df = df.dropna()

  return df

#where machine learning is done. returns the model and score.
from sklearn.inspection import permutation_importance

def machineLearning(df, arr, dictParam):
    # Define predictors excluding the target variable
    predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']

    # Split the data
    x = df[predictors].values
    y = df["PPG"].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

    # Initialize and train MLPRegressor
    gbr = GradientBoostingRegressor(**dictParam)
    gbr.fit(x_train, y_train)

    # Evaluate the model
    predict_test = gbr.predict(x_test)
    mae = mean_absolute_error(y_test, predict_test)

    predict_test_unscaled = predict_test * (arr[1] - arr[0]) + arr[0]
    y_test_unscaled = y_test * (arr[1] - arr[0]) + arr[0]

    # print("Predicted vs Actual PPG (unscaled):")
    # for pred, actual in zip(predict_test_unscaled, y_test_unscaled):
    #     print(f"Predicted: {pred:.2f}, Actual: {actual:.2f}")

    # Calculate permutation importance
    r = permutation_importance(gbr, x_test, y_test, n_repeats=100, random_state=0)

    # Organize importances
    importance_dict = {name: score for name, score in zip(predictors, r.importances_mean)}
    sorted_importances = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

    # Print sorted importances
    for feature, importance in sorted_importances:
        print(f"{feature}: {importance}")
    


    return [mae, gbr, sorted_importances]

# Example usage of the modified function


def getBestParams(df, arr):

  #make the predictors and data and test sets correctly
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  
  #make the parameters to search over. for hidden_layer_sizes, I experimented with alot and the ones listed now is just final set of experiment.
  
  grid = {
      'n_estimators': [100, 200, 300],
      'learning_rate': [0.01, 0.1, 0.2],
      'max_depth': [3, 4, 5],
      'min_samples_split': [2, 3, 4]
  }

  #create an MLPRegressor object
  gbr = GradientBoostingRegressor()

  #create a GridSearchCV object and fit it to the training data
  grid_search = GridSearchCV(gbr, param_grid=grid, cv=5, n_jobs=-1)
  grid_search.fit(x_train, y_train)

  #the best model to make predictions on the test data and evaluate performance
  y_pred = grid_search.predict(x_test)

  #inverse transform the scaled predictions to get the original scale, uses a reverse of original formula
  for i in range(len(y_pred)):
    y_pred[i] = (y_pred[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]


  # print(mean_absolute_error(y_test, y_pred))

  return grid_search.best_params_

#gets original value for fantasy points for predictions.
def getScaleBack(df):
  #index of column
  column_index = df.columns.get_loc("PPG")

  #min value of column:
  min_value = df["PPG"].min()

  #scaling valye of column
  #scaling_factor = scaler.scale_[column_index]
  max_value = df["PPG"].max()

  #array to be used later to scale each data
  arr = [min_value, max_value]

  return arr

def test(df, model, arr):
  #make columns everything but target
  predictors = [col for col in df.columns if col != "PPG" and 'Unnamed' not in col and col != 'YearsBack']


  #make train and test sets
  x = df[predictors].values
  y = df["PPG"].values
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=40)

  #make the predictions
  predict_test = model.predict(x_test)

  #inverse transform the scaled predictions to get the original scale by reversing formula
  for i in range(len(predict_test)):
    predict_test[i] = (predict_test[i]*(arr[1] - arr[0])) + arr[0]
  for i in range(len(y_test)):
    y_test[i] = (y_test[i]*(arr[1] - arr[0])) + arr[0]

  #average error 
  mae = mean_absolute_error(y_test, predict_test)
  # print("test ", mae)

#if ppr is 0, than it is non ppr. if 1, then it is half ppr. if 2, full ppr. loops through each.
for ppr in [0,1,2]:

  dfFantasyCopy = dfFantasy.copy()

  dfFantasyCopy = correctData(dfFantasyCopy, ppr)
  
  dfFantasyCopy = makeCorrectShift(dfFantasyCopy)

  dfFantasyCopy = dfFantasyCopy.loc[dfFantasyCopy["season"] != 2012]

  dfFantasyCopy = removeUnwanted(dfFantasyCopy, "WRTE")

  dfFantasyCopy = dfFantasyCopy.reset_index(drop=True)

  #gets fantasy_points_ppr scale per each position
  scaleWRTE = getScaleBack(dfFantasyCopy)

  dfFantasyCopy[dfFantasyCopy.columns] = scaler.fit_transform(dfFantasyCopy[dfFantasyCopy.columns])

  #obtained by running the getBestParams function per each respective position
  paramWRTE = getBestParams(dfFantasyCopy, scaleWRTE)

  #makes array of model and score, then prints it
  wrteArray = machineLearning(dfFantasyCopy, scaleWRTE, paramWRTE)
  num = wrteArray[0]
  wrteModel = wrteArray[1]

  print("wrte score(ppg off on average per player): ", num)
  if ppr == 0:
      joblib.dump(wrteModel, "wrte models/wrteModelNonPPR.joblib")
  elif ppr == 1:
      joblib.dump(wrteModel, "wrte models/wrteModelHalfPPR.joblib")
  elif ppr == 2:
      joblib.dump(wrteModel, "wrte models/wrteModelPPR.joblib")

receiving_air_yards: 0.1252131697268834
receiving_yards_after_catch: 0.11004595216920647
qb: 0.03231817528397688
wrte: 0.02988979714896725
targets: 0.028040083461295245
oline: 0.013920920690549454
receiving_fumbles_lost: 0.007687972869702055
receiving_first_downs: 0.0026183161429095625
receiving_2pt_conversions: 0.0007581389399461146
dst: 0.00023813778888016836
rushing_fumbles_lost: 0.0
rushing_2pt_conversions: 0.0
special_teams_tds: 0.0
carries: -0.000656084341637837
rushing_first_downs: -0.0012753596261482513
rrtd: -0.002586166316200752
receiving_tds: -0.0037045193976893574
receiving_yards: -0.004570537246859706
rushing_yards: -0.006044676303863912
age: -0.006385783015300411
rushing_tds: -0.010757737786323855
rb: -0.012037887085616879
receptions: -0.01549237534816152
wrte score(ppg off on average per player):  0.11532339659903262
receiving_yards: 0.16056983093517915
receiving_air_yards: 0.14296319183221415
qb: 0.02734837809713282
receiving_yards_after_catch: 0.021551770212817022
wrte