In [None]:
import pandas as pd
import csv, datetime
import numpy as np
import xgboost as xgb
import fastai as fast
from sklearn.metrics import *
from sklearn.feature_selection import SelectFromModel
from xgboost.sklearn import XGBClassifier
from sklearn import metrics   #Additional scklearn functions
from sklearn.model_selection import GridSearchCV   #Performing grid search
from sklearn.model_selection import train_test_split

#Combined score: adjusted AUC plus adjusted Kappa
def scoreFunc(y_test, preds):
    """Print and return the sum of the adjusted ROC AUC and the adjusted Cohen's kappa.

    Parameters:
        y_test: true labels, passed straight to the sklearn metrics.
        preds: predicted scores; must provide a ``round()`` method because the
            rounded values are used as hard labels for the kappa.

    Returns:
        Adjusted AUC + adjusted Kappa, where
        adjusted AUC is ``(AUC - 0.5) * 2`` when AUC > 0.5 and 0 otherwise, and
        adjusted Kappa is the kappa itself when it is positive and 0 otherwise.
    """
    auc = roc_auc_score(y_test, preds)
    print('AUC: ' + str(auc))

    #Kappa needs hard labels, so score the rounded predictions
    kappa = cohen_kappa_score(y_test, preds.round())

    #Rescale AUC so that 0.5 maps to 0 and 1.0 maps to 1; anything at or below 0.5 counts as 0
    if auc > 0.5:
        adjustedAuc = (auc - 0.5) * 2
    else:
        adjustedAuc = 0
    print('AdjustedAUC: ' + str(adjustedAuc))

    #A non-positive kappa counts as 0
    if kappa > 0:
        adjustedKappa = kappa
    else:
        adjustedKappa = 0
    print('Kappa: ' + str(kappa))
    print('AdjustedKappa: ' + str(adjustedKappa))

    return adjustedAuc + adjustedKappa

#Same adjusted AUC + adjusted Kappa score, returned as a named value where lower is better
def evalFunc(preds, y_test):
    """Return ``('rocAucKappa', 1 - (adjusted AUC + adjusted Kappa))``.

    Parameters:
        preds: predicted scores; must provide a ``round()`` method.
        y_test: object whose ``get_label()`` returns the true labels
            (presumably an xgboost DMatrix - confirm against the caller).

    Returns:
        A ``(name, value)`` tuple; the value is one minus the combined score,
        converted to a plain float.
    """
    labels = y_test.get_label()
    auc = roc_auc_score(labels, preds)
    #Kappa needs hard labels, so score the rounded predictions
    kappa = cohen_kappa_score(labels, preds.round())

    #AUC at or below 0.5 counts as 0, otherwise it is rescaled to the 0..1 range
    adjustedAuc = 0
    if auc > 0.5:
        adjustedAuc = (auc - 0.5) * 2

    #A non-positive kappa counts as 0
    adjustedKappa = 0
    if kappa > 0:
        adjustedKappa = kappa

    combined = adjustedAuc + adjustedKappa
    return 'rocAucKappa', float(1 - combined)

from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
from matplotlib.pylab import rcParams
#Notebook-wide matplotlib defaults: font size 12 and figure size [10, 7]
rcParams.update({'font.size':12, 'figure.figsize':[10,7]})

In [None]:
#10 Minute Model - load and clean the data
#Read in Training Feature Data
train = pd.read_csv('../TrainingData/FullScrapeT10.csv')
target = 'Target'
IDcol = 'StudentID'
#The student id is an identifier, not a feature
train = train.drop([IDcol],axis=1)
#remove duplicates: drop every column whose name ends in '.1'
train = train.loc[:,~train.columns.str.endswith('.1')]

#Read in Hidden Feature Data
dataPred = pd.read_csv('../TrainingData/FullScrapeH10.csv')
#Keep the hidden-set ids so they can be written next to the predictions later
idDF = pd.DataFrame(dataPred[IDcol])

#Convert from boolean to binary
train[target] *=1

#Data Cleaning - Optional

#Keep only the columns that have at least `threshold` non-missing values
print("Removing columns with less than 10 values")
threshold = 10
train = train.dropna(axis=1,thresh=threshold)

#In case Hidden and Training have different columns
#This is primarily for click rate for which additional columns are generated for every 10 clicks.
print("Removing columns not present in both training and hidden")
missingInHidden = [col for col in train.columns
                   if col not in dataPred.columns and col != target]
train = train.drop(columns=missingInHidden)
#Report every column that was dropped
for col in missingInHidden:
    print(col)

#Separate Target and Features
X, y = train.drop([target],axis=1),train[target]
#Sort X columns alphabetically
X = X.reindex(sorted(X.columns), axis=1)

#Split into training and validation set
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)


#XGBRegressor Model
xg_reg = xgb.XGBRegressor()

In [None]:
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV

#Hyper-parameter search with recursive feature elimination for the 10 minute model.
#Wrap the combined AUC/Kappa score so the search can use it for model selection
scorer = make_scorer(scoreFunc)
#One shared random generator: makes the sampled hyper-parameter candidates repeatable
#across runs while still giving each search round a different sample
searchRNG = np.random.RandomState(1988)
#kfold = StratifiedKFold(10,shuffle=True,random_state=1988)
parameters = {
    'objective':['binary:logistic'],
    'learning_rate':np.linspace(.01,.1,num=10,endpoint=True),
    'max_depth': range(3,8,1),
    'min_child_weight': range(1,7,1),
    'subsample': np.linspace(.1,1.0,num=10,endpoint=True),
    'colsample_bytree': np.linspace(.1,1.0,num=10,endpoint=True),
    'gamma':range(0,5,1),
    'n_estimators': [100]}

#Feature elimination: keep searching and pruning until at most 30 features remain
while(len(X.columns) > 30):
    xgb_grid = RandomizedSearchCV(xg_reg,  parameters,n_iter=20,n_jobs = -1,scoring=scorer, cv=10,
                                  verbose=True, random_state=searchRNG)

    xgb_grid.fit(X, y)

    best_model = xgb_grid.best_estimator_

    print(xgb_grid.best_score_)
    print(xgb_grid.best_params_)

    #Rank features by f-score (how often a feature is used to split);
    #features that are never used in a split do not appear in get_fscore() at all
    fScores = sorted(best_model.get_booster().get_fscore().items(), key=lambda x: x[1])
    #Discard the lowest-scoring 25% of the ranked features
    fScores = fScores[int(len(fScores)*0.25):]
    keptFeatures = [item[0] for item in fScores]

    #Only keep top 75% of features
    print("Removing columns based on feature importance")
    droppedCols = [col for col in train.columns
                   if col not in keptFeatures and col != target]
    train = train.drop(columns=droppedCols)
    for col in droppedCols:
        print(col)

    #Separate Target and Features
    X, y = train.drop([target],axis=1),train[target]
    print("Number of Features Remaining: ", len(X.columns))
    #Sort X columns alphabetically
    X = X.reindex(sorted(X.columns), axis=1)

    #XGBRegressor Model
    xg_reg = xgb.XGBRegressor()

#Final, larger search on the reduced feature set
xgb_grid = RandomizedSearchCV(xg_reg,  parameters,n_iter=200,n_jobs = -1,scoring=scorer, cv=10,
                              verbose=True, random_state=searchRNG)

xgb_grid.fit(X, y)

best_model = xgb_grid.best_estimator_

print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

In [None]:
#Best Hyper-parameters and Validation Score
#0.6442671192414215
#{'subsample': 0.9, 'objective': 'binary:logistic', 'n_estimators': 90, 
# 'min_child_weight': 1, 'max_depth': 3, 'learning_rate': 0.07, 'gamma': 0, 'colsample_bytree': 0.8}

#OPTIONAL- Use Grid Search for more comprehensive hyperparameter tuning
#from sklearn.metrics import make_scorer
#from sklearn.model_selection import StratifiedKFold
#from sklearn.model_selection import RandomizedSearchCV

#scorer = make_scorer(scoreFunc)
#kfold = StratifiedKFold(10,shuffle=True,random_state=1988)
#parameters = {
#    'objective':['binary:logistic'],
#    'learning_rate':[.03],
#    'max_depth': [8],
#    'min_child_weight': [3],
#    'subsample': [0.7],
#    'colsample_bytree': [0.4],
#    'gamma':[6],
#    'n_estimators': range(190,210,2)}

#xgb_grid = GridSearchCV(xg_reg,  parameters,n_jobs = -1,scoring=scorer, cv=3, verbose=True)

#xgb_grid.fit(X_train,y_train)

#print(xgb_grid.best_score_)
#print(xgb_grid.best_params_)

In [None]:
#Plot the ten most important features of the final 10 minute model and save the figure
#(set the figure size before plotting so it applies to this figure)
plt.rcParams['figure.figsize'] = [10, 7]
xgb.plot_importance(best_model,max_num_features=10, title = "Feature Importance 10 Minute Model")
plt.savefig('FeatImport_10.png')


#Give the hidden data exactly the feature columns that survived in `train`, in the same
#alphabetical order used for fitting. The target is excluded explicitly so that a
#'Target' column in the hidden file can never be passed to the model as a feature.
featureCols = sorted(col for col in train.columns if col != target)
dataPred = dataPred[featureCols]
print(dataPred.head())

preds = best_model.predict(dataPred)

#Attach the hidden-set student ids to the predictions and write them out
predDF = pd.DataFrame(preds,columns=['Target'])
output = pd.concat([idDF,predDF], axis = 1)

output.to_csv('hidden10_Output.csv', index=False)

In [None]:
#20 Minute Model - load and clean the data
#Read in Training Feature Data
train = pd.read_csv('../TrainingData/FullScrapeT20.csv')
target = 'Target'
IDcol = 'StudentID'
#The student id is an identifier, not a feature
train = train.drop([IDcol],axis=1)
#remove duplicates: drop every column whose name ends in '.1'
train = train.loc[:,~train.columns.str.endswith('.1')]

#Read in Hidden Feature Data
dataPred = pd.read_csv('../TrainingData/FullScrapeH20.csv')
#Keep the hidden-set ids so they can be written next to the predictions later
idDF = pd.DataFrame(dataPred[IDcol])

#Convert from boolean to binary
train[target] *=1

#Data Cleaning - Optional

#Keep only the columns that have at least `threshold` non-missing values
print("Removing columns with less than 10 values")
threshold = 10
train = train.dropna(axis=1,thresh=threshold)

#In case Hidden and Training have different columns
#This is primarily for click rate for which additional columns are generated for every 10 clicks.
print("Removing columns not present in both training and hidden")
missingInHidden = [col for col in train.columns
                   if col not in dataPred.columns and col != target]
train = train.drop(columns=missingInHidden)
#Report every column that was dropped
for col in missingInHidden:
    print(col)

#Separate Target and Features
X, y = train.drop([target],axis=1),train[target]
#Sort X columns alphabetically
X = X.reindex(sorted(X.columns), axis=1)

#Split into training and validation set
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)

#XGBRegressor Model
xg_reg = xgb.XGBRegressor()

In [None]:
#Hyper-parameter search with recursive feature elimination for the 20 minute model.
#Wrap the combined AUC/Kappa score so the search can use it for model selection
scorer = make_scorer(scoreFunc)
#One shared random generator: makes the sampled hyper-parameter candidates repeatable
#across runs while still giving each search round a different sample
searchRNG = np.random.RandomState(1988)
#kfold = StratifiedKFold(10,shuffle=True,random_state=1988)
parameters = {
    'objective':['binary:logistic'],
    'learning_rate':np.linspace(.01,.1,num=10,endpoint=True),
    'max_depth': range(3,8,1),
    'min_child_weight': range(1,7,1),
    'subsample': np.linspace(.1,1.0,num=10,endpoint=True),
    'colsample_bytree': np.linspace(.1,1.0,num=10,endpoint=True),
    'gamma':range(0,5,1),
    'n_estimators': [100]}

#Feature elimination: keep searching and pruning until at most 30 features remain
while(len(X.columns) > 30):
    xgb_grid = RandomizedSearchCV(xg_reg,  parameters,n_iter=20,n_jobs = -1,scoring=scorer, cv=10,
                                  verbose=True, random_state=searchRNG)

    xgb_grid.fit(X, y)

    best_model = xgb_grid.best_estimator_

    print(xgb_grid.best_score_)
    print(xgb_grid.best_params_)

    #Rank features by f-score (how often a feature is used to split);
    #features that are never used in a split do not appear in get_fscore() at all
    fScores = sorted(best_model.get_booster().get_fscore().items(), key=lambda x: x[1])
    #Discard the lowest-scoring 25% of the ranked features
    fScores = fScores[int(len(fScores)*0.25):]
    keptFeatures = [item[0] for item in fScores]

    #Only keep top 75% of features
    print("Removing columns based on feature importance")
    droppedCols = [col for col in train.columns
                   if col not in keptFeatures and col != target]
    train = train.drop(columns=droppedCols)
    for col in droppedCols:
        print(col)

    #Separate Target and Features
    X, y = train.drop([target],axis=1),train[target]
    print("Number of Features Remaining: ", len(X.columns))
    #Sort X columns alphabetically
    X = X.reindex(sorted(X.columns), axis=1)

    #XGBRegressor Model
    xg_reg = xgb.XGBRegressor(importance_type = 'weight')

#Final, larger search on the reduced feature set
xgb_grid = RandomizedSearchCV(xg_reg,  parameters,n_iter=200,n_jobs = -1,scoring=scorer, cv=10,
                              verbose=True, random_state=searchRNG)

xgb_grid.fit(X, y)

best_model = xgb_grid.best_estimator_

print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

In [None]:
#Best Hyper-parameters and Validation Score
#0.7593081688312175
#{'subsample': 1.0, 'objective': 'binary:logistic', 'n_estimators': 70, 'min_child_weight': 3,
# 'max_depth': 4, 'learning_rate': 0.05000000000000001, 'gamma': 3, 'colsample_bytree': 0.30000000000000004}

#OPTIONAL- Use Grid Search for more comprehensive hyperparameter tuning
#from sklearn.metrics import make_scorer
#from sklearn.model_selection import StratifiedKFold
#from sklearn.model_selection import RandomizedSearchCV

#scorer = make_scorer(scoreFunc)
#kfold = StratifiedKFold(10,shuffle=True,random_state=1988)
#parameters = {
#    'objective':['binary:logistic'],
#    'learning_rate':[.03],
#    'max_depth': [8],
#    'min_child_weight': [3],
#    'subsample': [0.7],
#    'colsample_bytree': [0.4],
#    'gamma':[6],
#    'n_estimators': range(190,210,2)}

#xgb_grid = GridSearchCV(xg_reg,  parameters,n_jobs = -1,scoring=scorer, cv=3, verbose=True)

#xgb_grid.fit(X_train,y_train)

#print(xgb_grid.best_score_)
#print(xgb_grid.best_params_)

In [None]:
#Plot the ten most important features of the final 20 minute model and save the figure
xgb.plot_importance(best_model,max_num_features=10, title="Feature Importance 20 Minute Model")
plt.savefig('FeatImport_20.png')


#Give the hidden data exactly the feature columns that survived in `train`, in the same
#alphabetical order used for fitting. The target is excluded explicitly so that a
#'Target' column in the hidden file can never be passed to the model as a feature.
featureCols = sorted(col for col in train.columns if col != target)
dataPred = dataPred[featureCols]
print(dataPred.head())

preds = best_model.predict(dataPred)

#Attach the hidden-set student ids to the predictions and write them out
predDF = pd.DataFrame(preds,columns=['Target'])
output = pd.concat([idDF,predDF], axis = 1)

output.to_csv('hidden20_Output.csv', index=False)

In [None]:
#30 Minute Model - load and clean the data
#Read in Training Feature Data
train = pd.read_csv('../TrainingData/FullScrapeT30.csv')
target = 'Target'
IDcol = 'StudentID'
#The student id is an identifier, not a feature
train = train.drop([IDcol],axis=1)
#remove duplicates: drop every column whose name ends in '.1'
train = train.loc[:,~train.columns.str.endswith('.1')]

#Read in Hidden Feature Data
dataPred = pd.read_csv('../TrainingData/FullScrapeH30.csv')
#Keep the hidden-set ids so they can be written next to the predictions later
idDF = pd.DataFrame(dataPred[IDcol])


#Convert from boolean to binary
train[target] *=1

#Data Cleaning - Optional

#Keep only the columns that have at least `threshold` non-missing values
print("Removing columns with less than 10 values")
threshold = 10
train = train.dropna(axis=1,thresh=threshold)

#In case Hidden and Training have different columns
#This is primarily for click rate for which additional columns are generated for every 10 clicks.
print("Removing columns not present in both training and hidden")
missingInHidden = [col for col in train.columns
                   if col not in dataPred.columns and col != target]
train = train.drop(columns=missingInHidden)
#Report every column that was dropped
for col in missingInHidden:
    print(col)

#Separate Target and Features
X, y = train.drop([target],axis=1),train[target]
#Sort X columns alphabetically
X = X.reindex(sorted(X.columns), axis=1)

#Split into training and validation set
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)

#XGBRegressor Model
xg_reg = xgb.XGBRegressor()

In [None]:
#Hyper-parameter search with recursive feature elimination for the 30 minute model.
#Wrap the combined AUC/Kappa score so the search can use it for model selection
scorer = make_scorer(scoreFunc)
#One shared random generator: makes the sampled hyper-parameter candidates repeatable
#across runs while still giving each search round a different sample
searchRNG = np.random.RandomState(1988)
#kfold = StratifiedKFold(10,shuffle=True,random_state=1988)
parameters = {
    'objective':['binary:logistic'],
    'learning_rate':np.linspace(.01,.1,num=10,endpoint=True),
    'max_depth': range(3,8,1),
    'min_child_weight': range(1,7,1),
    'subsample': np.linspace(.1,1.0,num=10,endpoint=True),
    'colsample_bytree': np.linspace(.1,1.0,num=10,endpoint=True),
    'gamma':range(0,5,1),
    'n_estimators': [100]}

#Feature elimination: keep searching and pruning until at most 30 features remain
while(len(X.columns) > 30):
    xgb_grid = RandomizedSearchCV(xg_reg,  parameters,n_iter=20,n_jobs = -1,scoring=scorer, cv=10,
                                  verbose=True, random_state=searchRNG)

    xgb_grid.fit(X, y)

    best_model = xgb_grid.best_estimator_

    print(xgb_grid.best_score_)
    print(xgb_grid.best_params_)

    #Rank features by f-score (how often a feature is used to split);
    #features that are never used in a split do not appear in get_fscore() at all
    fScores = sorted(best_model.get_booster().get_fscore().items(), key=lambda x: x[1])
    #Discard the lowest-scoring 25% of the ranked features
    fScores = fScores[int(len(fScores)*0.25):]
    keptFeatures = [item[0] for item in fScores]

    #Only keep top 75% of features.
    print("Removing columns based on feature importance")
    droppedCols = [col for col in train.columns
                   if col not in keptFeatures and col != target]
    train = train.drop(columns=droppedCols)
    for col in droppedCols:
        print(col)
    #Separate Target and Features
    X, y = train.drop([target],axis=1),train[target]
    print("Number of Features Remaining: ", len(X.columns))
    #Sort X columns alphabetically
    X = X.reindex(sorted(X.columns), axis=1)

    #XGBRegressor Model
    xg_reg = xgb.XGBRegressor(importance_type = 'weight')

#Final, larger search on the reduced feature set
xgb_grid = RandomizedSearchCV(xg_reg,  parameters,n_iter=200,n_jobs = -1,scoring=scorer, cv=10,
                              verbose=True, random_state=searchRNG)

xgb_grid.fit(X, y)

best_model = xgb_grid.best_estimator_

print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

In [None]:
#Best Hyper-parameters and Validation Score
#0.7842701038048573
#{'subsample': 0.6, 'objective': 'binary:logistic', 'n_estimators': 90, 'min_child_weight': 5,
# 'max_depth': 3, 'learning_rate': 0.06000000000000001, 'gamma': 2, 'colsample_bytree': 0.30000000000000004}

#OPTIONAL- Use Grid Search for more comprehensive hyperparameter tuning
#from sklearn.metrics import make_scorer
#from sklearn.model_selection import StratifiedKFold
#from sklearn.model_selection import RandomizedSearchCV

#scorer = make_scorer(scoreFunc)
#kfold = StratifiedKFold(10,shuffle=True,random_state=1988)
#parameters = {
#    'objective':['binary:logistic'],
#    'learning_rate':[.03],
#    'max_depth': [8],
#    'min_child_weight': [3],
#    'subsample': [0.7],
#    'colsample_bytree': [0.4],
#    'gamma':[6],
#    'n_estimators': range(190,210,2)}

#xgb_grid = GridSearchCV(xg_reg,  parameters,n_jobs = -1,scoring=scorer, cv=3, verbose=True)

#xgb_grid.fit(X_train,y_train)

#print(xgb_grid.best_score_)
#print(xgb_grid.best_params_)

In [None]:
#Plot the ten most important features of the final 30 minute model and save the figure
#(set the figure size before plotting so it applies to this figure)
plt.rcParams['figure.figsize'] = [10, 7]
xgb.plot_importance(best_model,max_num_features=10,title = "Feature Importance 30 Minute Model")
plt.savefig('FeatImport_30.png')


#Give the hidden data exactly the feature columns that survived in `train`, in the same
#alphabetical order used for fitting. The target is excluded explicitly so that a
#'Target' column in the hidden file can never be passed to the model as a feature.
featureCols = sorted(col for col in train.columns if col != target)
dataPred = dataPred[featureCols]
print(dataPred.head())

preds = best_model.predict(dataPred)

#Attach the hidden-set student ids to the predictions and write them out
predDF = pd.DataFrame(preds,columns=['Target'])
output = pd.concat([idDF,predDF], axis = 1)

output.to_csv('hidden30_Output.csv', index=False)

In [None]:
#Read in predictions and combine them
hidden10 = pd.read_csv('hidden10_Output.csv')
hidden20 = pd.read_csv('hidden20_Output.csv')
hidden30 = pd.read_csv('hidden30_Output.csv')
frames = [hidden10,hidden20,hidden30]
result = pd.concat(frames)

#Sort by StudentID so every prediction frame has a stable order
hidden10 = hidden10.sort_values(by='StudentID')
hidden20 = hidden20.sort_values(by='StudentID')
hidden30 = hidden30.sort_values(by='StudentID')
result = result.sort_values(by=['StudentID'])


def _scoreAligned(predsDF, targetsDF):
    """Score predictions against the targets of the same students.

    Rows are paired by student id (``StudentID`` in the predictions,
    ``STUDENTID`` in the targets) instead of by position, so the result does
    not depend on the row order of either frame. Only students present in both
    frames are scored.

    Parameters:
        predsDF: frame with 'StudentID' and 'Target' columns.
        targetsDF: frame with 'STUDENTID' and 'EfficientlyCompletedBlockB' columns.

    Returns:
        The value returned by scoreFunc for the paired rows.

    Raises:
        pandas.errors.MergeError: if a student id is repeated in either frame,
            because the pairing would then be ambiguous.
    """
    aligned = targetsDF[['STUDENTID','EfficientlyCompletedBlockB']].merge(
        predsDF[['StudentID','Target']],
        left_on='STUDENTID', right_on='StudentID',
        how='inner', validate='one_to_one')
    return scoreFunc(aligned['EfficientlyCompletedBlockB'], aligned['Target'])


################################################################################################################################
#Public Leaderboard Scoring

#Read in public leaderboard test
publicTargets = pd.read_csv('../EvaluationData/hidden_leaderboard.csv')

print("Scoring on Public Leaderboard 10: \n")
leaderBoard = _scoreAligned(hidden10, publicTargets)
print("Total: ", leaderBoard)

print("\nScoring on Public Leaderboard 20: \n")
leaderBoard = _scoreAligned(hidden20, publicTargets)
print("Total: ", leaderBoard)

print("\nScoring on Public Leaderboard 30: \n")
leaderBoard = _scoreAligned(hidden30, publicTargets)
print("Total: ", leaderBoard)

print("\nTotal Scoring on Public Leaderboard: \n")
hiddenLeaderBoard = _scoreAligned(result, publicTargets)
print("\nTotal: ", hiddenLeaderBoard)


################################################################################################################################
#Final Leaderboard Scoring
finalTargets = pd.read_csv('../EvaluationData/hidden_test.csv')

print("\nScoring on final Leaderboard 10: \n")
leaderBoard = _scoreAligned(hidden10, finalTargets)
print("Total: ", leaderBoard)

print("\nScoring on final Leaderboard 20: \n")
leaderBoard = _scoreAligned(hidden20, finalTargets)
print("Total: ", leaderBoard)

print("\nScoring on final Leaderboard 30: \n")
leaderBoard = _scoreAligned(hidden30, finalTargets)
print("Total: ", leaderBoard)

print("\nTotal Scoring on final Leaderboard: \n")
finalLeaderBoard = _scoreAligned(result, finalTargets)
print("Total: ", finalLeaderBoard)


################################################################################################################################
#Combined Leaderboard Scoring (public and final targets together)
combinedTargets = pd.concat([finalTargets,publicTargets])

#Sort by Student ID for a stable order
combinedTargets = combinedTargets.sort_values(by='STUDENTID')

print("\nScoring on Combined Leaderboard 10: \n")
leaderBoard = _scoreAligned(hidden10, combinedTargets)
print("Total: ", leaderBoard)

print("\nScoring on Combined Leaderboard 20: \n")
leaderBoard = _scoreAligned(hidden20, combinedTargets)
print("Total: ", leaderBoard)

print("\nScoring on Combined Leaderboard 30: \n")
leaderBoard = _scoreAligned(hidden30, combinedTargets)
print("Total: ", leaderBoard)

print("\nTotal Combined Scoring: \n")
combinedLeaderBoard = _scoreAligned(result, combinedTargets)
print("Total: ", combinedLeaderBoard)

#Mean of the public and final leaderboard totals
print("\nCombined Leaderboards: ", (finalLeaderBoard + hiddenLeaderBoard)/2)