In [1]:
import torch
import torch.nn as nn

import torch.nn.functional as F # All functions that don't have any parameters
import numpy as np
from  boxScore  import boxScore
from sklearn.model_selection import KFold

from torch.utils.data import DataLoader # data management 
# 

from DatasetPrivate import MyDataset
import numpy as np

import wandb
import winsound


# ENSEMBLE LEARNING

# Random Forest

In [2]:
from sklearn.ensemble import RandomForestClassifier

# Grid-search a RandomForest for every (season, stats type) pair and keep the
# best-scoring hyper-parameters of each pair in best_config_array.
# NOTE(review): the accuracy is measured on the x_test/y_test split returned by
# boxScore.separation() and stored under "val_acc" -- confirm that this split is
# meant to be used for model selection.
years_array = ["2018-19", "2019-20", "2020-21", "2021-22"]
stats_array = ["traditional", "advance"]
n_estimators_array = [50, 75, 100, 150]
best_config_array = []

for years in years_array:
    for stats in stats_array:
        # -1 sentinels: the first evaluated model always replaces them.
        best_acc, best_estimators, best_max_depth = -1, -1, -1

        box_score = boxScore(years, stats)
        x_train, x_test, y_train, y_test = box_score.separation()
        y_train = np.array(y_train)
        y_test = np.array(y_test)

        for n_estimators in n_estimators_array:
            for max_depth in range(2, 20):
                clf = RandomForestClassifier(
                    max_depth=max_depth,
                    random_state=42,
                    n_estimators=n_estimators,
                )
                clf.fit(x_train, y_train)
                prediction = clf.predict(x_test)

                # A sample is a hit when the first component of the predicted
                # row equals the first component of the target row.
                hits = sum(
                    1
                    for predicted, expected in zip(prediction, y_test)
                    if predicted[0] == expected[0]
                )
                acc = hits / len(x_test)

                # Strict comparison: on ties the earliest configuration is kept.
                if acc > best_acc:
                    best_acc = acc
                    best_estimators = n_estimators
                    best_max_depth = max_depth

        print("For {} with {} stats the best accuracy is {} with {} estimators and max depth {}".format(years,stats,best_acc,best_estimators,best_max_depth))
        best_config_array.append({
            "architecture": "RandomForest",
            "dataset": years+"_"+stats,
            "n_estimators": best_estimators,
            "val_acc": best_acc,
            "max_depth": best_max_depth,
        })

For 2018-19 with traditional stats the best accuracy is 0.7 with 50 estimators and max depth 7
For 2018-19 with advance stats the best accuracy is 0.66 with 100 estimators and max depth 6
For 2019-20 with traditional stats the best accuracy is 0.72 with 100 estimators and max depth 14
For 2019-20 with advance stats the best accuracy is 0.7 with 100 estimators and max depth 7
For 2020-21 with traditional stats the best accuracy is 0.7 with 75 estimators and max depth 5
For 2020-21 with advance stats the best accuracy is 0.68 with 100 estimators and max depth 3
For 2021-22 with traditional stats the best accuracy is 0.68 with 150 estimators and max depth 10
For 2021-22 with advance stats the best accuracy is 0.5 with 100 estimators and max depth 3


In [38]:
def wandbWriteEnsembles(project_name,best_config_array,name_runs):
    """Create one Weights & Biases run per configuration and log its accuracy.

    Args:
        project_name: name of the W&B project that receives the runs.
        best_config_array: iterable of dicts. Every dict must contain a
            "val_acc" entry; the dict is handed to ``wandb.init`` as the run
            configuration and "val_acc" is additionally logged as a metric.
        name_runs: name assigned to every run created by this call.

    Raises:
        KeyError: if a configuration has no "val_acc" entry (raised before a
            run is opened for that configuration).

    The configuration dicts are only read, never modified, so the same list
    can safely be passed to this function more than once.
    """
    for config in best_config_array:
        # Read instead of pop: removing the key from the caller's dict would
        # make any later call with the same list fail with KeyError.
        val_acc = config["val_acc"]

        # Hand W&B its own shallow copy so the caller's dict stays untouched.
        run = wandb.init(project=project_name, config=dict(config))
        run.name = name_runs
        wandb.log({'val_acc': val_acc})
        run.finish()

In [None]:
wandb.login()

# Every (season, stats type) pair has its own W&B project. Send each project
# only the configuration whose "dataset" entry matches that pair; passing the
# whole list would log every dataset's result into every project under the
# current season's run name.
for years in years_array:
    for stats in stats_array:
        project_name="T"+stats[0:3].capitalize()+years[2:].replace("-","")+"runs"
        dataset_name=years+"_"+stats
        dataset_configs=[config for config in best_config_array if config["dataset"]==dataset_name]
        wandbWriteEnsembles(project_name,dataset_configs,"radomforest"+years)

In [2]:
def transform_y(y_train):
    """Flatten a sequence of label rows into a list of 0/1 class ids.

    Args:
        y_train: iterable of indexable rows; only the first component of each
            row is inspected.

    Returns:
        A list with one int per row: 0 when the row's first component equals
        1, otherwise 1.
    """
    return [0 if row[0] == 1 else 1 for row in y_train]

# ADA

In [30]:
from sklearn.ensemble import AdaBoostClassifier

# Single AdaBoost run with 100 estimators, scored on x_test.
# NOTE(review): x_train, x_test, y_train and y_test are not defined in this
# cell; it uses whatever an earlier cell left in the kernel -- confirm which
# dataset that is before trusting the printed number.
y_train_1d = transform_y(y_train)
y_test_1d = transform_y(y_test)

clf = AdaBoostClassifier(n_estimators=100, random_state=42)
clf.fit(x_train, y_train_1d)
prediction = clf.predict(x_test)

# Accuracy = matching predictions divided by the number of test samples.
count = sum(
    1
    for predicted, expected in zip(prediction, y_test_1d)
    if predicted == expected
)
acc = count / len(x_test)

# Starting from the -1 sentinel, the best score is simply this run's accuracy.
best_acc = -1
best_acc = max(best_acc, acc)
print(best_acc)

0.54


In [3]:
from sklearn.ensemble import AdaBoostClassifier

# Tune the number of AdaBoost estimators for every (season, stats type) pair
# and keep the best-scoring setting of each pair in best_config_array.
# NOTE(review): the accuracy is measured on the x_test/y_test split returned by
# boxScore.separation() and stored under "val_acc" -- confirm that this split is
# meant to be used for model selection.
years_array=["2018-19","2019-20","2020-21","2021-22"]

stats_array=["traditional","advance"]
best_config_array=[]
n_estimators_array=[50,75,100,150]
for years in years_array:
    for stats in stats_array:
        # -1 sentinels: the first evaluated model always replaces them.
        best_acc=-1
        best_estimators=-1
        box_score=boxScore(years,stats)
        x_train, x_test, y_train, y_test=box_score.separation()
        y_train=np.array(y_train)
        y_test=np.array(y_test)
        # The flattened labels do not depend on n_estimators, so they are
        # built once per dataset instead of once per candidate model.
        y_train_1d=transform_y(y_train)
        y_test_1d=transform_y(y_test)
        for n_estimators in n_estimators_array:
            clf = AdaBoostClassifier(n_estimators=n_estimators, random_state=42)
            clf.fit(x_train,y_train_1d)
            prediction=clf.predict(x_test)
            # Accuracy = matching predictions divided by the number of test samples.
            count=sum(1 for predicted,expected in zip(prediction,y_test_1d) if predicted==expected)
            acc=count/len(x_test)
            # Strict comparison: on ties the smaller, earlier n_estimators is kept.
            if acc> best_acc:
                best_acc=acc
                best_estimators=n_estimators
        print("For {} with {} stats the best accuracy is {} with {} estimators".format(years,stats,best_acc,best_estimators))
        config={
                    "architecture": "Adaboost",
                    "dataset":years+"_"+stats,
                    "n_estimators":best_estimators,
                    "val_acc":best_acc
            }
        best_config_array.append(config)

For 2018-19 with traditional stats the best accuracy is 0.58 with 75 estimators
For 2018-19 with advance stats the best accuracy is 0.72 with 150 estimators
For 2019-20 with traditional stats the best accuracy is 0.78 with 75 estimators
For 2019-20 with advance stats the best accuracy is 0.7 with 150 estimators
For 2020-21 with traditional stats the best accuracy is 0.76 with 100 estimators
For 2020-21 with advance stats the best accuracy is 0.74 with 100 estimators
For 2021-22 with traditional stats the best accuracy is 0.58 with 150 estimators
For 2021-22 with advance stats the best accuracy is 0.68 with 100 estimators


In [None]:
wandb.login()

# Every (season, stats type) pair has its own W&B project. Send each project
# only the configuration whose "dataset" entry matches that pair; passing the
# whole list would log every dataset's result into every project under the
# current season's run name.
for years in years_array:
    for stats in stats_array:
        project_name="T"+stats[0:3].capitalize()+years[2:].replace("-","")+"runs"
        dataset_name=years+"_"+stats
        dataset_configs=[config for config in best_config_array if config["dataset"]==dataset_name]
        wandbWriteEnsembles(project_name,dataset_configs,"adaboost"+years)