In [1]:
#from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import pickle
import pandas as pd
import torch

In [9]:
# Identifier under which this team's results are reported.
MY_UNIQUE_ID = "martinaallaseconda"


def getName():
    """Return the unique ID of the team (the value of ``MY_UNIQUE_ID``)."""
    team_id = MY_UNIQUE_ID
    return team_id

# Regressors whose features go through the scaler stored in "scaler.save".
# Every other name (e.g. the tabular models "TB" / "TF") gets the frame as-is.
_SCALED_MODELS = frozenset({"RF", "LR", "SVR", "KNR", "FF"})


# Input: Test dataframe, Regressor name
# Output: PreProcessed test dataframe
def preprocess(df, clfName):
    """Prepare ``df`` for the regressor called ``clfName``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns plus the target column ``'Year'``.
    clfName : str
        Regressor name. ``"RF"``, ``"LR"``, ``"SVR"``, ``"KNR"`` and ``"FF"``
        are scaled; any other name is passed through.

    Returns
    -------
    pandas.DataFrame
        For scaled regressors: the scaler's output followed by the original
        ``'Year'`` column, indexed like ``df``. When the scaler returns a plain
        array the feature columns are labelled ``0..n-1``.
        For every other name: ``df`` itself (not a copy).

    Raises
    ------
    OSError
        If ``scaler.save`` cannot be opened in the working directory.
    KeyError
        If ``df`` has no ``'Year'`` column (scaled regressors only).
    """
    if clfName not in _SCALED_MODELS:
        # No scaler for TabNet and TabTransformer.
        return df

    X = df.drop(columns=['Year'])
    y = df['Year']

    # NOTE(security): pickle.load can execute arbitrary code; only use a
    # scaler.save file produced by a trusted training run.
    with open("scaler.save", 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    # Keep the caller's index on the scaled features. pd.concat(axis=1) aligns
    # on index labels, so a fresh RangeIndex here would mis-pair features and
    # targets (and introduce NaN rows) for any frame that is not indexed 0..n-1.
    X_scaled = pd.DataFrame(scaler.transform(X), index=X.index)
    return pd.concat([X_scaled, y], axis=1)


# Pickle file (relative to the working directory) for each supported regressor.
_MODEL_FILES = {
    "RF": "rf.save",
    "LR": "lr.save",
    "SVR": "svr.save",
    "KNR": "knn.save",
    "FF": "ff4.save",
    "TB": "tb_tabnet4.save",
    "TF": "tabTransf.save",
}


# Input: Regressor name ("RF", "LR", "SVR", "KNR", "FF", "TB" or "TF")
# Output: Regressor object
def load(clfName):
    """Load the persisted regressor registered under ``clfName``.

    Parameters
    ----------
    clfName : str
        One of the keys of ``_MODEL_FILES``. Matching is case-sensitive.

    Returns
    -------
    object or None
        The unpickled regressor, or ``None`` when ``clfName`` is not a
        supported name.

    Raises
    ------
    OSError
        If the file for a supported name cannot be opened.
    """
    model_path = _MODEL_FILES.get(clfName)
    if model_path is None:
        return None

    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that come from a trusted training run.
    # The context manager closes the handle, which the bare open() call leaked.
    with open(model_path, 'rb') as model_file:
        return pickle.load(model_file)



def _model_device(model):
    """Return the device that holds ``model``'s parameters."""
    try:
        return next(model.parameters()).device
    except (AttributeError, StopIteration):
        # Nothing to inspect: fall back to the historical device choice.
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Input: PreProcessed dataset, Regressor Name, Regressor Object
# Output: Performance dictionary
def predict(df, clfName, clf):
    """Score ``clf`` on ``df`` and return its regression metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the feature columns and the target column ``'Year'``.
    clfName : str
        Regressor name; selects how predictions are obtained:
        ``"TB"`` / ``"TF"`` call ``clf.predict(df)``, ``"FF"`` calls ``clf``
        as a torch module, anything else calls ``clf.predict`` on the feature
        array.
    clf : object
        The regressor to evaluate.

    Returns
    -------
    dict
        ``{"mse": ..., "mae": ..., "mape": ..., "r2square": ...}``.

    Raises
    ------
    ValueError
        If ``clf`` is ``None``.
    KeyError
        If ``df`` has no ``'Year'`` column.
    """
    if clf is None:
        raise ValueError(f"no regressor object supplied for {clfName!r}")

    X = df.drop(columns=['Year'])
    y = df['Year']

    # Tabular models
    if clfName in ("TB", "TF"):
        # NOTE(review): the full frame, target column included, is handed to
        # the model here; presumably the persisted wrappers drop 'Year'
        # themselves. Confirm against the training code.
        ypred = clf.predict(df)

    # Feed-forward network
    elif clfName == "FF":
        # Run on whichever device the model already lives on. Sending the
        # inputs to CUDA just because it is available fails with a device
        # mismatch when the unpickled model sits on the CPU.
        device = _model_device(clf)
        features = torch.as_tensor(X.values, dtype=torch.float32).to(device)
        clf.eval()

        with torch.no_grad():
            # Predictions as a numpy array so the metrics can consume them.
            ypred = clf(features).cpu().numpy()

        # Targets as a float32 column vector; no device round-trip is needed.
        y = torch.as_tensor(y.values, dtype=torch.float32).view(-1, 1).numpy()

    else:
        y = y.values
        ypred = clf.predict(X.values)

    return {
        "mse": mean_squared_error(y, ypred),
        "mae": mean_absolute_error(y, ypred),
        "mape": mean_absolute_percentage_error(y, ypred),
        "r2square": r2_score(y, ypred),
    }

In [10]:
# Dataset to score and the regressors to evaluate, in reporting order.
FILENAME = "../../train.csv"
CLF_NAME_LIST = [ "LR", "SVR", "KNR", "RF", "FF", "TB", "TF" ]
df = pd.read_csv(FILENAME)

# Run every algorithm: preprocess the frame, load the persisted regressor,
# score it, and print one RESULT line per model.
for modelName in CLF_NAME_LIST:
    dfProcessed = preprocess(df, modelName)
    clf = load(modelName)
    perf = predict(dfProcessed, modelName, clf)
    print(f"RESULT team: {getName()} algoName: {modelName} perf: {perf}")

RESULT team: martinaallaseconda algoName: LR perf: {'mse': 94.86937247190434, 'mae': 7.149512761595671, 'mape': 0.003586904900443591, 'r2square': 0.1391327431512942}


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


RESULT team: martinaallaseconda algoName: SVR perf: {'mse': 91.47183514359104, 'mae': 6.153736646945415, 'mape': 0.003095103101082883, 'r2square': 0.16996280520037488}
RESULT team: martinaallaseconda algoName: KNR perf: {'mse': 35.283020184731946, 'mae': 3.4413370408672654, 'mape': 0.0017259666133985338, 'r2square': 0.6798334804126274}
RESULT team: martinaallaseconda algoName: RF perf: {'mse': 129.92290079765968, 'mae': 6.846464760422883, 'mape': 0.003428389009726271, 'r2square': -0.1789513127077047}
RESULT team: martinaallaseconda algoName: FF perf: {'mse': 153.35834, 'mae': 10.484416, 'mape': 0.0052495836, 'r2square': -0.39161014556884766}


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


RESULT team: martinaallaseconda algoName: TB perf: {'mse': 1186.0528118394618, 'mae': 11.671268911156531, 'mape': 0.0058474796987953125, 'r2square': -9.762525396785058}


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


RESULT team: martinaallaseconda algoName: TF perf: {'mse': 94.56639802458791, 'mae': 7.1592557101846745, 'mape': 0.0035916942021247617, 'r2square': 0.14188200536902196}
