In [43]:
# Execute the preprocessing notebook inside this kernel so that the names it
# defines become available to the cells below.
%run ./pre_processing.ipynb
import pandas as pd
# Bare annotation (no assignment): `folds` is not created here, so it is
# presumably defined by pre_processing.ipynb — verify there.
folds: list[pd.DataFrame]

In [44]:
# Display the cross-validation folds as a sanity check before using them below.
folds

[      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
 0         -0.474615          0.427208    -0.334668        1.104872   1.540411   
 1         -0.783154          1.034607    -1.229258       -0.744721  -0.000967   
 2         -0.397480         -0.544630    -0.403482       -0.818284  -0.429127   
 3          0.142462         -1.091289    -0.059409        0.295675   0.084666   
 4         -1.014558         -0.240931    -0.128224       -0.723703  -0.743111   
 ...             ...               ...          ...             ...        ...   
 1164       2.070829          4.041231     0.972811       -0.849811  -0.286407   
 1165      -1.785905          0.487948    -2.192664       -0.702685  -0.229319   
 1166      -0.320346         -0.483890    -0.541112        0.495347  -0.115143   
 1167      -0.628884         -0.666110    -0.128224        0.348220  -0.543303   
 1168      -0.860289         -0.119451    -0.472297       -0.660648   0.427194   
 
       free su

In [45]:
from algos.baseline import Baseline
from sklearn.metrics import mean_absolute_error as MAE, accuracy_score as Accuracy
from imblearn.metrics import macro_averaged_mean_absolute_error as MAMAE
from algos.CEM import CEM
from icecream import ic
# Per-fold scores of the baseline model: one dict (metric name -> value) per held-out fold.
metrics: list[dict[str, float]] = []

# Cross-validation over the precomputed folds: fold `i` is held out for
# evaluation and every other fold is concatenated into the training set.
for i in range(len(folds)):
  train_cv = pd.concat([fold for j, fold in enumerate(folds) if j != i])
  test_cv = folds[i]
  baseline = Baseline()
  # "quality" is the target column; all remaining columns are used as features.
  baseline.fit(train_cv.drop("quality", axis=1), train_cv["quality"])
  y_pred = baseline.predict(test_cv.drop("quality", axis=1))
  metrics.append({
    "MAE": MAE(test_cv["quality"], y_pred),
    "Accuracy": Accuracy(test_cv["quality"], y_pred),
    "MAMAE": MAMAE(test_cv["quality"], y_pred),
    "CEM": CEM(test_cv["quality"], y_pred)
  })

# Build the score table and its aggregates once instead of once per metric.
# `.std()` is pandas' default sample standard deviation across folds.
metrics_df = pd.DataFrame(metrics)
metrics_mean, metrics_std = metrics_df.mean(), metrics_df.std()

# Last expression of the cell: "mean +- std" per metric, shown as the cell output.
{metric: f"{val} +- {metrics_std[metric]}" for metric, val in metrics_mean.items()}

{'MAE': '0.6418633794681698 +- 0.018052551782724507',
 'Accuracy': '0.43338816871751007 +- 0.015539672740386161',
 'MAMAE': '1.6285714285714286 +- 0.11736911946539269',
 'CEM': '0.14182032955523613 +- 0.31711989747693053'}

In [46]:
import sys
from importlib import reload

# Pick up edits to algos/k_rank.py without restarting the kernel.
# Only reload when the module has already been imported; the previous
# `sys.modules.get('algos.k_rank', sys)` form fell back to reloading the
# unrelated `sys` module on a fresh kernel.
if "algos.k_rank" in sys.modules:
    reload(sys.modules["algos.k_rank"])
from algos.k_rank import KRank
import numpy as np

# Per-fold scores of the KRank model: one dict (metric name -> value) per held-out fold.
metrics: list[dict[str, float]] = []

# Cross-validation over the precomputed folds: fold `i` is held out for
# evaluation and every other fold is concatenated into the training set.
for i in range(len(folds)):
    train_cv = pd.concat([fold for j, fold in enumerate(folds) if j != i])
    test_cv = folds[i]
    krank = KRank()
    # KRank is fed plain NumPy arrays rather than DataFrames.
    # NOTE(review): training features are cast to float32 while the test
    # features below are passed with their original dtype — confirm KRank is
    # insensitive to this difference.
    krank.fit(np.array(train_cv.drop("quality", axis=1).values, dtype=np.float32), train_cv["quality"].values)
    # NOTE(review): the Series gets a fresh default index, not test_cv's index —
    # confirm the metrics below compare positionally rather than by label.
    y_pred = pd.Series(krank.predict(test_cv.drop("quality", axis=1).values))
    metrics.append({
        "MAE": MAE(test_cv["quality"], y_pred),
        "Accuracy": Accuracy(test_cv["quality"], y_pred),
        "MAMAE": MAMAE(test_cv["quality"], y_pred),
        "CEM": CEM(test_cv["quality"], y_pred),
    })

# Build the score table and its aggregates once instead of once per metric.
# `.std()` is pandas' default sample standard deviation across folds.
metrics_df = pd.DataFrame(metrics)
metrics_mean, metrics_std = metrics_df.mean(), metrics_df.std()

# Last expression of the cell: "mean +- std" per metric, shown as the cell output.
{metric: f"{val} +- {metrics_std[metric]}" for metric, val in metrics_mean.items()}

{'MAE': '0.6813843375520021 +- 0.09220844875293133',
 'Accuracy': '0.420556835047853 +- 0.034076698413134474',
 'MAMAE': '1.6537541673348037 +- 0.17878533372465738',
 'CEM': '0.13401751454877403 +- 0.2996722727066258'}