In [10]:
from tropy.learn import fit_classifier, fit_classifier_onevsall, _inrad_eigenpair, predict_onevsall
from tropy.metrics import accuracy_multiple, veronese_feature_names, print_features_per_class
from tropy.ops import veronese
from tropy.utils import simplex_lattice_points
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

np.set_printoptions(precision=3, suppress=True)

In [11]:
classes = [("./data/winequality-red.csv", False),
           ("./data/winequality-red.csv", True),
           ("./data/winequality-white.csv", False),
           ("./data/winequality-white.csv", True)]

lattice_points = None
features = None

def class_df(class_name, size=None, low_quality=False):
  global lattice_points, features
  base_df = pd.read_csv(class_name, delimiter=';', dtype=float)
  if low_quality:
      base_df = base_df[base_df["quality"] <= 5]
  else:
      base_df = base_df[base_df["quality"] >= 6]
  features = base_df.columns.to_list()
  df_class = base_df.loc[:, 'fixed acidity':'alcohol']
  df_class = MinMaxScaler().fit_transform(df_class)
  df_train, df_test = train_test_split(df_class, test_size=0.2, random_state=43)
  Ctrain, Ctest = df_train.T, df_test.T

  if size is not None:
    d = Ctrain.shape[0]
    lattice_points = list(simplex_lattice_points(d, size))
    Ctrain, Ctest = veronese(lattice_points, Ctrain), veronese(lattice_points, Ctest)

  print(Ctrain.shape, Ctest.shape)
  return Ctrain, Ctest

In [12]:
size = 4

In [13]:
Clist_train = []
Clist_test = []
for class_name, low_quality in classes:
    train, test = class_df(class_name, size, low_quality)
    Clist_train.append(train)
    Clist_test.append(test)

classes = ["Rouge <= 5", "Rouge >= 6", "Blanc <= 5", "Blanc => 6"]

(11231, 684) (11231, 171)
(11231, 595) (11231, 149)
(11231, 2606) (11231, 652)
(11231, 1312) (11231, 328)


In [14]:
x, l = _inrad_eigenpair(Clist_train, N=50)
print("Apex:", x)
print("Eigval:", l)

100%|██████████| 50/50 [01:06<00:00,  1.33s/it]

Apex: [-0.124 -0.1   -0.054 ...  1.412  1.063  1.332]
Eigval: 8.881784197001252e-16





In [15]:
predictor, sector_indicator = fit_classifier(Clist_train, x)

In [16]:
accuracy = accuracy_multiple(predictor, Clist_test)
print(f"Accuracy: {round(accuracy, 3)}")

Accuracy: 0.877


In [17]:
print_features_per_class(classes, veronese_feature_names(features, lattice_points), sector_indicator)

Dominant features for each class:
- Rouge <= 5: fixed acidity + volatile acidity + citric acid + chlorides, fixed acidity + volatile acidity + chlorides + density, fixed acidity + citric acid + residual sugar + density, fixed acidity + citric acid + chlorides + density, fixed acidity + citric acid + free sulfur dioxide + density, fixed acidity + citric acid + free sulfur dioxide + alcohol, fixed acidity + citric acid + density + sulphates, fixed acidity + citric acid + density + alcohol, volatile acidity + citric acid + chlorides + free sulfur dioxide, volatile acidity + citric acid + chlorides + density, volatile acidity + citric acid + chlorides + alcohol, volatile acidity + citric acid + free sulfur dioxide + density, volatile acidity + citric acid + free sulfur dioxide + alcohol, volatile acidity + citric acid + density + pH, volatile acidity + citric acid + density + sulphates, volatile acidity + citric acid + density + alcohol, volatile acidity + citric acid + pH + alcohol, volat

In [18]:
indicators, apices = fit_classifier_onevsall(Clist_train, x)
accuracy_one_vs_all = accuracy_multiple(predict_onevsall(indicators, apices, Clist_train), Clist_test)
print(f"Accuracy (one-vs-all): {round(accuracy_one_vs_all, 3)}")

100%|██████████| 50/50 [01:08<00:00,  1.37s/it]
100%|██████████| 50/50 [01:10<00:00,  1.41s/it]
100%|██████████| 50/50 [01:14<00:00,  1.49s/it]
100%|██████████| 50/50 [01:13<00:00,  1.48s/it]


Accuracy (one-vs-all): 0.922
