In [1]:
from tropy.learn import fit_classifier, fit_classifier_onevsall, _inrad_eigenpair, predict_onevsall
from tropy.metrics import accuracy_multiple, veronese_feature_names, print_features_per_class
from tropy.ops import veronese
from tropy.utils import simplex_lattice_points
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

np.set_printoptions(precision=3, suppress=True)

In [2]:
classes = [("./data/winequality-red.csv", False),
           ("./data/winequality-red.csv", True),
           ("./data/winequality-white.csv", False),
           ("./data/winequality-white.csv", True)]

lattice_points = None
features = None

def class_df(class_name, size=None, low_quality=False):
  global lattice_points, features
  base_df = pd.read_csv(class_name, delimiter=';', dtype=float)
  if low_quality:
      base_df = base_df[base_df["quality"] <= 5]
  else:
      base_df = base_df[base_df["quality"] >= 6]
  features = base_df.columns.to_list()
  df_class = base_df.loc[:, 'fixed acidity':'alcohol']
  df_class = MinMaxScaler().fit_transform(df_class)
  df_train, df_test = train_test_split(df_class, test_size=0.2, random_state=43)
  Ctrain, Ctest = df_train.T, df_test.T

  d = Ctrain.shape[0]
  lattice_points = list(simplex_lattice_points(d, size))
  Ctrain, Ctest = veronese(lattice_points, Ctrain), veronese(lattice_points, Ctest)

  print(Ctrain.shape, Ctest.shape)
  return Ctrain, Ctest

In [3]:
size = 4

In [4]:
Clist_train = []
Clist_test = []
for class_name, low_quality in classes:
    train, test = class_df(class_name, size, low_quality)
    Clist_train.append(train)
    Clist_test.append(test)

classes = ["Rouge <= 5", "Rouge >= 6", "Blanc <= 5", "Blanc => 6"]

(1221, 684) (1221, 171)
(1221, 595) (1221, 149)
(1221, 2606) (1221, 652)
(1221, 1312) (1221, 328)


In [5]:
x, l = _inrad_eigenpair(Clist_train, N=50)
print("Apex:", x)
print("Eigval:", l)

100%|██████████| 50/50 [00:12<00:00,  4.17it/s]

Apex: [ 0.064 -0.151 -0.136 ...  0.777  0.672  0.697]
Eigval: 8.881784197001252e-16





In [6]:
predictor, sector_indicator = fit_classifier(Clist_train, x)

In [7]:
accuracy = accuracy_multiple(predictor, Clist_test)
print(f"Accuracy: {round(accuracy, 3)}")

Accuracy: 0.787


In [8]:
print_features_per_class(classes, veronese_feature_names(features, lattice_points), sector_indicator)

Dominant features for each class:
- Rouge <= 5: fixed acidity + citric acid + chlorides, fixed acidity + citric acid + free sulfur dioxide, fixed acidity + citric acid + density, fixed acidity + citric acid + sulphates, fixed acidity + density + alcohol, volatile acidity + citric acid + chlorides, volatile acidity + citric acid + density, volatile acidity + citric acid + pH, volatile acidity + citric acid + alcohol, volatile acidity + residual sugar + pH, volatile acidity + chlorides + free sulfur dioxide, volatile acidity + chlorides + density, volatile acidity + chlorides + alcohol, volatile acidity + free sulfur dioxide + density, volatile acidity + free sulfur dioxide + pH, volatile acidity + free sulfur dioxide + alcohol, volatile acidity + density + pH, volatile acidity + density + alcohol, volatile acidity + pH + alcohol, citric acid + residual sugar + alcohol, citric acid + chlorides + density, citric acid + chlorides + alcohol, citric acid + free sulfur dioxide + density, citr

In [9]:
indicators, apices = fit_classifier_onevsall(Clist_train, x)
accuracy_one_vs_all = accuracy_multiple(predict_onevsall(indicators, apices, Clist_train), Clist_test)
print(f"Accuracy (one-vs-all): {round(accuracy_one_vs_all, 3)}")

  new_x = z - np.max(z) * np.ones_like(x0)
100%|██████████| 50/50 [00:13<00:00,  3.74it/s]
100%|██████████| 50/50 [00:12<00:00,  4.11it/s]
100%|██████████| 50/50 [00:12<00:00,  4.14it/s]
100%|██████████| 50/50 [00:12<00:00,  3.86it/s]


Accuracy (one-vs-all): 0.502
