In [1]:
from tropy.learn import fit_classifier, fit_classifier_onevsall, _inrad_eigenpair, predict_onevsall
from tropy.metrics import accuracy_multiple, veronese_feature_names, print_features_per_class
from tropy.ops import veronese
from tropy.utils import simplex_lattice_points
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Keep printed arrays compact: fixed-point notation (no scientific format
# for small values) with three digits after the decimal point.
np.set_printoptions(suppress=True, precision=3)

In [2]:
# Diagnosis labels handled by this notebook, in the order the per-class
# matrices are built.
classes = ["M", "B"]

# Rebound by class_df() when it is called with a non-None `size`.
lattice_points = None

# Full table (keeps the `diagnosis` label column used for class selection).
base_df = pd.read_csv('./data/breast_cancer.csv')

# Feature columns only: the contiguous, label-based (inclusive) column range.
df = base_df.loc[:, slice('radius_mean', 'fractal_dimension_worst')]
features = list(df.columns)

def class_df(class_name, size=None):
  """Build the train/test feature matrices for one diagnosis class.

  Rows of the module-level ``df`` whose ``diagnosis`` value in ``base_df``
  equals ``class_name`` are split 80/20 with a fixed ``random_state=43`` and
  converted to float arrays, transposed so that each column is one sample.

  Args:
    class_name: Diagnosis label to select (e.g. an entry of ``classes``).
    size: When not None, lattice points are generated with
      ``simplex_lattice_points(d, size)`` (``d`` = number of rows of the
      training matrix) and both matrices are passed through ``veronese``.

  Returns:
    Tuple ``(Ctrain, Ctest)``.

  Side effects:
    Prints the shapes of both matrices; rebinds the module-level
    ``lattice_points`` when ``size`` is not None.
  """
  global lattice_points
  # Exact label match. `str.contains` does regex/substring matching, so a
  # label that is a substring of another one (or contains regex
  # metacharacters) would pull in rows of the wrong class, and a missing
  # label would produce an NA mask that cannot be used for indexing.
  df_class = df[base_df["diagnosis"] == class_name]
  df_train, df_test = train_test_split(df_class, test_size=0.2, random_state=43)
  Ctrain, Ctest = df_train.to_numpy(dtype=float).T, df_test.to_numpy(dtype=float).T

  if size is not None:
    d = Ctrain.shape[0]
    # Materialise the points once so the same list feeds both transforms
    # and stays available through the module-level name.
    lattice_points = list(simplex_lattice_points(d, size))
    Ctrain, Ctest = veronese(lattice_points, Ctrain), veronese(lattice_points, Ctest)

  print(Ctrain.shape, Ctest.shape)
  return Ctrain, Ctest

In [3]:
# Forwarded to class_df() as its `size` argument; None makes class_df skip
# the simplex_lattice_points / veronese step and return the raw features.
size = None

In [4]:
# One (features x samples) matrix per class, in the same order as `classes`.
Clist_train = []
Clist_test = []
for class_name in classes:
  # class_df prints the two shapes and returns the (train, test) pair.
  train, test = class_df(class_name, size)
  Clist_train += [train]
  Clist_test += [test]

(30, 169) (30, 43)
(30, 285) (30, 72)


In [5]:
# Compute the pair (x, l) from the per-class training matrices; x is reported
# below as the apex and l as the eigenvalue.
# NOTE(review): N=50 looks like the iteration count (the progress bar shows
# 50 steps) — confirm against tropy.learn. `_inrad_eigenpair` is a
# private (underscore-prefixed) helper of that module.
x, l = _inrad_eigenpair(Clist_train, N=50)
print("Apex:", x)
print("Eigval:", l)

100%|██████████| 50/50 [00:00<00:00, 3118.91it/s]

Apex: [ -63.564  -50.092   25.596  796.096  -79.185  -79.141  -79.161  -79.231
  -79.084  -79.227  -78.648  -77.094  -74.401   -2.419  -79.283  -79.265
  -79.26   -79.279  -79.274  -79.287  -61.908  -39.201   36.196 1013.996
  -79.122  -78.821  -78.729  -79.131  -78.887  -79.19 ]
Eigval: 0.00018700725649978267





In [6]:
# Fit the classifier on the training matrices using the apex x computed in
# the previous cell; `predictor` and `sector_indicator` are used by the
# following cells.
predictor, sector_indicator = fit_classifier(Clist_train, x)
# One-vs-all fit on the same training matrices. `indicators` and `apices`
# are not used by the cells visible in this section.
indicators, apices = fit_classifier_onevsall(Clist_train)

100%|██████████| 50/50 [00:00<00:00, 2621.37it/s]
100%|██████████| 50/50 [00:00<00:00, 3039.35it/s]


In [7]:
# Evaluate the fitted predictor on the per-class test matrices (the 20%
# split held out by class_df).
accuracy = accuracy_multiple(predictor, Clist_test)
# Report the value rounded to three decimals.
print(f"Accuracy: {round(accuracy, 3)}")

Accuracy: 0.896


In [8]:
# Print, per class label, the feature names selected by `sector_indicator`.
# NOTE(review): `features` holds the raw column names, which match the matrix
# rows only while `size` is None; with a Veronese embedding the rows change
# and these names presumably no longer line up (veronese_feature_names is
# imported but not used here) — confirm before setting `size`.
print_features_per_class(classes, features, sector_indicator)

Dominant features for each class:
- M: radius_mean, texture_mean, perimeter_mean, area_mean, concavity_mean, concave points_mean, radius_se, perimeter_se, area_se, radius_worst, texture_worst, perimeter_worst, area_worst, smoothness_worst, compactness_worst, concavity_worst, concave points_worst, symmetry_worst, fractal_dimension_worst
- B: smoothness_mean, compactness_mean, symmetry_mean, fractal_dimension_mean, texture_se, smoothness_se, compactness_se, concavity_se, concave points_se, symmetry_se, fractal_dimension_se
