In [15]:
import fcalc
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Binarized data

In [26]:
df = pd.read_csv("../data/WineQT.csv").drop(["Id"], axis=1)
df["quality"] = df["quality"].map(lambda val: val > 5)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,False
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,False
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,False
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,True
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,False


In [17]:
X = df.drop(["quality"], axis=1)
for column in X.columns.values:
    mean = df[column].mean()
    X[column] = X[column].map(lambda val: val > mean)

y = df["quality"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [18]:
bin_cls = fcalc.classifier.BinarizedBinaryClassifier(
    X_train.values, y_train.to_numpy(), method="standard", alpha=0.2
)

In [19]:
bin_cls.predict(X_test.values)

In [20]:
print(accuracy_score(y_test, bin_cls.predictions))
print(f1_score(y_test, bin_cls.predictions, average="weighted"))

0.6763848396501457
0.7047833275552483


# Pattern structures

In [21]:
df = pd.read_csv("../data/WineQT.csv").drop(["Id"], axis=1)
df["quality"] = df["quality"].map(lambda val: val > 5)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,False
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,False
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,False
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,True
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,False


In [22]:
X = df.drop(["quality"], axis=1)
y = df["quality"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [23]:
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.values,
    y_train.to_numpy(),
    categorical=np.arange(X_train.shape[1]),
    method="standard",
    alpha=0.2,
)

In [24]:
pat_cls.predict(X_test.values)

In [25]:
print(accuracy_score(y_test, pat_cls.predictions))
print(f1_score(y_test, pat_cls.predictions))

0.6297376093294461
0.6018808777429466
