In [12]:
import fcalc
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Binarized data

In [24]:
df = pd.read_csv("../data/EffectsOnMathsStudy.csv").drop(
    ["G1", "G2", "Mjob", "Fjob", "reason", "guardian"], axis=1
)
df["G3"] = df["G3"].map(lambda val: val >= 10)
df.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,traveltime,studytime,...,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G3
0,GP,F,18,U,GT3,A,4,4,2,2,...,no,no,4,3,4,1,1,3,6,False
1,GP,F,17,U,GT3,T,1,1,1,2,...,yes,no,5,3,3,1,1,3,4,False
2,GP,F,15,U,LE3,T,1,1,1,2,...,yes,no,4,3,2,2,3,3,10,True
3,GP,F,15,U,GT3,T,4,2,1,3,...,yes,yes,3,2,2,1,1,5,2,True
4,GP,F,16,U,GT3,T,3,3,1,2,...,no,no,4,3,2,1,2,5,4,True


In [14]:
X = df.drop(["G3"], axis=1)

for column, v in [
    ("school", "GP"),
    ("sex", "M"),
    ("address", "U"),
    ("famsize", "GT3"),
    ("Pstatus", "T"),
]:
    X[column] = X[column].map(lambda val: val == v)
    
for column in [
    "schoolsup",
    "famsup",
    "paid",
    "activities",
    "nursery",
    "higher",
    "internet",
    "romantic",
]:
    X[column] = X[column].map(lambda val: val == "yes")
    
for column in [
    "age",
    "Medu",
    "Fedu",
    "traveltime",
    "studytime",
    "failures",
    "famrel",
    "freetime",
    "goout",
    "Dalc",
    "Walc",
    "health",
    "absences"
]:
    mean = X[column].mean()
    X[column] = X[column].map(lambda val: val > mean)

y = df["G3"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [15]:
bin_cls = fcalc.classifier.BinarizedBinaryClassifier(
    X_train.values, y_train.to_numpy(), method="standard", alpha=0.2
)

In [16]:
bin_cls.predict(X_test.values)

In [17]:
print(accuracy_score(y_test, bin_cls.predictions))
print(f1_score(y_test, bin_cls.predictions, average="weighted"))

0.6134453781512605
0.6078902149094931


# Pattern structures

In [18]:
df = pd.read_csv("../data/EffectsOnMathsStudy.csv").drop(
    ["G1", "G2", "Mjob", "Fjob", "reason", "guardian"], axis=1
)
df["G3"] = df["G3"].map(lambda val: val >= 10)
df.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,traveltime,studytime,...,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G3
0,GP,F,18,U,GT3,A,4,4,2,2,...,no,no,4,3,4,1,1,3,6,False
1,GP,F,17,U,GT3,T,1,1,1,2,...,yes,no,5,3,3,1,1,3,4,False
2,GP,F,15,U,LE3,T,1,1,1,2,...,yes,no,4,3,2,2,3,3,10,True
3,GP,F,15,U,GT3,T,4,2,1,3,...,yes,yes,3,2,2,1,1,5,2,True
4,GP,F,16,U,GT3,T,3,3,1,2,...,no,no,4,3,2,1,2,5,4,True


In [19]:
X = df.drop(["G3"], axis=1)
y = df["G3"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [20]:
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.values,
    y_train.to_numpy(),
    categorical=np.arange(X_train.shape[1]),
    method="standard",
    alpha=0,
)

In [21]:
pat_cls.predict(X_test.values)

In [22]:
print(accuracy_score(y_test, pat_cls.predictions))
print(f1_score(y_test, pat_cls.predictions, average="weighted"))

0.6554621848739496
0.5744025180862211
