In [1]:
import numpy as np
import os

In [2]:
def celeba_attr_dataset(root_dir, train_or_test, restricted_degree=0):
    info_pak = np.load(os.path.join(root_dir, 'celeba_attr.npz'))
    train_idxs = info_pak['train_idxs']
    val_idxs = info_pak['val_idxs']
    test_idxs = info_pak['test_idxs']

    attribute_names = info_pak['attribute_names']
    attributes = info_pak['attributes']
    male_attr_idx = 20

    def get_label(idxs, restricted_degree):
        def jj(attr):
            important_attributes_idx = [0, 1, 4, 9, 16, 18, 22, 24, 29, 30, 34, 36, 37, 38]
            x = np.array([0 for i in range(attr.shape[0])])
            for i in important_attributes_idx:
                x = x + attr[:, i]
            return x

        label = attributes[idxs]
        sig = jj(label) >= restricted_degree
        label = label[sig]

        data = np.delete(label, [male_attr_idx], 1)
        label = label[:, male_attr_idx]
        return data.astype('float32'), label

    if train_or_test=='train':
        data, label = get_label(train_idxs, restricted_degree)
    elif train_or_test=='test':
        data, label = get_label(test_idxs, restricted_degree)
    return data, label
        
train_data, train_label = celeba_attr_dataset('./data/toy_celeba', 'train')
test_data, test_label = celeba_attr_dataset('./data/toy_celeba', 'test')

## CART
Code is taken from DrKwint/sounds-deep

In [3]:
import sklearn.tree

# Defien the decision tree
max_leaf_nodes = 20
max_depth = 10
decision_tree = sklearn.tree.DecisionTreeClassifier(
    max_depth=max_depth,
    min_weight_fraction_leaf=0.01,
    max_leaf_nodes=max_leaf_nodes)

In [4]:
decision_tree.fit(train_data, train_label)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=10,
            max_features=None, max_leaf_nodes=20,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.01, presort=False,
            random_state=None, splitter='best')

In [5]:
accu = decision_tree.score(test_data, test_label)
print('Accuracy: {:.4f}'.format(accu))

Accuracy: 0.9281


## NB
From scikit-learn documentation

In [8]:
import sklearn.naive_bayes

model_nb = sklearn.naive_bayes.BernoulliNB()
model_nb.fit(train_data, train_label)
accu = model_nb.score(test_data, test_label)
print('Accuracy: {:.4f}'.format(accu))

Accuracy: 0.9249


## SVM
From scikit-learn documentation

In [10]:
import sklearn.svm

model_svm = sklearn.svm.SVC(gamma='scale')
model_svm.fit(train_data, train_label)
accu = model_svm.score(test_data, test_label)
print('Accuracy: {:.4f}'.format(accu))

Accuracy: 0.9389
