## SBC (single binary classifier) reduction and classification

In [38]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

In [39]:
# Toy dataset: four 2-feature observations, each carrying a distinct string label.
X = pd.DataFrame(np.array([[1, 2], [3, 4], [5, 6], [7, 8]]))
y = pd.Series(np.array(['a', 'b', 'c', 'd']))

# Integer class code -> original class label.
mapping = dict(enumerate('abcd'))

In [40]:
# Columns removed right after loading; only 'Subjective Evaluation' (the target)
# and the remaining value columns are kept.
dropped_columns = [
    'Image Filename', 'Author', 'Objective Evaluation',
    'sX2L Value', 'sX2a Value', 'sX2b Value', 'sX2Lab Value',
    'sEMDL Value', 'sEMDa Value', 'sEMDb Value', 'sEMDLab Value',
]
aesthetic_evaluation_raw = pd.read_csv('../datasets/aesthetic_evaluation_data.csv').drop(columns=dropped_columns)

# Work on a 10-row sample: the first 9 rows plus the first 'Poor' row, added manually
# because there is no 'Poor' example among the first rows of the file.
# Selecting with a list (`iloc[[0]]`) yields a one-row DataFrame that keeps the column
# dtypes; `iloc[0].to_frame().T` would turn every column of the result into `object`.
poor_row = aesthetic_evaluation_raw[aesthetic_evaluation_raw['Subjective Evaluation'] == 'Poor'].iloc[[0]]
aesthetic_evaluation_data = pd.concat([aesthetic_evaluation_raw.head(9), poor_row], ignore_index=True)

aesthetic_evaluation_X = aesthetic_evaluation_data.drop(columns='Subjective Evaluation')
aesthetic_evaluation_y = aesthetic_evaluation_data['Subjective Evaluation']

# Integer class code -> label, listed from 'Poor' (0) to 'Excellent' (3).
aesthetic_evaluation_mapping = {0: 'Poor', 1: 'Fair', 2: 'Good', 3: 'Excellent'}

aesthetic_evaluation_data.head(10)

Unnamed: 0,Subjective Evaluation,BRA Value,LBC Value,UNR Value,BCE Value,BCD Value,BAD Value,BOD Value,pBRA Value,pLBC Value,...,pBAD Value,pBOD Value,cX2L Value,cX2a Value,cX2b Value,cX2Lab Value,cEMDL Value,cEMDa Value,cEMDb Value,cEMDLab Value
0,Good,1.149009,0.830915,0.107618,0.938534,1.934477,4.885198,17.29637,0.049401,0.033356,...,0.029188,0.10334,0.097502,0.009506,0.217169,0.10067,1.503086,0.030362,0.291363,0.210082
1,Good,2.368824,1.326447,1.599444,0.272997,0.035378,19.932998,68.761479,0.113919,0.058509,...,0.117875,0.406627,0.305692,0.159653,0.193317,0.061736,1.2486,0.206529,0.351693,0.105346
2,Good,1.409939,0.950935,0.278358,1.229293,2.221074,22.27777,38.32343,0.067231,0.038613,...,0.120132,0.206658,0.109386,0.024217,0.280625,0.012974,0.545858,0.07552,0.304185,0.049345
3,Good,2.059918,0.720926,1.786307,1.065381,1.995007,25.16341,31.63107,0.100705,0.032141,...,0.15603,0.196133,0.038581,0.032267,0.026153,0.010779,0.334111,0.105626,0.137693,0.031455
4,Good,2.452595,0.798596,1.868745,1.070148,1.218189,0.395585,31.426783,0.117249,0.035235,...,0.00282,0.224062,0.021259,0.027071,0.029206,0.006495,0.165401,0.081549,0.091768,0.030546
5,Excellent,0.951696,0.113046,0.950776,1.063822,1.757753,8.532637,17.880099,0.044664,0.004796,...,0.056474,0.11834,0.084273,0.042964,0.128076,0.037557,0.775643,0.162126,0.209275,0.105691
6,Fair,0.895108,0.850773,0.453674,0.3971,0.559489,0.471835,16.884602,0.042506,0.036583,...,0.003571,0.127797,0.042711,0.02955,0.045899,0.034033,0.179981,0.086489,0.147702,0.050502
7,Good,3.100714,1.614997,2.713865,1.098868,0.498283,12.972883,33.147592,0.138786,0.070177,...,0.081306,0.207749,0.084506,0.083272,0.644688,0.131429,1.235154,0.137911,0.629805,0.234235
8,Good,0.978128,0.006449,0.515795,0.522243,3.378563,22.43618,33.791902,0.044836,0.000262,...,0.155069,0.233554,0.088175,0.058499,0.432515,0.012727,0.509642,0.125125,0.397542,0.025845
9,Poor,4.287771,2.025147,4.080698,2.055551,5.83788,79.573665,95.960856,0.18933,0.081771,...,0.420374,0.506945,0.282464,0.028169,0.433739,0.119059,1.540188,0.074895,0.410333,0.200164


- for $K$ classes
- find $K-1$ parallel hyperplanes.
- $s$ is the number of classes to the left / right of the hyperplane.
- for $s = K-1$, each hyperplane separates the classes into 2 sets.
- each hyperplane is a binary classification problem, with the additional constraint of parallelism.
- all these problems have to be solved simultaneously in an augmented feature space
    - for $x$ in $R^{d}$, the augmented feature space is $R^{d+1}$
- for each $x$ create $(K-1)$ new points: $(x, 0), (x, h_1), \dots, (x, h_{K-2})$, where each $h_j$ is a positive constant
- define a binary training set
    - $(x_i^{(1)},0)$ belongs to $B_1$ and $(x_i^{(2)},0), \dots, (x_i^{(K)},0)$ belong to $B_2$
    - $(x_i^{(1)},h_1)$ and $(x_i^{(2)},h_1)$ belong to $B_1$ and $(x_i^{(3)},h_1), \dots, (x_i^{(K)}, h_1)$ belong to $B_2$
    - ...
    - $(x_i^{(1)},h_{K-2}), \dots, (x_i^{(K-1)},h_{K-2})$ belong to $B_1$ and $(x_i^{(K)},h_{K-2})$ belongs to $B_2$

In [41]:
def sbc_reduction(X, y, mapping=None, h=1):
    """Reduce a K-class problem with ordered classes to one binary problem by data replication.

    Each observation is replicated ``s = K - 1`` times. Replica ``j`` (``j = 0, ..., s - 1``)
    is the original feature vector extended with one extra feature equal to ``h * j``, and is
    labelled ``1`` when the observation's integer class code is greater than ``j``, else ``0``.

    Parameters
    ----------
    X : pandas.DataFrame
        One row per observation.
    y : pandas.Series or array-like
        Class labels, matched to the rows of ``X`` by position. Integer labels are used
        directly as class codes; any other dtype is converted to integer codes first.
    mapping : dict, optional
        ``{code: label}``. It is inverted to convert non-integer labels to codes. When given,
        it also fixes the number of classes, so a class that happens to be absent from ``y``
        still gets its replica. When omitted for non-integer labels, a mapping is built with
        ``pd.factorize``.
    h : number, default 1
        Spacing of the extra feature between consecutive replicas.

    Returns
    -------
    new_X : pandas.DataFrame
        ``n * (K - 1)`` rows and ``d + 1`` integer-named columns; the replicas of one
        observation occupy consecutive rows.
    new_y : pandas.DataFrame
        Single column holding the 0/1 label of each row of ``new_X``.
    new_data : pandas.DataFrame
        ``new_X`` with the labels appended as column ``'binary_label'``.
    mapping : dict or None
        The mapping that was used (``None`` for integer labels passed without a mapping).

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or fewer than two classes are available.
    KeyError
        If a non-integer label does not occur among the values of ``mapping``.
    """
    y = pd.Series(y)
    n_obs = X.shape[0]
    if len(y) != n_obs:
        raise ValueError(f"X has {n_obs} rows but y has {len(y)} labels")

    # num of classes: a provided mapping is authoritative, so that a class which is
    # missing from this particular sample does not shrink the number of replicas
    n_observed = len(np.unique(y))
    K = n_observed if mapping is None else max(n_observed, len(mapping))

    print("original num classes: ", K)
    print("original num observations: ", n_obs)

    if K < 2:
        raise ValueError("at least two classes are required to build the binary problem")

    # num of parallel hyperplanes to be created (and replicas)
    s = K - 1

    # if class labels not integer, convert to integer
    if not pd.api.types.is_integer_dtype(y):
        if mapping is None:
            # factorize numbers the labels in order of first appearance, which is not
            # necessarily their ordinal order; pass `mapping` to control the order
            codes, labels = pd.factorize(y)
            mapping = dict(enumerate(labels))
            print("mapping: ", mapping)
            y = pd.Series(codes)
        else:
            print("using provided mapping: ", mapping)
            # invert once; the lambda (instead of passing the dict to `map`) keeps the
            # KeyError for unknown labels rather than silently producing NaN
            label_to_code = {label: code for code, label in mapping.items()}
            y = y.map(lambda label: label_to_code[label])

    # for each point, create s replicas each with a new feature in [0, h, h*2, ... h*(s-1)]
    # the new label is a binary label
    replica_idx = np.tile(np.arange(s), n_obs)       # 0..s-1, repeated for every point
    codes = np.repeat(y.to_numpy(), s)               # class code of the point behind each replica
    features = np.repeat(X.to_numpy(), s, axis=0)    # original features, s consecutive copies

    new_X = pd.DataFrame(np.column_stack([features, h * replica_idx]))
    new_y = pd.DataFrame((codes > replica_idx).astype(int))
    new_data = pd.concat([new_X, new_y], axis=1)
    # rename binary label column
    new_data.columns = list(new_X.columns) + ['binary_label']

    print("new num classes: ", len(np.unique(new_y)))
    print("new num observations: ", new_X.shape[0], " (original num observations *", s, ")")

    return new_X, new_y, new_data, mapping


# Reduce the toy problem and show the data before and after replication.
new_X, new_y, new_data, mapping = sbc_reduction(X, y, mapping)
old_data = pd.concat([X, y], axis="columns")
print("old data:", old_data, sep="\n")
print("new data:", new_data, sep="\n")

original num classes:  4
original num observations:  4
using provided mapping:  {0: 'a', 1: 'b', 2: 'c', 3: 'd'}
new num classes:  2
new num observations:  12  (original num observations * 3 )
old data:
   0  1  0
0  1  2  a
1  3  4  b
2  5  6  c
3  7  8  d
new data:
    0  1  2  binary_label
0   1  2  0             0
1   1  2  1             0
2   1  2  2             0
3   3  4  0             1
4   3  4  1             0
5   3  4  2             0
6   5  6  0             1
7   5  6  1             1
8   5  6  2             0
9   7  8  0             1
10  7  8  1             1
11  7  8  2             1


In [42]:
# Same reduction applied to the aesthetic-evaluation sample; the returned mapping is discarded.
(
    new_aesthetic_evaluation_X,
    new_aesthetic_evaluation_y,
    new_aesthetic_evaluation_data,
    _,
) = sbc_reduction(aesthetic_evaluation_X, aesthetic_evaluation_y, aesthetic_evaluation_mapping, h=1)
print("old aesthetic evaluation data:", aesthetic_evaluation_data, sep="\n")
print("new aesthetic evaluation data:", new_aesthetic_evaluation_data, sep="\n")

original num classes:  4
original num observations:  10
using provided mapping:  {0: 'Poor', 1: 'Fair', 2: 'Good', 3: 'Excellent'}
new num classes:  2
new num observations:  30  (original num observations * 3 )
old aesthetic evaluation data:
  Subjective Evaluation BRA Value LBC Value UNR Value BCE Value BCD Value  \
0                  Good  1.149009  0.830915  0.107618  0.938534  1.934477   
1                  Good  2.368824  1.326447  1.599444  0.272997  0.035378   
2                  Good  1.409939  0.950935  0.278358  1.229293  2.221074   
3                  Good  2.059918  0.720926  1.786307  1.065381  1.995007   
4                  Good  2.452595  0.798596  1.868745  1.070148  1.218189   
5             Excellent  0.951696  0.113046  0.950776  1.063822  1.757753   
6                  Fair  0.895108  0.850773  0.453674    0.3971  0.559489   
7                  Good  3.100714  1.614997  2.713865  1.098868  0.498283   
8                  Good  0.978128  0.006449  0.515795  0.522243  

Apply a linear two-class classifier to the replicated data.

In [43]:
from sklearn.linear_model import LogisticRegression

# L1-penalised logistic regression on the replicated toy data.
# The hyper-parameters are passed to the constructor instead of being assigned as
# attributes afterwards, and `random_state` is fixed because the liblinear solver
# shuffles the data, so that re-running the notebook reproduces the same fit.
logistic = LogisticRegression(solver='liblinear', penalty='l1', random_state=0)
logistic.fit(new_X, new_y.values.ravel())  # convert y to 1d array
pred_sbc_y = logistic.predict(new_X)  # predictions on the training replicas themselves
print(pred_sbc_y)

[1 0 0 1 1 0 1 1 0 1 1 1]


In [44]:
# Refit the same estimator on the replicated aesthetic-evaluation data and predict on it.
aesthetic_targets = new_aesthetic_evaluation_y.to_numpy().ravel()  # 1d target array
pred_sbc_y_aesthetic = logistic.fit(new_aesthetic_evaluation_X, aesthetic_targets).predict(new_aesthetic_evaluation_X)
print(pred_sbc_y_aesthetic)

[1 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 0 0 0]


to classify an example
- classify all its replicas
- get a sequence of $K-1$ labels
- from this sequence, infer the class of the original example following the rule:
    (for $K=3$)
    - if both are $C_1$, then the class is $C_1$
    - if one is $C_1$ and the other is $C_2$, then the class is $C_2$
    - if both are $C_2$, then the class is $C_3$

In [45]:
def sbc_classif(og_y, pred_sbc_y, mapping):
    """Turn the binary predictions of the replicas back into one class per original point.

    The predictions are read as consecutive groups of ``s = K - 1`` values, one group per
    original observation. The class code of an observation is the number of its replicas
    that were predicted as ``1``.

    Parameters
    ----------
    og_y : array-like
        Original labels; only used to count the classes.
    pred_sbc_y : array-like of 0/1
        Binary predictions for all replicas, with the replicas of each observation
        stored consecutively.
    mapping : dict or None
        ``{code: label}``. When given, the codes are translated to labels and the number of
        classes is at least ``len(mapping)``, so a class that is absent from ``og_y`` does
        not change the group size.

    Returns
    -------
    numpy.ndarray or pandas.Series
        Integer class codes when ``mapping`` is ``None``; otherwise a Series of labels.

    Raises
    ------
    ValueError
        If fewer than two classes are available, or the number of predictions is not a
        multiple of ``K - 1``.
    """
    # num of classes: a provided mapping is authoritative
    n_observed = len(np.unique(og_y))
    K = n_observed if mapping is None else max(n_observed, len(mapping))
    if K < 2:
        raise ValueError("at least two classes are required to decode replica predictions")
    s = K - 1  # num of hyperplanes / replicas

    preds = np.asarray(pred_sbc_y).ravel()
    if preds.size % s != 0:
        raise ValueError(
            f"expected a multiple of {s} replica predictions, got {preds.size}"
        )

    # get classification of all replicas of each point (one row per point)
    all_labels = preds.reshape(-1, s)
    print("all labels:", all_labels)

    # get the class of the point
    # if all replicas are 0, then the class is 0
    # if one is 1 and the rest are 0, then the class is 1
    # if two are 1 and the rest are 0, then the class is 2
    # ...
    # if all replicas are 1, then the class is K-1
    final_labels = np.sum(all_labels, axis=1)
    print("final labels (before mapping): ", final_labels)

    # convert back to original labels if mapping is provided
    if mapping is not None:
        print("mapping: ", mapping)
        final_labels = pd.Series(final_labels).map(mapping)

    return final_labels


# Decode the replica predictions of the toy data and score them against the true labels.
final_labels = sbc_classif(y, pred_sbc_y, mapping)
accuracy = accuracy_score(y, final_labels)

print("predicted labels: ", final_labels)
print("real labels: ", y.values)
print("accuracy: ", accuracy)

all labels: [[1 0 0]
 [1 1 0]
 [1 1 0]
 [1 1 1]]
final labels (before mapping):  [1 2 2 3]
mapping:  {0: 'a', 1: 'b', 2: 'c', 3: 'd'}
predicted labels:  0    b
1    c
2    c
3    d
dtype: object
real labels:  ['a' 'b' 'c' 'd']
accuracy:  0.5


In [46]:
# Decode the replica predictions of the aesthetic-evaluation sample and score them.
final_labels_aesthetic = sbc_classif(
    aesthetic_evaluation_y,
    pred_sbc_y_aesthetic,
    aesthetic_evaluation_mapping,
)
accuracy_aesthetic = accuracy_score(aesthetic_evaluation_y, final_labels_aesthetic)

print("predicted labels aesthetic: ", final_labels_aesthetic)
print("real labels aesthetic: ", aesthetic_evaluation_y.values)
print("accuracy aesthetic: ", accuracy_aesthetic)

all labels: [[1 1 0]
 [1 1 0]
 [1 1 0]
 [1 0 0]
 [1 1 0]
 [1 1 0]
 [1 1 0]
 [1 1 0]
 [1 1 0]
 [0 0 0]]
final labels (before mapping):  [2 2 2 1 2 2 2 2 2 0]
mapping:  {0: 'Poor', 1: 'Fair', 2: 'Good', 3: 'Excellent'}
predicted labels aesthetic:  0    Good
1    Good
2    Good
3    Fair
4    Good
5    Good
6    Good
7    Good
8    Good
9    Poor
dtype: object
real labels aesthetic:  ['Good' 'Good' 'Good' 'Good' 'Good' 'Excellent' 'Fair' 'Good' 'Good'
 'Poor']
accuracy aesthetic:  0.7


In [47]:
def apply_mapping(y, mapping):
    """Translate between integer class codes and class labels.

    Parameters
    ----------
    y : pandas.Series or array-like
        Either integer class codes or class labels.
    mapping : dict
        ``{code: label}``.

    Returns
    -------
    pandas.Series
        Labels when ``y`` has an integer dtype (codes that are not keys of ``mapping``
        become NaN); otherwise the integer code of every label.

    Raises
    ------
    KeyError
        If ``y`` holds labels and one of them does not occur among the values of ``mapping``.
    """
    y = pd.Series(y)
    # Accept every integer dtype (int32, uint8, nullable Int64, ...), not only the
    # platform default that the string 'int' stands for.
    if pd.api.types.is_integer_dtype(y):
        return y.map(mapping)

    # Invert the mapping once instead of once per element; the lambda keeps the KeyError
    # for unknown labels, whereas passing the dict to `map` would yield NaN.
    label_to_code = {label: code for code, label in mapping.items()}
    return y.map(lambda label: label_to_code[label])

# Codes -> labels.
y = pd.Series([0, 1, 2, 3])
mapped_y = apply_mapping(y, mapping)
print("Mapped y: ", mapped_y)

# Labels -> codes.
y_2 = pd.Series(['a', 'b', 'c', 'd'])
mapped_y_2 = apply_mapping(y_2, mapping)
print("Mapped y_2: ", mapped_y_2)

Mapped y:  0    a
1    b
2    c
3    d
dtype: object
Mapped y_2:  0    0
1    1
2    2
3    3
dtype: int64
