In [8]:
import numpy as np
import pandas as pd
    

def reduction(X, y, K):
    """Reduce a K-class problem to one binary problem by replicating points.

    Every observation is copied ``K - 1`` times, one replica per threshold
    ``q = 0 .. K-2``.  Each replica keeps the original feature values and
    gains ``K - 2`` indicator columns ``sbcol1 .. sbcol(K-2)``: replica 0 has
    every indicator at 0, replica ``q > 0`` has ``sbcol{q}`` set to 1.

    The binary target of replica ``q`` for a point with label ``k`` is 1 when
    ``k - 1 > q`` and 0 otherwise.  With labels in ``1..K`` a point labelled
    ``k`` therefore gets exactly ``k - 1`` positive replicas.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix, one row per observation.
    y : array-like
        Numeric class label for each row of ``X`` (pandas Series, numpy array
        or list).  The threshold rule above treats labels as 1-based.
    K : int or None
        Number of classes.  When ``None`` it is taken as the number of
        distinct values in ``y``.

    Returns
    -------
    new_X : pandas.DataFrame
        ``len(X) * (K - 1)`` rows; the columns of ``X`` (dtypes preserved)
        followed by the integer indicator columns.  Replicas of the same
        point are contiguous and ordered by ``q``.
    new_y : pandas.DataFrame
        Single integer column ``binary_label`` aligned row-by-row with
        ``new_X``.

    Raises
    ------
    ValueError
        If ``K < 2`` (no threshold can be built) or if ``y`` does not have
        one label per row of ``X``.
    """
    if K is None:
        K = len(np.unique(y))
    if K < 2:
        raise ValueError(f"K must be at least 2 to build binary replicas, got {K}")

    num_points = X.shape[0]
    num_replicas = K - 1

    labels = np.asarray(y).ravel()
    if labels.shape[0] != num_points:
        raise ValueError(
            f"y has {labels.shape[0]} labels but X has {num_points} rows"
        )

    # Repeat every row K-1 times, keeping the replicas of a point contiguous.
    # Positional indexing keeps each column's dtype and ignores X's own index.
    row_positions = np.repeat(np.arange(num_points), num_replicas)
    replicated_features = X.iloc[row_positions].reset_index(drop=True)

    # Indicator pattern shared by every point: dropping the first column of
    # the identity leaves row 0 all zeros and row q > 0 with a 1 in column q-1.
    indicator_names = [f'sbcol{j + 1}' for j in range(K - 2)]
    replica_pattern = np.eye(num_replicas, dtype=int)[:, 1:]
    indicators = pd.DataFrame(
        np.tile(replica_pattern, (num_points, 1)),
        columns=indicator_names,
    )

    new_X = pd.concat([replicated_features, indicators], axis=1)

    # Replica q is positive (1) exactly when k - 1 > q; ravel() flattens the
    # (point, replica) grid in the same row order as new_X.
    thresholds = np.arange(num_replicas)
    binary = (labels[:, None] - 1 > thresholds[None, :]).astype(int).ravel()
    new_y = pd.DataFrame({'binary_label': binary})

    return new_X, new_y



def classif(pred_sbc_y, K):
    """Turn replica-level binary predictions into one class index per point.

    ``pred_sbc_y`` must hold the binary predictions of all replicas with the
    ``K - 1`` replicas of each original point stored consecutively.

    Voting system: the class of a point is the number of its replicas that
    were predicted as 1.
      * all replicas 0            -> class 0
      * one replica 1, rest 0     -> class 1
      * two replicas 1, rest 0    -> class 2
      * ...
      * all replicas 1            -> class K-1

    The result is therefore 0-based.  When the binary problem was built by
    ``reduction`` in this file, which encodes labels as 1-based, add 1 to get
    back to the original label scale.

    Parameters
    ----------
    pred_sbc_y : array-like
        Binary predictions; the first axis has ``num_points * (K - 1)``
        entries.  A column-vector input (shape ``(n, 1)``) is accepted and
        yields a column-vector result.
    K : int
        Number of classes of the original problem.

    Returns
    -------
    numpy.ndarray
        Vote count per original point, in input order.

    Raises
    ------
    ValueError
        If ``K < 2`` or the number of predictions is not a multiple of
        ``K - 1``.
    """
    num_replicas = K - 1
    if num_replicas < 1:
        raise ValueError(f"K must be at least 2 to group replicas, got {K}")

    predictions = np.asarray(pred_sbc_y)
    total = predictions.shape[0]
    if total % num_replicas != 0:
        raise ValueError(
            f"number of predictions ({total}) is not a multiple of K - 1 ({num_replicas})"
        )

    # Group consecutive replicas of the same point along a new second axis;
    # any trailing axes of the input are carried through unchanged.
    num_points = total // num_replicas
    grouped = predictions.reshape((num_points, num_replicas) + predictions.shape[1:])

    # Count the positive votes of each point.
    final_labels = np.sum(grouped, axis=1)
    return final_labels


In [9]:
# Toy data set: four observations with two features each and labels in {1, 2, 3}.
binary_X = pd.DataFrame(
    np.array([[1, 2], [3, 4], [5, 6], [7, 8]]),
    columns=['col1', 'col2'],
)
binary_y = pd.Series(np.array([1, 2, 3, 2]))

# Expand the 3-class problem into its replicated binary form and show both parts.
replicated_data, replicated_labels = reduction(binary_X, binary_y, 3)
print("Replicated Data:", replicated_data)
print("Replicated Labels:", replicated_labels)

Replicated Data:    col1  col2  sbcol1
0     1     2       0
1     1     2       1
2     3     4       0
3     3     4       1
4     5     6       0
5     5     6       1
6     7     8       0
7     7     8       1
Replicated Labels:    binary_label
0             0
1             0
2             1
3             0
4             1
5             1
6             1
7             0
