In [1]:
import pandas as pd
from HQC import HQC
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import model_selection
from sklearn import metrics

In [2]:
# Read the tab-separated balance-scale table; the 'target' column holds the class label.
df = pd.read_csv('balance-scale.tsv', sep='\t')
# Labels first, then every remaining column as the feature matrix.
Y = df['target'].values
X = df.drop(columns='target').values

In [3]:
# Number of samples per class label in the 'target' column.
df['target'].value_counts()

2    288
1    288
0     49
Name: target, dtype: int64

In [4]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import check_classification_targets

class HQC_no_complex(BaseEstimator, ClassifierMixin):
    """The Helstrom Quantum Centroid (HQC) classifier is a quantum-inspired supervised classification 
    approach for data with binary classes (ie. data with 2 classes only).
                         
    Parameters
    ----------
    rescale : int or float, default = 1
        The dataset rescaling factor. Every feature value is multiplied by it before encoding.
    n_copies : int, default = 1
        The number of copies to take for each quantum density. This is equivalent to taking the 
        n-fold Kronecker tensor product for each quantum density.

    Attributes
    ----------
    classes_ : ndarray, shape (2,)
        Sorted binary classes.
    centroid_class_0_ : ndarray, shape ((n_features + 1)**n_copies, (n_features + 1)**n_copies)
        Quantum Centroid for class with index 0.
    centroid_class_1_ : ndarray, shape ((n_features + 1)**n_copies, (n_features + 1)**n_copies)
        Quantum Centroid for class with index 1. Set to the scalar 0 when the training data 
        contains no sample of class index 1.
    q_Hels_obs_ : ndarray, shape ((n_features + 1)**n_copies, (n_features + 1)**n_copies)
        Quantum Helstrom observable.
    proj_pos_ : ndarray, shape ((n_features + 1)**n_copies, (n_features + 1)**n_copies)
        Sum of the projectors of the Quantum Helstrom observable's eigenvectors, which has 
        corresponding positive eigenvalues.
    proj_neg_ : ndarray, shape ((n_features + 1)**n_copies, (n_features + 1)**n_copies)
        Sum of the projectors of the Quantum Helstrom observable's eigenvectors, which has 
        corresponding non-positive eigenvalues.
    Hels_bound_ : float
        Helstrom bound is the upper bound of the probability that one can correctly discriminate 
        whether a quantum density is of which of the two binary quantum density pattern.          
    """
    # Added binary_only tag as required by sklearn check_estimator
    def _more_tags(self):
        return {'binary_only': True}
    
    
    def __init__(self, rescale=1, n_copies=1):
        self.rescale = rescale
        self.n_copies = n_copies
        
        
    def _encode(self, X):
        """Rescale X and map every row to X' (inverse standard stereographic projection).
        
        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Validated input samples.
            
        Returns
        -------
        X_prime : ndarray, shape (n_samples, n_features + 1)
            Encoded samples, one row per input row.
        """
        # Cast to float so all following calculations are done in float rather than int,
        # then apply the rescaling factor
        X = self.rescale*X.astype(float)
        
        # Sum of squares of each row (sample) in X
        X_sq_sum = (X**2).sum(axis=1)
        
        # Un-normalised encoding [2*x, |x|^2 - 1], built once for all rows rather than
        # rebuilt inside a per-row loop
        stacked = np.concatenate((2*X, (X_sq_sum-1).reshape((-1,1))), axis=1)
        
        # Row-wise factor 1/(|x|^2 + 1); the denominator is always >= 1
        return (1/(X_sq_sum+1)).reshape((-1,1))*stacked
    
    
    def _density(self, x_prime):
        """Return the n_copies-fold Kronecker power of the outer product of one encoded row.
        
        Parameters
        ----------
        x_prime : ndarray, shape (n_features + 1,)
            One encoded sample.
            
        Returns
        -------
        density : ndarray, shape ((n_features + 1)**n_copies, (n_features + 1)**n_copies)
        """
        density_single = np.outer(x_prime, x_prime)
        density = density_single
        # n_copies == 1 performs no iteration and leaves the plain outer product
        for _ in range(self.n_copies-1):
            density = np.kron(density, density_single)
        return density
    
    
    def _sum_densities(self, X_prime_rows, dim):
        """Return the (dim, dim) sum of the densities of all rows in X_prime_rows."""
        density_sum = np.zeros((dim, dim))
        for row in X_prime_rows:
            density_sum = density_sum + self._density(row)
        return density_sum
        
        
    def fit(self, X, y):
        """Perform HQC classification with the inverse of the standard stereographic projection encoding, 
        with the option to rescale the dataset prior to encoding.
                
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples. An array of int or float.
        y : array-like, shape (n_samples,)
            The training input binary target values. An array of str, int or float.
            
        Returns
        -------
        self : object
            Returns self.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        
        # Ensure target y is of non-regression type
        # Added as required by sklearn check_estimator
        check_classification_targets(y)
    
        # Store binary classes and encode y into binary class indexes 0 and 1
        self.classes_, y_class_index = np.unique(y, return_inverse=True)
        
        # Rescale and encode X into X'
        X_prime = self._encode(X)
        
        # Number of rows (samples) in X'
        M = X_prime.shape[0]
        
        # Side length of every density matrix: (n_features + 1)**n_copies
        dim = X_prime.shape[1]**self.n_copies
        
        # Rows (samples) in X' belonging to class index 0 and class index 1
        X_prime_class_0 = X_prime[y_class_index==0]
        X_prime_class_1 = X_prime[y_class_index==1]
        
        # Number of rows (samples) in X' belonging to each class index
        M_class_0 = X_prime_class_0.shape[0]
        M_class_1 = X_prime_class_1.shape[0]
        
        # Calculate Quantum Centroid for class index 0
        self.centroid_class_0_ = (1/M_class_0)*self._sum_densities(X_prime_class_0, dim)
        
        # Calculate Quantum Centroid for class index 1
        # A training set with a single class has no class-1 rows; the centroid is then the
        # scalar 0 (needed by sklearn check_estimator) instead of dividing by zero
        if M_class_1 == 0:
            self.centroid_class_1_ = 0
        else:
            self.centroid_class_1_ = (1/M_class_1)*self._sum_densities(X_prime_class_1, dim)

        # Calculate quantum Helstrom observable
        self.q_Hels_obs_ = (M_class_0/M)*self.centroid_class_0_ - (M_class_1/M)*self.centroid_class_1_
        
        # Calculate eigenvalues w and eigenvectors v of the quantum Helstrom observable
        # NOTE(review): the observable is built from real outer products, so it looks symmetric
        # and np.linalg.eigh may be the better fit; eig is kept so results do not change -- confirm
        w, v = np.linalg.eig(self.q_Hels_obs_)
        
        # Initialize arrays self.proj_pos_ and self.proj_neg_
        self.proj_pos_ = np.zeros_like(self.q_Hels_obs_)
        self.proj_neg_ = np.zeros_like(self.q_Hels_obs_)
        # Calculate sum of projectors of eigenvectors with corresponding positive and non-positive 
        # eigenvalues, respectively
        for i in range(0,len(w)):
            projector = np.dot(v[:,i].reshape(-1,1),v[:,i].reshape(1,-1))
            if w[i] > 0:
                self.proj_pos_ = self.proj_pos_ + projector
            else:
                self.proj_neg_ = self.proj_neg_ + projector
    
        # Calculate Helstrom bound
        self.Hels_bound_ = (M_class_0/M)*np.trace(np.dot(self.centroid_class_0_,self.proj_pos_)) \
                           + (M_class_1/M)*np.trace(np.dot(self.centroid_class_1_,self.proj_neg_))
        return self
        
        
    def predict_proba(self, X):
        """Performs HQC classification on X and returns the trace of the dot product of the densities and the 
        sum of the projectors with corresponding positive and negative eigenvalues, respectively.
        
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples. An array of int or float.       
            
        Returns
        -------
        trace_matrix : array-like, shape (n_samples, 2)
            Column index 0 corresponds to the trace of the dot product of the densities and the sum of 
            projectors with positive eigenvalues. Column index 1 corresponds to the trace of the dot 
            product of the densities and the sum of projectors with negative eigenvalues. An array of float.
        """
        # Check if fit had been called
        check_is_fitted(self, ['proj_pos_', 'proj_neg_'])

        # Input validation
        X = check_array(X)
        
        # Rescale and encode X into X'
        X_prime = self._encode(X)
        
        # Number of rows in X
        m = X_prime.shape[0]
            
        # Initialize array trace_matrix (which can contain complex numbers)
        # The builtin complex is used because the np.complex alias was removed in NumPy 1.24
        trace_matrix = np.empty((m,2), dtype=complex)
        for i in range(0,m):
            # Quantum density (n_copies-fold Kronecker power) of row i
            density_each_row = self._density(X_prime[i,:])
                    
            # Calculate trace of the dot product of density of each row and sum of projectors with corresponding 
            # positive and negative eigenvalues, respectively
            trace_matrix[i,0] = np.trace(np.dot(density_each_row,self.proj_pos_))
            trace_matrix[i,1] = np.trace(np.dot(density_each_row,self.proj_neg_))
        # Only the real part is returned
        return np.real(trace_matrix)
    
    
    def predict(self, X):
        """Performs HQC classification on X and returns the binary classes.
        
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples. An array of int or float.
            
        Returns
        -------
        self.classes_[predict_trace_index] : array-like, shape (n_samples,)
            The predicted binary classes. An array of str, int or float.
        """
        # Determine column index with the higher trace value in trace_matrix
        # If both columns have the same trace value, returns column index 0
        predict_trace_index = np.argmax(self.predict_proba(X), axis=1)
        # Returns the predicted binary classes
        return self.classes_[predict_trace_index]

In [20]:
# Hold out 20% of the samples for testing, stratified on the labels, with a fixed seed.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
    stratify=Y,
)

In [6]:
# Create rescale hyperparameter list [0.1, 0.5, 1, 1.5,...,10.0]
# Evenly spaced part of the grid: 0.5 to 10.0 in 20 steps.
rescale_list2 = np.linspace(0.5, 10, 20).tolist()
# Full grid: 0.1 followed by the evenly spaced values.
rescale_list1 = [0.1, *rescale_list2]
rescale_arr1 = np.array(rescale_list1)

In [21]:
# OneVsOneClassifier, n_copies=1
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=1)
    model = OneVsOneClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.805113
1,0.5,0.782111
2,1.0,0.79094
3,1.5,0.823116
4,2.0,0.769216
5,2.5,0.667501
6,3.0,0.558838
7,3.5,0.464874
8,4.0,0.411243
9,4.5,0.319733


In [22]:
# One-vs-one HQC at rescale=0.1, n_copies=1: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=1)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2,
       1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2], dtype=int64)

In [23]:
# One-vs-one HQC at rescale=9, n_copies=1: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=1)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [10]:
# OneVsOneClassifier, n_copies=2
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=2)
    model = OneVsOneClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.858667
1,0.5,0.782069
2,1.0,0.781982
3,1.5,0.798777
4,2.0,0.823116
5,2.5,0.807946
6,3.0,0.719942
7,3.5,0.667501
8,4.0,0.5801
9,4.5,0.536765


In [11]:
# One-vs-one HQC at rescale=0.1, n_copies=2: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=2)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [12]:
# One-vs-one HQC at rescale=9, n_copies=2: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=2)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [13]:
# OneVsOneClassifier, n_copies=3
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=3)
    model = OneVsOneClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.851
1,0.5,0.805079
2,1.0,0.781982
3,1.5,0.789667
4,2.0,0.815216
5,2.5,0.823116
6,3.0,0.823116
7,3.5,0.769216
8,4.0,0.6942
9,4.5,0.649098


In [14]:
# One-vs-one HQC at rescale=0.1, n_copies=3: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=3)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [15]:
# One-vs-one HQC at rescale=9, n_copies=3: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=3)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [24]:
# OneVsOneClassifier, n_copies=4
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=4)
    model = OneVsOneClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.851067
1,0.5,0.805079
2,1.0,0.781982
3,1.5,0.781982
4,2.0,0.790074
5,2.5,0.815216
6,3.0,0.823116
7,3.5,0.823116
8,4.0,0.769216
9,4.5,0.711459


In [25]:
# One-vs-one HQC at rescale=0.1, n_copies=4: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=4)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [26]:
# One-vs-one HQC at rescale=9, n_copies=4: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=4)
model = OneVsOneClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [27]:
# OneVsRestClassifier, n_copies=1
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=1)
    model = OneVsRestClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.820319
1,0.5,0.782069
2,1.0,0.781982
3,1.5,0.807321
4,2.0,0.823116
5,2.5,0.823116
6,3.0,0.823116
7,3.5,0.823116
8,4.0,0.823116
9,4.5,0.823116


In [28]:
# One-vs-rest HQC at rescale=0.1, n_copies=1: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=1)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [29]:
# One-vs-rest HQC at rescale=9, n_copies=1: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=1)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1,
       1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2], dtype=int64)

In [30]:
# OneVsRestClassifier, n_copies=2
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=2)
    model = OneVsRestClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.858789
1,0.5,0.789614
2,1.0,0.781982
3,1.5,0.789733
4,2.0,0.807321
5,2.5,0.815216
6,3.0,0.823116
7,3.5,0.823116
8,4.0,0.823116
9,4.5,0.823116


In [31]:
# One-vs-rest HQC at rescale=0.1, n_copies=2: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=2)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [32]:
# One-vs-rest HQC at rescale=9, n_copies=2: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=2)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1,
       1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2], dtype=int64)

In [33]:
# OneVsRestClassifier, n_copies=3
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=3)
    model = OneVsRestClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.851067
1,0.5,0.805079
2,1.0,0.781982
3,1.5,0.781982
4,2.0,0.79767
5,2.5,0.807321
6,3.0,0.815216
7,3.5,0.823116
8,4.0,0.823116
9,4.5,0.823116


In [34]:
# One-vs-rest HQC at rescale=0.1, n_copies=3: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=3)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [35]:
# One-vs-rest HQC at rescale=9, n_copies=3: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=3)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1,
       1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2], dtype=int64)

In [43]:
# OneVsRestClassifier, n_copies=4
# Weighted F1 score on the test split for every rescale value in the grid.
f1_sc = np.empty_like(rescale_arr1)
for idx, rescale_value in enumerate(rescale_list1):
    base_clf = HQC_no_complex(rescale=rescale_value, n_copies=4)
    model = OneVsRestClassifier(base_clf, n_jobs=-1)
    model.fit(X_train, Y_train)
    Y_hat = model.predict(X_test)
    f1_sc[idx] = metrics.f1_score(Y_test, Y_hat, average='weighted')

# Two-column table: rescale value next to its F1 score.
scores_table = np.column_stack((rescale_arr1, f1_sc))
pd.DataFrame(scores_table, columns=['rescale', 'F1 score'])

Unnamed: 0,rescale,F1 score
0,0.1,0.851067
1,0.5,0.805079
2,1.0,0.789614
3,1.5,0.781982
4,2.0,0.789733
5,2.5,0.790452
6,3.0,0.815216
7,3.5,0.815216
8,4.0,0.823116
9,4.5,0.823116


In [44]:
# One-vs-rest HQC at rescale=0.1, n_copies=4: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=0.1, n_copies=4)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [45]:
# One-vs-rest HQC at rescale=9, n_copies=4: fit on the training split, show test predictions.
base_clf = HQC_no_complex(rescale=9, n_copies=4)
model = OneVsRestClassifier(base_clf, n_jobs=-1)
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
Y_hat

array([2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1,
       1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2], dtype=int64)

In [36]:
# Comparing performance with SVM
from sklearn import svm

In [53]:
# RBF-kernel SVM baseline with decision_function_shape='ovo'; weighted F1 on the test split.
model = svm.SVC(kernel='rbf', decision_function_shape='ovo')
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
metrics.f1_score(Y_test, Y_hat, average='weighted')

0.8434810434782607

In [54]:
# Display the test-split predictions currently stored in Y_hat.
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2], dtype=int64)

In [48]:
# RBF-kernel SVM baseline with decision_function_shape='ovr'; weighted F1 on the test split.
# NOTE(review): this setting presumably only changes the shape of decision_function, not
# predict -- confirm against the scikit-learn SVC documentation.
model = svm.SVC(kernel='rbf', decision_function_shape='ovr')
model.fit(X_train, Y_train)
Y_hat = model.predict(X_test)
metrics.f1_score(Y_test, Y_hat, average='weighted')

0.8434810434782607

In [49]:
# Display the test-split predictions currently stored in Y_hat.
Y_hat

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,
       2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2], dtype=int64)