In [2]:
import numpy as np 
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler 

seed_value = 42   ## An arbitrary seed value
wine = load_wine()        ## To make experiments on other datasetsi alter here !!!!

print('Wine dataset shape:', wine.data.shape)
print("wine target shape:", wine.target.shape)
print('Unique classes: ',np.unique(wine.target))  
print("Instances per class:", np.bincount(wine.target))

X = wine.data
y = wine.target

print("X:  ", X)
print("y: " , y)

Wine dataset shape: (178, 13)
wine target shape: (178,)
Unique classes:  [0 1 2]
Instances per class: [59 71 48]
X:   [[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]
y:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [3]:
def select_5_percent_samples(X, y, seed):

    np.random.seed(seed)  ## Seed has been fixed to 42 in the beginning of the code.
    # Determine the number of samples to select from each class (5%)
    num_samples_per_class = {label: int(np.ceil(0.05 * np.sum(y == label))) for label in np.unique(y)}

    ## For every unique class, I choose the (ceiling_%5) amount of instances. 
    ## I tried using floor function but the instances per class came out to be too low to get meaningful results.

    # Initialize arrays to store the indices of labeled and unlabeled data
    labeled_indices = np.array([], dtype=int)
    unlabeled_indices = np.array([], dtype=int)

    # Select 5% from each class as labeled data
    for label in np.unique(y):
        indices = np.where(y == label)[0] 
        
    ## This will give me the indices of the instances which belong to the class 'label', which is a number from 0 to 2.
        np.random.shuffle(indices)
        label_indices = indices[:num_samples_per_class[label]]
        unlabeled_indices = np.concatenate((unlabeled_indices, indices[num_samples_per_class[label]:]))
        labeled_indices = np.concatenate((labeled_indices, label_indices))

    # Split the data into labeled and unlabeled sets
    X_labeled = X[labeled_indices]
    y_labeled = y[labeled_indices]
    
    X_unlabeled = X[unlabeled_indices]
    y_unlabeled = y[unlabeled_indices]
    
    return X_labeled, y_labeled, X_unlabeled, y_unlabeled

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [4]:
alpha = 0.5  ## Damping factor
teta = 0.01  ## Treshold value to determine the strong affinities.
## These two paramerer values are also mentioned in the paper, p.7 
from sklearn.neighbors import NearestNeighbors
k = 6 ## As sugested in the paper, page 7

def construct_neighborhood_indicator_matrix(X):

    # Initialize the nearest neighbors model and fit it to the labeled data
    nn = NearestNeighbors(algorithm='auto', metric="euclidean").fit(X)

    distances, indices = nn.kneighbors(X) ## For this matrix, we are not interested in distances, only indices.

    # Initialize the neighborhood indicator matrix P with zeros
    P = np.zeros((len(X), len(X)))

    # Fill in the neighborhood indicator matrix P for the labeled samples
    for i in range(len(X)):
        P[i, indices[i]] = 1 / k
    
    return P


def affinity_propagation(P, y_labeled, alpha, teta):
    # Initialize the affinity matrix W_0
    n_samples = P.shape[0]
    W_0 = np.zeros((n_samples, n_samples)) ## np.eye(n_samples) ## da olabilir. Tekrar iyi bak.
    
    ## Initially creates a matix full of zeros with the respective dimensions
    
    labeled_indices = np.where(y_labeled != -1)[0]

    # Set affinities for similar and dissimilar pairs
    for i, idx_i in enumerate(labeled_indices):
        for j, idx_j in enumerate(labeled_indices):
            if y_labeled[i] == y_labeled[j]: ## Initially we mark similar points as if in 
                W_0[idx_i, idx_j] = 1  ## Similar pairs, as written in the paper
            else:
                W_0[idx_i, idx_j] = -1  ## Dissimilar pairs
    
    ## W0 has been created using %5 labeled indices. 
    ## Now we need to propagate the affinities through the neighborhood structure.

    # Propagate affinities through the neighborhood structure
    W = np.zeros_like(W_0) ##Creates an empty matrix with the same dimensions as W_0.

    for _ in range(n_samples):
        W = (1 - alpha) * W_0 + alpha * np.dot(P, W)
        
        # Apply the threshold to determine strong affinities
        W[np.abs(W) < teta] = 0
    
    return W

def step_2(W,X):

    W1 = np.sum(W, axis=1)  # This computes the sum of each row (axis=1 for rows)
    D = np.diag(W1)  # This creates a diagonal matrix D from the vector W1

    # Compute the graph Laplacian L
    L = D - W

    # Compute the matrix T
    T = X.T @ L @ X  ##  @ stands for matrix multiplication in numpy

    return T

In [5]:
# Use the function to perform affinity propagation
alpha = 0.5  # Example value for the damping factor
threshold = 0.01  # Example threshold value for strong affinities
X_labeled, y_labeled, X_unlabeled, y_unlabeled = select_5_percent_samples(X, y, seed_value)

# Construct neighborhood indicator matrix P for the labeled data
P = construct_neighborhood_indicator_matrix(X)

print("P: ", P)  # Show the constructed matrix P for verification
row_sums = np.sum(P, axis=1)

W = affinity_propagation(P, y_labeled, alpha, teta)
T = step_2(W,X) ## It is basically an (n * n) square matrix where n = #features of X

## Verify the affinity matrix

print("W: ",W)  
print("T: ", T)

print("P shape: ", P.shape)
print("W shape: ", W.shape)
print("T shape: ", T.shape)

P:  [[0.16666667 0.         0.         ... 0.         0.         0.        ]
 [0.         0.16666667 0.         ... 0.         0.         0.        ]
 [0.         0.         0.16666667 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.16666667 0.16666667 0.        ]
 [0.         0.         0.         ... 0.16666667 0.16666667 0.        ]
 [0.         0.         0.         ... 0.         0.         0.16666667]]


W:  [[0.55961542 0.55961542 0.55961542 ... 0.         0.         0.        ]
 [0.42764579 0.42764579 0.42764579 ... 0.         0.         0.        ]
 [0.56818182 0.56818182 0.56818182 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
T:  [[-9.54568428e+00 -6.08554032e+00  1.12907058e+01 -9.22562531e+01
   6.81648480e+02 -1.03504556e+00  7.21936637e+00  2.76313960e-01
   1.21207111e+01 -2.04612111e+01 -4.03095201e+00  4.08978234e+01
  -5.11712340e+03]
 [ 2.21622812e+00 -8.23761702e+00  4.16420269e-01 -2.79527470e+01
   8.79313623e+01  4.52387227e-01  1.45524621e+00 -1.51501332e-01
   9.39567509e-01  2.95012153e+00 -2.66628867e-01  5.92544342e+00
  -4.69749019e+02]
 [ 5.28707754e-01 -2.95295538e+00  1.51490911e-01 -2.98445964e+01
   8.17996735e+01 -9.18279040e-01  9.85603175e-0

In [6]:
def h_sigma(M, rho, sigma):
    # Apply Nesterov's smoothing technique
    ## to smooth the l1 term 
    U_star = np.minimum(rho, np.maximum(M / sigma, -rho))
    
    return np.trace(U_star.T @ M) - (sigma / 2) * np.linalg.norm(U_star, 'fro')**2

def grad_h_sigma(M, rho, sigma):
    return np.minimum(rho, np.maximum(M / sigma, -rho))

def f(M, Sigma):
    fM = -np.log(np.linalg.det(M)) + np.trace(Sigma @ M)
    return fM

def grad_f(M,Σ):
    grad_fM = -np.linalg.inv(M) + Σ
    return grad_fM

def Σ(X,y_labeled, alpha, beta):
    M0 = np.eye(X.shape[1])  # Initialize M0 as the identity matrix
    P = construct_neighborhood_indicator_matrix(X)
    W = affinity_propagation(P, y_labeled, alpha, teta)
    T = step_2(W)
    Σ = np.linalg.inv(M0) + beta * T  
    return Σ

In [7]:
def ALM(M_init, Y_init, mu, rho, sigma, Σ, max_iter=500):
    M = M_init
    Y = Y_init
    
    for _ in range(max_iter):

        # Step 1: Update M
        grad_h_Y = grad_h_sigma(Y, rho, sigma)
        M_next = M - mu * (grad_h_Y + grad_f(M,Σ))
        
        # Step 2: Update Y
        grad_f_M_next = grad_f(M_next,Σ)
        Y_next = Y - mu * (grad_f_M_next + grad_h_sigma(M_next, rho, sigma))
        
        if np.linalg.norm(M_next - M, 'fro') < 1e-6:
            break
         
        # Prepare for next iteration
        M = M_next
        Y = Y_next
    return M

In [8]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

rho = 10000 ## Tuning (smoothness parameter)
beta = 2   ## Tuning parameter
mu = 0.000001  # Update step size
sigma = 0.000001   # Smoothness parameter

def run_s3ml(X_labeled, y_labeled, X_unlabeled, y_unlabeled, M0, Y0, mu, rho, sigma, beta):

    # Construct neighborhood indicator matrix P for the labeled data
    P = construct_neighborhood_indicator_matrix(X)
    # Apply affinity propagation to obtain the affinity matrix W
    W = affinity_propagation(P, y_labeled, alpha, teta)
    # Compute matrix T
    T = step_2(W,X)

    # Set Σ = np.linalg.inv(M0) + beta * T  
    Σ = np.linalg.inv(M0) + beta * T

    # Run ALM to obtain the sparse metric matrix M
    M = ALM( M0, Y0, mu, rho, sigma, Σ)
    
    knn = KNeighborsClassifier(n_neighbors=1, metric="mahalanobis" , metric_params={'VI': np.linalg.inv(M)})
    knn.fit(X_labeled, y_labeled) ## Model training phase with the help of labeled data points.

    # Predict labels for unlabeled data
    y_pred = knn.predict(X_unlabeled)
    # Calculate accuracy score and error rate
    accuracy = accuracy_score(y_unlabeled, y_pred)
    error_rate = 1 - accuracy
    print("S3ML Error rate: ", error_rate)
    return error_rate

In [9]:
def experiment_S3ML(experiment_number,  seed_value,mu, rho, sigma, beta, avg_S3ML_error_rate):
    for _ in range(experiment_number):
        X_labeled, y_labeled, X_unlabeled, y_unlabeled = select_5_percent_samples(X, y, seed_value)
        M0 = np.identity(X.shape[1])
        Y0 = np.zeros_like(M0)

        avg_S3ML_error_rate += run_s3ml(X_labeled, y_labeled, X_unlabeled, y_unlabeled, M0, Y0, mu, rho, sigma, beta)
        seed_value += 1
        
    avg_S3ML_error_rate = avg_S3ML_error_rate/experiment_number

    print("Average error rate of S3ML after ", experiment_number ,"tests: ", avg_S3ML_error_rate)

experiment_S3ML(50,  seed_value, mu, rho, sigma, beta,0)
## 7 seconds on Wine and 0.5 seconds on Iris.


S3ML Error rate:  0.07738095238095233
S3ML Error rate:  0.13095238095238093
S3ML Error rate:  0.1071428571428571
S3ML Error rate:  0.1071428571428571
S3ML Error rate:  0.24404761904761907
S3ML Error rate:  0.11309523809523814
S3ML Error rate:  0.1607142857142857
S3ML Error rate:  0.07738095238095233
S3ML Error rate:  0.1785714285714286
S3ML Error rate:  0.15476190476190477
S3ML Error rate:  0.11904761904761907
S3ML Error rate:  0.0714285714285714
S3ML Error rate:  0.13690476190476186
S3ML Error rate:  0.1071428571428571
S3ML Error rate:  0.11904761904761907
S3ML Error rate:  0.125
S3ML Error rate:  0.13095238095238093
S3ML Error rate:  0.17261904761904767
S3ML Error rate:  0.1785714285714286
S3ML Error rate:  0.1964285714285714
S3ML Error rate:  0.1785714285714286
S3ML Error rate:  0.11309523809523814
S3ML Error rate:  0.24404761904761907
S3ML Error rate:  0.125
S3ML Error rate:  0.1428571428571429
S3ML Error rate:  0.20238095238095233
S3ML Error rate:  0.11904761904761907
S3ML Error r

KeyboardInterrupt: 

In [None]:
###     Grid search (extra):
def search_optimal_parameters(X, y, seed_value, mu, sigma, start_value=1, max_value=1000, experiment_number=3):
    best_rho = start_value
    best_beta = start_value
    best_error_rate = float('inf')

    rho = start_value
    beta = start_value

    while rho <= max_value or beta <= max_value:
        avg_error_rate = 0
        for _ in range(experiment_number):
            X_labeled, y_labeled, X_unlabeled, y_unlabeled = select_5_percent_samples(X, y, seed_value)
            M0 = np.identity(X.shape[1])
            Y0 = np.zeros_like(M0)

            error_rate = run_s3ml(X_labeled, y_labeled, X_unlabeled, y_unlabeled, M0, Y0, mu, rho, sigma, beta)
            seed_value += 1
            avg_error_rate += error_rate

        avg_error_rate /= experiment_number

        print(f"rho: {rho}, beta: {beta}, avg error rate: {avg_error_rate}")

        if avg_error_rate < best_error_rate:
            best_error_rate = avg_error_rate
            best_rho = rho
            best_beta = beta

        if rho <= max_value:
            rho *= 2

        elif beta <= max_value:
            beta *= 2
            rho = 1  # Reset rho to 1 when beta increases

        if rho > max_value and beta > max_value:
            break

    return best_rho, best_beta, best_error_rate

# Initial parameters
mu = 1e-6
sigma = 1e-6
seed_value = 42

best_rho, best_beta, best_error_rate = search_optimal_parameters(X, y, seed_value, mu, sigma)
print(f"Best rho: {best_rho}, Best beta: {best_beta}, Best error rate: {best_error_rate}")

S3ML Error rate:  0.27380952380952384
S3ML Error rate:  0.3571428571428571
S3ML Error rate:  0.25595238095238093
rho: 1, beta: 1, avg error rate: 0.29563492063492064
S3ML Error rate:  0.27380952380952384
S3ML Error rate:  0.3392857142857143
S3ML Error rate:  0.20238095238095233
rho: 2, beta: 1, avg error rate: 0.2718253968253968
S3ML Error rate:  0.1964285714285714
S3ML Error rate:  0.30952380952380953
S3ML Error rate:  0.375
rho: 4, beta: 1, avg error rate: 0.29365079365079366
S3ML Error rate:  0.36904761904761907
S3ML Error rate:  0.14880952380952384
S3ML Error rate:  0.2857142857142857
rho: 8, beta: 1, avg error rate: 0.26785714285714285
S3ML Error rate:  0.22023809523809523
S3ML Error rate:  0.29166666666666663
S3ML Error rate:  0.27380952380952384
rho: 16, beta: 1, avg error rate: 0.2619047619047619
S3ML Error rate:  0.22619047619047616
S3ML Error rate:  0.25595238095238093
S3ML Error rate:  0.2857142857142857
rho: 32, beta: 1, avg error rate: 0.25595238095238093
S3ML Error rate: 

In [None]:
## Random search (extra):
from sklearn.model_selection import KFold

def random_search_s3ml(X, y, folds, num_samples, rho_range, beta_range, mu, sigma, seed_value):
    np.random.seed(seed_value)
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed_value)
    best_rho = None
    best_beta = None
    best_error_rate = float('inf')

    for _ in range(num_samples):
        rho = np.random.uniform(*rho_range)
        beta = np.random.uniform(*beta_range)
        error_rates = []

        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            X_labeled, y_labeled, X_unlabeled, y_unlabeled = select_5_percent_samples(X_train, y_train, seed_value)
            M0 = np.identity(X.shape[1])
            Y0 = np.zeros_like(M0)

            error_rate = run_s3ml(X_labeled, y_labeled, X_unlabeled, y_unlabeled, M0, Y0, mu, rho, sigma, beta)
            error_rates.append(error_rate)
            seed_value += 1

        avg_error_rate = np.mean(error_rates)
        if avg_error_rate < best_error_rate:
            best_error_rate = avg_error_rate
            best_rho = rho
            best_beta = beta

        print(f"Sampling: rho={rho:.2f}, beta={beta:.2f}, Avg Error Rate={avg_error_rate:.4f}")

    return best_rho, best_beta, best_error_rate

rho_range = (1e-2, 1e4)  
beta_range = (1e-2, 1e2)  

mu = 1e-6
sigma = 1e-6
seed_value = 42
folds = 10
num_samples = 100  # Number of random samples to test

best_rho, best_beta, best_error_rate = random_search_s3ml(X, y, folds, num_samples, rho_range, beta_range, mu, sigma, seed_value)
print(f"Best rho: {best_rho}, Best beta: {best_beta}, Best error rate: {best_error_rate:.4f}")

S3ML Error rate:  0.7666666666666666
S3ML Error rate:  0.6
S3ML Error rate:  0.8466666666666667
S3ML Error rate:  0.8466666666666667
S3ML Error rate:  0.86
S3ML Error rate:  0.6133333333333333
S3ML Error rate:  0.7866666666666666
S3ML Error rate:  0.6821192052980132
S3ML Error rate:  0.814569536423841
S3ML Error rate:  0.880794701986755
Sampling: rho=3745.41, beta=95.07, Avg Error Rate=0.7697
S3ML Error rate:  0.2733333333333333
S3ML Error rate:  0.17333333333333334
S3ML Error rate:  0.43999999999999995
S3ML Error rate:  0.1266666666666667
S3ML Error rate:  0.22666666666666668
S3ML Error rate:  0.14
S3ML Error rate:  0.2466666666666667
S3ML Error rate:  0.7086092715231789
S3ML Error rate:  0.2185430463576159
S3ML Error rate:  0.16556291390728473
Sampling: rho=6844.84, beta=83.46, Avg Error Rate=0.2719
S3ML Error rate:  0.6
S3ML Error rate:  0.5733333333333333
S3ML Error rate:  0.72
S3ML Error rate:  0.84
S3ML Error rate:  0.7333333333333334
S3ML Error rate:  0.64
S3ML Error rate:  0.86

In [None]:
## Grid search based KFold cross-validation (extra):
from sklearn.model_selection import KFold

def KFoldC_s3ml(X, y, folds, rho_values, beta_values, mu, sigma, seed_value):
    kf = KFold(n_splits= folds, shuffle= True, random_state= seed_value)
    best_rho ,best_beta ,best_error_rate = None, None, float('inf')

    for rho in rho_values:
        for beta in beta_values:
            error_rates = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                X_labeled, y_labeled, X_unlabeled, y_unlabeled = select_5_percent_samples(X_train, y_train, seed_value)
                M0 = np.identity(X.shape[1])
                Y0 = np.zeros_like(M0)

                error_rate = run_s3ml(X_labeled, y_labeled, X_unlabeled, y_unlabeled, M0, Y0, mu, rho, sigma, beta)
                error_rates.append(error_rate)
                seed_value += 1

            avg_error_rate = np.mean(error_rates)
            if avg_error_rate < best_error_rate:
                best_error_rate = avg_error_rate
                best_rho = rho
                best_beta = beta
            print(f"Search: rho={rho:.2f}, beta={beta:.2f}, Avg Error Rate={avg_error_rate:.4f}")

    return best_rho, best_beta, best_error_rate

rho_values = np.logspace(-2, 4, num=10)  # Logarithmically spaced values from 0.01 to 10000
beta_values = np.logspace(-2, 2, num=10)  # Logarithmically spaced values from 0.01 to 100

mu = 1e-6
sigma = 1e-6
seed_value = 42
folds = 10

best_rho, best_beta, best_error_rate = KFoldC_s3ml(X, y, folds, rho_values, beta_values, mu, sigma, seed_value)
print(f"Best rho: {best_rho}, Best beta: {best_beta}, Best error rate: {best_error_rate:.4f}")


S3ML Error rate:  0.29333333333333333
S3ML Error rate:  0.33333333333333337
S3ML Error rate:  0.4
S3ML Error rate:  0.30000000000000004
S3ML Error rate:  0.3733333333333333
S3ML Error rate:  0.2733333333333333
S3ML Error rate:  0.26
S3ML Error rate:  0.2847682119205298
S3ML Error rate:  0.2582781456953642
S3ML Error rate:  0.2715231788079471
Grid Search: rho=0.01, beta=0.01, Avg Error Rate=0.3048
S3ML Error rate:  0.2866666666666666
S3ML Error rate:  0.2733333333333333
S3ML Error rate:  0.2466666666666667
S3ML Error rate:  0.26
S3ML Error rate:  0.24
S3ML Error rate:  0.2733333333333333
S3ML Error rate:  0.31999999999999995
S3ML Error rate:  0.2450331125827815
S3ML Error rate:  0.304635761589404
S3ML Error rate:  0.48344370860927155
Grid Search: rho=0.01, beta=0.03, Avg Error Rate=0.2933
S3ML Error rate:  0.33333333333333337
S3ML Error rate:  0.2733333333333333
S3ML Error rate:  0.2866666666666666
S3ML Error rate:  0.2666666666666667
S3ML Error rate:  0.2666666666666667
S3ML Error rate