In [None]:
# Project for the exam of Signal Processing and Optimization for Big Data
# Master degree in computer engineering, curriculum Data Science
# Matteo Rinalduzzi - Università degli Studi di Perugia


# This project aims to investigate 3 aspects:
# - PART 1: implementation of SVM centralized version to evaluate the effect of L1 norm as lambda changes
# - PART 2: comparison between centralized version and distributed version with ADMM, splitting
#           across data (framework of CONSENSUS OPTIMIZATION)
# - PART 3: distributed algorithm applied to a simple dataset of quasi-linearly-separable data in two dimensions
#           and plot of the straight lines in the plane that separate the data (multiclass classification)


# ------ PART 1: Norm L1 effect -------
import numpy as np
# Fix the legacy global RNG so every draw below is reproducible.
np.random.seed(1)

# Problem dimensions
n = 20    # number of features
m = 1000  # number of examples

# Ground-truth separating hyperplane: coefficient column vector (beta) and intercept (beta_0).
beta_true = np.random.standard_normal(size=(n, 1))
offset = np.random.standard_normal(size=1)

# Gaussian samples (mean 0, std 5) labelled +/-1 according to the side of the
# hyperplane (beta_true, offset) they fall on, so the set is linearly separable.
X = np.random.normal(loc=0, scale=5, size=(m, n))
Y = np.sign(np.dot(X, beta_true) + offset)


In [None]:
# Setting of the centralized problem
import cvxpy as cp
# Decision variables: hyperplane coefficients (column vector) and scalar intercept.
beta = cp.Variable(shape=(n, 1))
v = cp.Variable()

# Hinge loss summed over the samples: pos(1 - y_i * (x_i^T beta + v)).
_margins = cp.multiply(Y, X @ beta + v)
loss = cp.sum(cp.pos(1 - _margins))

# L1 penalty on the coefficients only; the intercept v is not regularized.
reg = cp.norm(beta, 1)

# lambda is a Parameter, so one Problem object can be re-solved for many values.
lambd = cp.Parameter(nonneg=True)
_objective = cp.Minimize(loss/m + lambd*reg)
prob = cp.Problem(_objective)

In [None]:
# Re-solve the parametrized problem over a log-spaced grid of lambda values
# and keep the optimal coefficient vector of every run.
TRIALS = 100
lambda_vals = np.logspace(-2, 0, num=TRIALS)  # from 0.01 to 1
# (linear alternative: np.linspace(0.01, 1, TRIALS))
beta_vals = []
for lam in lambda_vals:
    lambd.value = lam
    prob.solve()
    beta_vals.append(beta.value)

In [None]:
# Plot beta trend as a function of lambda, each solution normalized by its largest-magnitude coefficient
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Rescale every solution by max(beta) when that exceeds |min(beta)|, otherwise
# by |min(beta)|, i.e. by the magnitude of its largest coefficient.
beta_vals_norm = np.copy(beta_vals)
for idx, coeffs in enumerate(beta_vals):
    top, bottom = np.max(coeffs), abs(np.min(coeffs))
    beta_vals_norm[idx] = coeffs / (top if top > bottom else bottom)

# One curve per feature: normalized coefficient versus lambda, log-scaled x axis.
for feat in range(n):
    plt.plot(lambda_vals, [sol[feat, 0] for sol in beta_vals_norm])
y_label = plt.ylabel(r"$\beta$", fontsize=16)
y_label.set_rotation(0)
plt.xlabel(r"$\lambda$", fontsize=16)
plt.xscale("log")



**Comparison between centralized code and distributed code**

In [None]:
# -------- PART 2: Comparison between centralized code and distributed code ------
import numpy as np
import math
# Seed the global RNG before drawing the synthetic dataset so the cell is reproducible.
np.random.seed(1)

n = 20     # number of features
m = 1000   # number of examples
beta_true = np.random.randn(n,1)         # hyperplane coefficients
offset = np.random.randn(1)              # intercept
X = np.random.normal(0, 5, size=(m,n))   # samples with N(0, 5^2) entries
Y = np.sign(X.dot(beta_true) + offset)   # +/-1 labels, shape (m, 1)

# Ground-truth parameters stacked as [beta_true; offset], kept for reference.
# The length is derived from n (rather than a literal 21) so that changing n
# does not raise a reshape error.
# NOTE: the name `beta` is rebound to a cvxpy variable further down in this cell.
beta = np.append(beta_true, offset).reshape(n + 1)
#print('True', np.around(beta,6))

# Centralized algorithm
import cvxpy as cp
# Fold the labels into the data: row i of A is y_i * [x_i, 1], so the margin of
# sample i is A[i] @ [beta; beta_0].
A = np.concatenate((X*Y, Y), axis=1)
n_features = n + 1

# Parameters
rho = 1       # penalty weight of the quadratic term in the distributed version
lamda = 0.5   # weight of the L1 regularizer

# Identity with the last diagonal entry zeroed: leaves the intercept unpenalized.
C = np.eye(n_features)
C[-1, -1] = 0

beta = cp.Variable(shape=(n_features, 1))
loss = cp.sum(cp.pos(1 - A @ beta))
reg = cp.norm(C @ beta, 1)
prob = cp.Problem(cp.Minimize(loss/m + lamda*reg))

# Solving
prob.solve()
var = beta.value.reshape(-1)   # flatten the (n_features, 1) column to 1-D
print('Centralized solution')
print(np.around(var, 6))
# (to compare directions only: np.around(var/np.linalg.norm(var), 6))


In [None]:
# Distributed algorithm
N = 20        # number of agents; every agent holds a contiguous slice of the dataset
n_iter = 500  # number of stored iterates (index 0 is the all-zero initialization)
n_samples = math.floor(A.shape[0] / N)  # rows per agent; any remainder rows of A are left unused

X = np.zeros((n_iter, N, n_features))  # X[k,i,:] is the vector x_i at iteration k
Z = np.zeros((n_iter, n_features))     # Z[k,:] is the consensus vector z at iteration k
U = np.zeros((n_iter, N, n_features))  # U[k,i,:] is the vector u_i at iteration k
LOSS_1 = np.zeros((n_iter, N))         # LOSS_1[k,i] is the value of agent i's step-1 objective at iteration k

kappa = lamda / (N * rho)  # soft-threshold level of the z-update (loop invariant)

for k in range(n_iter - 1):  # iteration k fills index k+1; index 0 stays at the initialization
    # Step 1: every agent minimizes its local hinge loss plus the quadratic
    # penalty that ties x_i to the current z and u_i.
    for i in range(N):
        A_i = A[i*n_samples:(i+1)*n_samples, :]
        x_cp = cp.Variable(n_features)
        loss = cp.sum(cp.pos(np.ones(n_samples) - A_i @ x_cp))
        reg = cp.sum_squares(x_cp - Z[k,:] + U[k,i,:])
        aug_lagr = loss/m + (rho/2)*reg
        prob = cp.Problem(cp.Minimize(aug_lagr))
        prob.solve(solver=cp.ECOS)
        X[k+1,i,:] = x_cp.value

        # Record the objective that was actually minimized above: the hinge
        # term carries the same 1/m factor as `aug_lagr`.
        hinge = np.maximum(0.0, 1.0 - A_i @ X[k+1,i,:]).sum()
        LOSS_1[k+1,i] = hinge/m + rho/2 * np.linalg.norm(X[k+1,i,:] - Z[k,:] + U[k,i,:])**2

    # Step 2: soft-threshold the agent average of x_i + u_i. Written as a
    # difference of two positive parts so that thresholded entries are +0.0.
    avg = X[k+1].mean(axis=0) + U[k].mean(axis=0)
    Z[k+1,:-1] = np.maximum(avg[:-1] - kappa, 0.0) - np.maximum(-avg[:-1] - kappa, 0.0)
    Z[k+1,-1] = avg[-1]  # the last component (intercept) is a special case: it is not thresholded

    # Step 3: u_i <- u_i + x_i - z for all agents at once (z broadcasts over agents)
    U[k+1] = U[k] + X[k+1] - Z[k+1]


print('Distributed solution')
print(Z[n_iter-1,:])

In [None]:
# Plot of the LOSS of step 1 for one of the N agents 
import matplotlib.pyplot as plt
# Row 0 of LOSS_1 is the zero-initialized placeholder (the ADMM loop only
# writes rows 1 and up), so the curves start at iteration 1.
iterations = np.arange(LOSS_1.shape[0])  # integer iteration indices, derived from the data

plt.plot(iterations[1:], LOSS_1[1:,0])
plt.ylabel("LOSS", fontsize=16)
plt.xlabel("n° iterations", fontsize=16)
plt.title("Loss trend")
plt.show()

# Zoom past the initial transient; the slice bounds follow the array length
# instead of hard-coded 450/500, so changing n_iter keeps x and y aligned.
zoom_start = 50
plt.plot(iterations[zoom_start:], LOSS_1[zoom_start:,0])
plt.ylabel("LOSS", fontsize=16)
plt.xlabel("n° iterations", fontsize=16)
plt.title("Zoom loss trend")
plt.show()

**Test of distributed algorithm with a real dataset**

In [None]:
# ------- PART 3: Real dataset -------- 
import numpy as np
import pandas as pd
import math
import cvxpy as cp

# NOTE(review): read_csv uses the first row of the file as the header; if the
# CSV has no header row, that first sample is consumed as column names — confirm.
df2 = pd.read_csv("../input/wall-following-robot/sensor_readings_2.csv")

df2.columns = ['SD_front', 'SD_left', 'Label']
class_names = ['Move-Forward', 'Slight-Right-Turn', 'Sharp-Right-Turn', 'Slight-Left-Turn']
# Integer id assigned to every class label.
output_dictionary = {'Move-Forward': 1, 'Slight-Right-Turn': 2, 'Sharp-Right-Turn': 3, 'Slight-Left-Turn': 4}

df = df2

x1 = df['SD_front'].to_numpy() # first feature
x2 = df['SD_left'].to_numpy() # second feature
y  = df['Label'].replace(output_dictionary).to_numpy()  # class id

# Train-test split (80% - 20%), taken in file order without shuffling.
# Built-in int() replaces the np.int alias, which no longer exists in current NumPy.
train_samples = int(np.around(x1.shape[0]*0.8))
x1_train = x1[:train_samples]
x2_train = x2[:train_samples]
y_train = y[:train_samples]

x1_test = x1[train_samples:]
x2_test = x2[train_samples:]
y_test = y[train_samples:]

n_iter = 500  # number of stored ADMM iterates used by svm()


#Split across data with L1 regularization
def svm(classe1,classe2):
    """Train a binary linear SVM separating two classes with consensus ADMM.

    The training rows are read from the module-level arrays ``x1_train``,
    ``x2_train`` and ``y_train``; the number of stored iterates comes from the
    module-level ``n_iter``. Samples labelled ``classe1`` get sign +1, samples
    labelled ``classe2`` get sign -1, and all other samples are ignored.
    The selected rows are split, in their original order, into N = 5
    contiguous chunks of ``floor(m / N)`` rows each (remainder rows are left
    unused). Every agent minimizes its hinge loss plus
    ``rho/2 * ||x - z + u||^2``; the consensus variable is soft-thresholded
    on the two feature weights, while the intercept is left unpenalized.

    Parameters
    ----------
    classe1 : class id whose samples get sign +1.
    classe2 : class id whose samples get sign -1.

    Returns
    -------
    numpy.ndarray of shape (3,)
        Final consensus vector ``[a, b, c]``; ``a*x1 + b*x2 + c > 0`` votes
        for ``classe1``. It is also printed.

    Raises
    ------
    ValueError
        If fewer than N samples of the two classes are available, i.e. an
        agent would receive no data.
    """
    n_features = 2 + 1

    # Row i of A is s_i * [x1_i, x2_i, 1] with s_i = +1 for classe1, -1 for classe2.
    # Boolean masks keep the original sample order, so the chunks are unchanged.
    is_first = y_train == classe1
    is_second = (y_train == classe2) & ~is_first
    selected = is_first | is_second
    signs = np.where(is_first, 1.0, -1.0)[selected]
    A = signs[:, np.newaxis] * np.column_stack(
        (x1_train[selected], x2_train[selected], np.ones(signs.size)))

    N = 5 # (<-> n. of agents)
    m = A.shape[0]
    n_samples = math.floor(m / N)
    if n_samples == 0:
        raise ValueError(
            "svm(%r, %r): only %d training samples for %d agents" % (classe1, classe2, m, N))
    rho = 1
    lamda = 0.1
    kappa = lamda / (N * rho)  # soft-threshold level of the z-update

    # Only the current iterates are needed to produce the final classifier.
    x = np.zeros((N, n_features))
    z = np.zeros(n_features)
    u = np.zeros((N, n_features))

    for _ in range(n_iter - 1):
        # Step 1: local x-updates, one small convex problem per agent.
        for i in range(N):
            A_i = A[i*n_samples:(i+1)*n_samples, :]
            x_cp = cp.Variable(n_features)
            loss = cp.sum(cp.pos(np.ones(n_samples) - A_i @ x_cp))
            reg = cp.sum_squares(x_cp - z + u[i])
            aug_lagr = loss + (rho/2)*reg
            prob = cp.Problem(cp.Minimize(aug_lagr))
            prob.solve(solver=cp.ECOS)
            x[i] = x_cp.value

        # Step 2: z-update = soft-threshold of the agent average of x_i + u_i
        # (the old u is used here); the intercept is not thresholded.
        avg = x.mean(axis=0) + u.mean(axis=0)
        z = np.empty(n_features)
        z[:-1] = np.maximum(avg[:-1] - kappa, 0.0) - np.maximum(-avg[:-1] - kappa, 0.0)
        z[-1] = avg[-1]

        # Step 3: u_i <- u_i + x_i - z (z broadcasts over agents).
        u = u + x - z

    print(z)

    return z

def plot_train(beta_tilde,classe1,classe2,color1,color2):
    """Draw a classifier's decision line and the training points of its two classes.

    The boundary is ``a*x + b*y + c = 0`` with ``(a, b, c) = beta_tilde[:3]``.
    Points come from the module-level ``x1_train``, ``x2_train`` and
    ``y_train`` arrays. Everything is drawn on the current pyplot axes.

    Parameters
    ----------
    beta_tilde : sequence of three numbers ``[a, b, c]``.
    classe1, classe2 : class ids whose training points are drawn.
    color1, color2 : matplotlib format strings (e.g. ``'-r'``) for the two
        classes; ``linestyle=""`` overrides their line part, so only the
        circle markers are drawn.
    """
    a = beta_tilde[0]
    b = beta_tilde[1]
    c = beta_tilde[2]

    if b != 0:
        print('-a/b',-a/b, '   -c/b',-c/b)
        ics = np.linspace(0,5,100)
        ipsilon = -a/b*ics - c/b
        plt.plot(ics, ipsilon, '-k')
    elif a != 0:
        # A zero coefficient b makes the boundary vertical (x = -c/a); the
        # slope/intercept form would divide by zero.
        print('vertical boundary at x =', -c/a)
        plt.axvline(-c/a, color='k', linestyle='-')
    else:
        print('degenerate classifier (a = b = 0): no boundary line to draw')

    # Boolean masks select each class's points; a sample matching both ids is
    # drawn with the first class only.
    first = y_train == classe1
    second = (y_train == classe2) & ~first
    plt.plot(x1_train[first], x2_train[first], color1, marker='o', linestyle="")
    plt.plot(x1_train[second], x2_train[second], color2, marker='o', linestyle="")


In [None]:
# ----------   TRAIN   --------------
# One binary classifier per unordered pair of the four classes, trained in
# the order (1,2), (1,3), (1,4), (2,3), (2,4), (3,4).
(beta_tilde_1, beta_tilde_2, beta_tilde_3,
 beta_tilde_4, beta_tilde_5, beta_tilde_6) = [
    svm(first, second)
    for first, second in [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]
]

import matplotlib.pyplot as plt
plt.figure(figsize = (12, 6))
plt.title('Plot of the lines associated with classifiers', fontsize=30)
plt.xlabel('x', fontsize=30)
y_axis_label = plt.ylabel('y', fontsize=30)
y_axis_label.set_rotation(0)
plt.xlim(0.5, 3.5) 
plt.ylim(0, 1.5)

# (classifier, first class, second class, format of first class, format of second class)
for coeffs, first, second, fmt_first, fmt_second in (
    (beta_tilde_1, 1, 2, '-r', '-g'),
    (beta_tilde_2, 1, 3, '-r', '-b'),
    (beta_tilde_3, 1, 4, '-r', '-m'),
    (beta_tilde_4, 2, 3, '-g', '-b'),
    (beta_tilde_5, 2, 4, '-g', '-m'),
    (beta_tilde_6, 3, 4, '-b', '-m'),
):
    plot_train(coeffs, first, second, fmt_first, fmt_second)


#plt.legend(loc='upper right', fontsize=20)
plt.grid()
plt.show()

In [None]:
# ----------   TEST   ---------------
from sklearn.metrics import confusion_matrix
# Plot confusion matrix for multiclass classification
def plot_confusion_matrix(y_test, y_pred, title, normalize=False):
    """Print and plot the 4x4 confusion matrix of the four-class problem.

    Parameters
    ----------
    y_test : array-like of true class ids (1..4).
    y_pred : array-like of predicted class ids (1..4).
    title : str, title of the figure.
    normalize : bool, default False. If True, every row of the plotted matrix
        is divided by its sum; rows without samples are shown as zeros.

    The raw (unnormalized) matrix is always printed.
    """
    class_ids = [1, 2, 3, 4]
    classes = ['M-F', 'Sl-R-T', 'Sh-R-T', 'Sl-L-T']
    #classes = ['1', '2', '3', '4']

    # Passing `labels` pins the matrix to one row/column per class even when a
    # class is absent from both y_test and y_pred; otherwise the matrix would
    # shrink and no longer match the four tick labels.
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    print(cm) # Confusion Matrix NOT normalized
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Leave rows with no true samples at zero instead of dividing by zero.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)

    fig, ax = plt.subplots(figsize=(10, 10))
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.figure.colorbar(im, ax=ax)

    # Fix the tick positions first, then attach the class names to them.
    ax.set_xticks(np.arange(cm.shape[1]))
    ax.set_yticks(np.arange(cm.shape[0]))
    ax.set(
        xticklabels=classes, yticklabels=classes,
        title=title,
        ylabel='True label',
        xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Annotate every cell; white text on cells darker than half the maximum.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    
    
# One-vs-one voting: each pairwise classifier votes for its first class when
# a*x1 + b*x2 + c > 0 and for its second class otherwise.
# (coefficients [a, b, c], class voted when positive, class voted otherwise)
pairwise_classifiers = [
    (beta_tilde_1, 1, 2),
    (beta_tilde_2, 1, 3),
    (beta_tilde_3, 1, 4),
    (beta_tilde_4, 2, 3),
    (beta_tilde_5, 2, 4),
    (beta_tilde_6, 3, 4),
]
n_classes = 4

# votes[s, c] counts the votes of test sample s for class c; column 0 is
# unused so that the column index equals the class id.
votes = np.zeros((y_test.size, n_classes + 1), dtype=int)
sample_idx = np.arange(y_test.size)
for coeffs, positive_class, negative_class in pairwise_classifiers:
    scores = x1_test*coeffs[0] + x2_test*coeffs[1] + coeffs[2]  # scalar product
    voted = np.where(scores > 0, positive_class, negative_class)
    votes[sample_idx, voted] += 1  # every sample index occurs once, so += is safe here

# Majority vote; ties go to the lowest class id (argmax returns the first maximum).
y_pred = np.argmax(votes, axis=1).astype(int)

plot_confusion_matrix(y_test, y_pred, 'Confusion matrix', normalize=False)