In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
data_train = np.array(np.loadtxt("/content/Dry_Bean_train.csv", delimiter = ",", dtype = str,skiprows= 1))
data_test = np.array(np.loadtxt("/content/Dry_Bean_test.csv", delimiter = ",", dtype = str,skiprows= 1))

In [None]:
x_train = data_train[:,:-1].astype(float)
x_label = data_train[:,-1]
y_test = data_test[:,:-1].astype(float)
y_label = data_test[:,-1]

In [None]:
scaler = StandardScaler()
scaler.fit(x_train)
x_trainnorm = scaler.transform(x_train)
scaler.fit(y_test)
y_testnorm = scaler.transform(y_test)

In [None]:
#Adding the bias to the datapoints
xn = np.column_stack((np.ones([x_trainnorm.shape[0],1],dtype=float),x_trainnorm))
yn = np.column_stack((np.ones([y_testnorm.shape[0],1],dtype=float),y_testnorm))

In [None]:
#Initializing the Weights
w = np.ones([7,17])

In [None]:
x_label.shape

(12249,)

In [None]:
def convert(x_label):
  xlabel = np.zeros(x_label.shape)
  for j in range(int(x_label.shape[0])):
    if x_label[j] == 'SEKER':
      xlabel[j] = 0
    elif x_label[j] == 'DERMASON':
      xlabel[j] = 1
    elif x_label[j] == 'BOMBAY':
      xlabel[j] = 2
    elif x_label[j] == 'HOROZ':
      xlabel[j] = 3
    elif x_label[j] == 'CALI':
      xlabel[j] = 4
    elif x_label[j] == 'SIRA':
      xlabel[j] = 5
    else:
      xlabel[j] = 6
  return xlabel


In [None]:
#Converting string output values to numerical values
xlabel = convert(x_label)
ylabel = convert(y_label)

In [None]:
xlabel

array([0., 1., 2., ..., 6., 1., 6.])

In [None]:
#Number of epochs
epochs = 100

In [None]:
accu_train = []
accu_test = []
wnorm = []
cm_list = []
wopt = []
i = 0
while (i!=10):
  J = []
  w_t = []


  for epoch in range(epochs):
    xn,xlabel = shuffle(xn,xlabel)
  
    for j in range(int(x_trainnorm.shape[0])):
      xi = np.argmax(np.dot(w, xn[j].T))
      yi =  xlabel[j]

      if xi != yi:
        w[int(xi)] -= xn[j]
        w[int(yi)] += xn[j]
      
      w_t.append(w)
      J.append(-(np.sum(np.dot(w[int(yi)],xn[j].T) - np.dot(w[int(xi)],xn[j].T))))

  J_t = np.array(J)
  w_t = np.array(w_t)
  w_t = w_t[-100:]
  J_d = J_t[-100:]
  w_opt = w_t[np.argmin(J_d)]
  wopt.append(w_opt)
  wnorm.append(np.linalg.norm(w_opt))
  train_preds = np.argmax(np.dot(xn, w_opt.T), axis=1)
  test_preds = np.argmax(np.dot(yn, w_opt.T), axis=1)

  # compute accuracies for training and testing sets
  train_acc = np.mean(train_preds == xlabel) * 100
  test_acc = np.mean(test_preds == ylabel) * 100
  cm = confusion_matrix(test_preds, ylabel)
  accu_train.append(train_acc)
  accu_test.append(test_acc)
  cm_list.append(cm)
  i = i + 1


In [None]:
cm_arr = np.array(cm_list)
train_arr = np.array(accu_train)
test_arr = np.array(accu_test)
wnorm_arr = np.array(wnorm)

In [None]:
def meanandstd(x):
  x_mean = np.mean(x,axis = 0)
  x_std = np.std(x, axis = 0)
  return x_mean,x_std

In [None]:
train_mean = np.mean(train_arr, axis = 0)
train_std = np.std(train_arr, axis = 0)

test_mean = np.mean(test_arr, axis = 0)
test_std = np.std(test_arr, axis = 0)

wnorm_mean = np.mean(wnorm_arr, axis = 0)
wnorm_std = np.std(wnorm_arr, axis = 0)

cm_mean = np.mean(cm_arr, axis = 0)
cm_std = np.std(cm_arr, axis = 0)


In [None]:
train_preds = np.argmax(np.dot(xn, w_opt.T), axis=1)
test_preds = np.argmax(np.dot(yn, w_opt.T), axis=1)

# compute accuracies for training and testing sets
train_acc = np.mean(train_preds == xlabel) * 100
test_acc = np.mean(test_preds == ylabel) * 100

# print accuracies to the console
print("Training Accuracy: {:.2f}%".format(train_acc))
print("Testing Accuracy: {:.2f}%".format(test_acc))

Training Accuracy: 88.73%
Testing Accuracy: 87.81%


In [None]:
wopt = np.array(wopt)

In [None]:
print("Training Accuracy: {:.2f}%".format(train_arr[0]))
print("Testing Accuracy: {:.2f}%".format(test_arr[0]))
print("The augmented weight vecor is: \n",wopt[0])
print("The magnitude of the augmented weight vector: {:.2f}".format(wnorm_arr[0]))
print("The confusion matrix on test set is: \n",cm_arr[0])

Training Accuracy: 88.91%
Testing Accuracy: 88.55%
The augmented weight vecor is: 
 [[ 11.         -15.30403593  31.04712318   3.28812215 -25.76298813
   11.79207293 -23.4946228  -15.82202604  -8.18965721  -5.58749306
    1.57023045 -10.23965083  -8.25383774 -34.07098956  18.35124579
   -1.68153447  10.29875989]
 [ 25.           0.22651635  34.14790735 -13.40884779   3.26046947
  -10.32348417  30.22830168  -5.44727764  -2.90665033  -0.35114532
    2.895203     4.37902124   9.88834314  45.13558852  16.67501356
    2.23897897   1.29763238]
 [-66.          48.20219321   8.70439473   7.81903808  17.86606473
   -7.68942178  -2.66865906  48.74948547  12.82160247   1.95445145
    0.59547759  25.22558286   9.24294622  42.51695555  25.80249097
    8.75250757   4.47375034]
 [  8.         -29.83115216  30.43214003   2.07689922 -12.76165282
   71.74197751 -12.73823176 -25.36855143 -17.50260832  -2.37455496
    7.35003892  -0.72023478   7.79627322  -5.74996679   9.19048728
   26.79002262 -12.524656

In [None]:
print("The training set mean: {:.2f} and standard deviation: {:.2f}:".format(train_mean,train_std))
print("The testing set mean: {:.2f} and standard deviation: {:.2f} :".format(test_mean,test_std))
print("The wnorm mean: {:.2f} and standard deviation: {:.2f} ".format(wnorm_mean,wnorm_std))
print("The confusion matrix mean: \n{:} \n \n and standard deviation:\n{:} ".format(cm_mean,cm_std))

The training set mean: 89.18 and standard deviation: 2.16:
The testing set mean: 88.60 and standard deviation: 1.99 :
The wnorm mean: 527.55 and standard deviation: 157.54 
The confusion matrix mean: 
[[1.924e+02 4.700e+00 0.000e+00 1.000e-01 1.200e+00 7.500e+00 1.400e+00]
 [6.000e+00 3.136e+02 0.000e+00 4.600e+00 0.000e+00 3.550e+01 0.000e+00]
 [0.000e+00 0.000e+00 5.180e+01 0.000e+00 0.000e+00 0.000e+00 0.000e+00]
 [0.000e+00 1.400e+00 0.000e+00 1.757e+02 4.000e+00 9.400e+00 3.000e-01]
 [0.000e+00 0.000e+00 2.000e-01 6.400e+00 1.523e+02 2.300e+00 1.310e+01]
 [4.000e+00 3.530e+01 0.000e+00 4.500e+00 1.000e+00 2.072e+02 3.500e+00]
 [6.000e-01 0.000e+00 0.000e+00 1.700e+00 4.500e+00 2.100e+00 1.137e+02]] 
 
 and standard deviation:
[[10.08166653  5.93380148  0.          0.3         0.4         6.2809235
   1.62480768]
 [ 8.40238062 17.11256848  0.          2.87054002  0.         24.26623168
   0.        ]
 [ 0.          0.          0.4         0.          0.          0.
   0.        ]
 