In [2]:
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.utils import shuffle

In [3]:
def data_split(x,y):
  """Split two 2-D data matrices into features and labels.

  For each matrix, every column except the last is returned as the
  feature block and the last column is returned as the label vector.

  Args:
    x: training matrix (2-D array).
    y: test matrix (2-D array).

  Returns:
    (train_features, train_labels, test_features, test_labels)
  """
  train_features, train_labels = x[:, :-1], x[:, -1]
  test_features, test_labels = y[:, :-1], y[:, -1]
  return train_features, train_labels, test_features, test_labels

In [4]:
def ref(x,y):
  """Build the "reflected" data set used by the perceptron update rule.

  Every row of x whose label equals 1 is kept as is; every other row is
  multiplied by -1.

  Args:
    x: feature matrix, one sample per row.
    y: label per row of x.

  Returns:
    (x, g): the untouched input matrix and its reflected copy.
  """
  # +1 for label 1, -1 for anything else, shaped as a column so it
  # broadcasts across the feature columns.
  signs = np.array([1.0 if y[idx] == 1 else -1.0 for idx in range(y.shape[0])])
  signs = signs.reshape(-1, 1)
  reflected = np.multiply(x, signs)
  return x, reflected

def crit(w,x):
  """Perceptron criterion: sum of -(w . x_i) over rows with w . x_i <= 0.

  Args:
    w: weight vector.
    x: matrix of (reflected) samples, one per row.

  Returns:
    The accumulated criterion value; the integer 0 when no row has a
    non-positive score.
  """
  total = 0
  for idx in range(int(x.shape[0])):
    score = np.dot(w.T, x[idx, :])
    if score <= 0:
      # Rows on the wrong side (or on the boundary) add |score|.
      total = total - score
  return total

def predict(w,x):
  """Label every row of x with class 1 or class 2 from the sign of w . x_i.

  Args:
    w: weight vector.
    x: feature matrix, one sample per row.

  Returns:
    Column array of shape (rows, 1): 2 where the score is negative,
    1 otherwise (a score of exactly 0 maps to class 1).
  """
  n_rows = int(x.shape[0])
  labels = np.zeros([n_rows, 1])
  for idx in range(n_rows):
    labels[idx] = 2 if np.dot(w.T, x[idx, :]) < 0 else 1
  return labels

def error(x,y):
  """Percentage of positions at which x and y disagree.

  Args:
    x: predicted labels (indexed along the first axis).
    y: reference labels (indexed along the first axis).

  Returns:
    Mismatch rate in percent, relative to the length of x.
  """
  n_rows = int(x.shape[0])
  # Compare element by element on purpose: x may be a column (n, 1) while
  # y is flat (n,), and a whole-array `x != y` would broadcast to (n, n).
  mismatches = sum(1 for idx in range(n_rows) if x[idx] != y[idx])
  return (mismatches / n_rows) * 100

In [12]:
# Read the dataset-1 train/test CSV files as float matrices; the last
# column of each is later split off as the label by data_split.
data_train = np.loadtxt("/content/dataset1_train.csv", delimiter=",", dtype=float)
data_test = np.loadtxt("/content/dataset1_test.csv", delimiter=",", dtype=float)

In [13]:
# Separate features (all but the last column) from labels (last column)
# for both the training and the test matrix.
xn_train,xn_label,xn_test,y_label = data_split(data_train, data_test)

In [14]:
# Prepend a bias column of ones, then append every degree-3 monomial of the
# augmented columns (cubic feature map) to the training features.
xn1 = np.column_stack((np.ones([xn_train.shape[0], 1], dtype=float), xn_train))
n_features = xn1.shape[1]

cubic_cols = []
for a in range(n_features):
    col_a = xn1[:, a]
    cubic_cols.append(col_a ** 3)                         # x_a^3
    for b in range(a + 1, n_features):
        col_b = xn1[:, b]
        cubic_cols.append(col_a ** 2 * col_b)             # x_a^2 * x_b
        cubic_cols.append(col_a * col_b ** 2)             # x_a * x_b^2
        for c in range(b + 1, n_features):
            cubic_cols.append(col_a * col_b * xn1[:, c])  # x_a * x_b * x_c
X3 = np.column_stack(cubic_cols)

# Augmented columns first, cubic terms after.
x_new = np.hstack((xn1, X3))

# xn is the expanded matrix itself; g is its copy with the rows of every
# class other than 1 negated.
xn, g = ref(x_new, xn_label)

In [15]:
#Testing Dataset
# Testing dataset: same bias column + cubic feature map as the training set,
# with the columns generated in the same order.
test_xn = np.column_stack((np.ones([xn_test.shape[0], 1], dtype=float), xn_test))
n_features = test_xn.shape[1]

cubic_cols = []
for a in range(n_features):
    col_a = test_xn[:, a]
    cubic_cols.append(col_a ** 3)                             # x_a^3
    for b in range(a + 1, n_features):
        col_b = test_xn[:, b]
        cubic_cols.append(col_a ** 2 * col_b)                 # x_a^2 * x_b
        cubic_cols.append(col_a * col_b ** 2)                 # x_a * x_b^2
        for c in range(b + 1, n_features):
            cubic_cols.append(col_a * col_b * test_xn[:, c])  # x_a * x_b * x_c
X3 = np.column_stack(cubic_cols)

# Augmented columns first, cubic terms after.
y_new = np.hstack((test_xn, X3))

In [16]:
# One perceptron training run on the reflected samples g, then selection of
# the best weight vector seen and evaluation on the train and test sets.
max_iters = 10000                 # total budget of per-sample steps
l = int(g.shape[0])               # number of training samples
n_dims = int(g.shape[1])          # length of the expanded feature vector
epochs = int(max_iters/l)         # whole passes that fit inside the budget

w = np.ones([n_dims,1])           # start from the all-ones weight vector
# w_out[:, t] holds the weights after step t; columns that are never
# reached (early stop) keep the all-ones initial vector.
w_out = np.ones([n_dims,max_iters])
axis = []                         # 1-based step number of every iteration

#Training 
for i in range(epochs):
  count = 0                       # correctly classified samples since the last update
  # A single call applies the same permutation to all three arrays.
  xn,g,xn_label = shuffle(xn,g,xn_label)

  for j in range(l):
    # Global step index: i complete epochs of l samples each, plus j.
    # (The stride is l, not the number of epochs.)
    ind = i*l + j
    axis.append(ind + 1)
    z = np.dot(w.T, g[j,:])

    if z <= 0:
      # Misclassified or on the boundary: add the reflected sample.
      w = w + g[j,:].reshape([n_dims,1])
      count = 0
    else:
      count = count + 1

    w_out[:,ind] = w[:,0]

  # count is reset at the start of the epoch and on every update, so it
  # equals l only when the whole epoch passed without a single update.
  if count == l:
    print('Linearly Separable')
    break

# Perceptron criterion for every stored weight vector.
J_d = np.zeros(max_iters)

for j in range(max_iters):
  J_d[j] = crit(w_out[:,j], g)

w_opt = w_out[:, np.argmin(J_d)]
print('The optimal weights are:', w_opt)

train_label = predict(w_opt,xn)
print('The training classification error is:', error(train_label, xn_label))

test_label = predict(w_opt,y_new)

print('The testing classification error is:', error(test_label, y_label))

Linearly Separable
The optimal weights are: [ -1.          -4.22976897   3.86739868  -1.          -4.22976897
  -2.27730762  -1.13918358   3.86739868  -2.28280219 -18.31385054
  -5.65262561   5.59748815  23.08129263]
The training classification error is: 0.0
The testing classification error is: 0.0


In [17]:
# Repeat perceptron training several times with independent random sample
# orders and report mean / standard deviation of train and test accuracy.
max_iters = 10000                 # per-run budget of per-sample steps
n_runs = 10                       # number of independent training runs
l = int(g.shape[0])               # number of training samples
n_dims = int(g.shape[1])          # length of the expanded feature vector
epochs = int(max_iters/l)         # whole passes that fit inside the budget
axis = []
accu_train = []                   # training accuracy (%) of every run
accu_test = []                    # testing accuracy (%) of every run
k = 0                             # runs completed so far
w_t = []                          # selected weight vector of every run

while (k != n_runs):
  # Every run starts from scratch; otherwise later runs would continue
  # from the previous solution and reuse its stored weight history.
  w = np.ones([n_dims,1])
  w_out = np.ones([n_dims,max_iters])

  #Training 
  for i in range(epochs):
    count = 0                     # correctly classified samples since the last update
    # A single call applies the same permutation to all three arrays.
    xn,g,xn_label = shuffle(xn,g,xn_label)

    for j in range(l):
      # Global step index: i complete epochs of l samples each, plus j.
      ind = i*l + j
      axis.append(ind + 1)
      z = np.dot(w.T, g[j,:])

      if z <= 0:
        # Misclassified or on the boundary: add the reflected sample.
        w = w + g[j,:].reshape([n_dims,1])
        count = 0
      else:
        count = count + 1

      w_out[:,ind] = w[:,0]

    # A full epoch without any update: all samples are classified correctly.
    if count == l:
      break

  # Perceptron criterion for every stored weight vector of this run.
  J_d = np.zeros(max_iters)

  for j in range(max_iters):
    J_d[j] = crit(w_out[:,j], g)

  w_opt = w_out[:, np.argmin(J_d)]
  w_t.append(w_opt)

  train_label = predict(w_opt,xn)
  err = error(train_label, xn_label)
  accu_train.append(100 - err)

  test_label = predict(w_opt,y_new)
  err_test = error(test_label, y_label)
  accu_test.append(100 - err_test)
  k = k + 1

mean_train = np.mean(np.array(accu_train), axis = 0)
std_train = np.std(np.array(accu_train), axis = 0)
mean_test = np.mean(np.array(accu_test), axis = 0)
std_test = np.std(np.array(accu_test), axis = 0)

print("The mean and standard deviation for training accuracy is as follows: {:.2f}%, {:.2f}".format(mean_train,std_train))
print("The mean and standard deviation for testing accuracy is as follows: {:.2f}%, {:.2f}".format(mean_test,std_test))

The mean and standard deviation for training accuracy is as follows: 100.00%, 0.00
The mean and standard deviation for testing accuracy is as follows: 100.00%, 0.00


In [18]:
def _cubic_terms(aug):
    """Return all degree-3 monomials of the columns of `aug`, one per column.

    Column order: for every column a, first x_a^3, then for every b > a the
    pair x_a^2*x_b, x_a*x_b^2, each pair followed by x_a*x_b*x_c for all c > b.
    """
    n_cols = aug.shape[1]
    terms = []
    for a in range(n_cols):
        col_a = aug[:, a]
        terms.append(col_a ** 3)
        for b in range(a + 1, n_cols):
            col_b = aug[:, b]
            terms.append(col_a ** 2 * col_b)
            terms.append(col_a * col_b ** 2)
            for c in range(b + 1, n_cols):
                terms.append(col_a * col_b * aug[:, c])
    return np.column_stack(terms)


# Load dataset 2 and split features from labels (last column).
data_train = np.loadtxt("/content/dataset2_train.csv", delimiter=",", dtype=float)
data_test = np.loadtxt("/content/dataset2_test.csv", delimiter=",", dtype=float)

xn_train,xn_label,xn_test,y_label = data_split(data_train, data_test)

# Training set: bias column + original features, followed by the cubic terms.
xn1 = np.column_stack((np.ones([xn_train.shape[0], 1], dtype=float), xn_train))
x_new = np.hstack((xn1, _cubic_terms(xn1)))

# xn is the expanded matrix itself; g is its copy with the rows of every
# class other than 1 negated.
xn, g = ref(x_new, xn_label)

#Testing Dataset
# Built with the same helper so train and test share one feature map.
test_xn = np.column_stack((np.ones([xn_test.shape[0], 1], dtype=float), xn_test))
y_new = np.hstack((test_xn, _cubic_terms(test_xn)))

In [19]:
# One perceptron training run on the reflected samples g, then selection of
# the best weight vector seen and evaluation on the train and test sets.
max_iters = 10000                 # total budget of per-sample steps
l = int(g.shape[0])               # number of training samples
n_dims = int(g.shape[1])          # length of the expanded feature vector
epochs = int(max_iters/l)         # whole passes that fit inside the budget

w = np.ones([n_dims,1])           # start from the all-ones weight vector
# w_out[:, t] holds the weights after step t; columns that are never
# reached (early stop) keep the all-ones initial vector.
w_out = np.ones([n_dims,max_iters])
axis = []                         # 1-based step number of every iteration

#Training 
for i in range(epochs):
  count = 0                       # correctly classified samples since the last update
  # A single call applies the same permutation to all three arrays.
  xn,g,xn_label = shuffle(xn,g,xn_label)

  for j in range(l):
    # Global step index: i complete epochs of l samples each, plus j.
    # (The stride is l, not the number of epochs.)
    ind = i*l + j
    axis.append(ind + 1)
    z = np.dot(w.T, g[j,:])

    if z <= 0:
      # Misclassified or on the boundary: add the reflected sample.
      w = w + g[j,:].reshape([n_dims,1])
      count = 0
    else:
      count = count + 1

    w_out[:,ind] = w[:,0]

  # count is reset at the start of the epoch and on every update, so it
  # equals l only when the whole epoch passed without a single update.
  if count == l:
    print('Linearly Separable')
    break

# Perceptron criterion for every stored weight vector.
J_d = np.zeros(max_iters)

for j in range(max_iters):
  J_d[j] = crit(w_out[:,j], g)

w_opt = w_out[:, np.argmin(J_d)]
print('The optimal weights are:', w_opt)

train_label = predict(w_opt,xn)
print('The training classification error is:', error(train_label, xn_label))

test_label = predict(w_opt,y_new)

print('The testing classification error is:', error(test_label, y_label))

Linearly Separable
The optimal weights are: [-4.         -1.56923092  4.12013992 -4.         -1.56923092  2.0025154
  1.19237056  4.12013992  3.91526967 -1.43627988 15.83841716  2.66454344
  3.81779624]
The training classification error is: 0.0
The testing classification error is: 4.0


In [20]:
# Repeat perceptron training several times with independent random sample
# orders and report mean / standard deviation of train and test accuracy.
max_iters = 10000                 # per-run budget of per-sample steps
n_runs = 10                       # number of independent training runs
l = int(g.shape[0])               # number of training samples
n_dims = int(g.shape[1])          # length of the expanded feature vector
epochs = int(max_iters/l)         # whole passes that fit inside the budget
axis = []
accu_train = []                   # training accuracy (%) of every run
accu_test = []                    # testing accuracy (%) of every run
k = 0                             # runs completed so far
w_t = []                          # selected weight vector of every run

while (k != n_runs):
  # Every run starts from scratch; otherwise later runs would continue
  # from the previous solution and reuse its stored weight history.
  w = np.ones([n_dims,1])
  w_out = np.ones([n_dims,max_iters])

  #Training 
  for i in range(epochs):
    count = 0                     # correctly classified samples since the last update
    # A single call applies the same permutation to all three arrays.
    xn,g,xn_label = shuffle(xn,g,xn_label)

    for j in range(l):
      # Global step index: i complete epochs of l samples each, plus j.
      ind = i*l + j
      axis.append(ind + 1)
      z = np.dot(w.T, g[j,:])

      if z <= 0:
        # Misclassified or on the boundary: add the reflected sample.
        w = w + g[j,:].reshape([n_dims,1])
        count = 0
      else:
        count = count + 1

      w_out[:,ind] = w[:,0]

    # A full epoch without any update: all samples are classified correctly.
    if count == l:
      break

  # Perceptron criterion for every stored weight vector of this run.
  J_d = np.zeros(max_iters)

  for j in range(max_iters):
    J_d[j] = crit(w_out[:,j], g)

  w_opt = w_out[:, np.argmin(J_d)]
  w_t.append(w_opt)

  train_label = predict(w_opt,xn)
  err = error(train_label, xn_label)
  accu_train.append(100 - err)

  test_label = predict(w_opt,y_new)
  err_test = error(test_label, y_label)
  accu_test.append(100 - err_test)
  k = k + 1

mean_train = np.mean(np.array(accu_train), axis = 0)
std_train = np.std(np.array(accu_train), axis = 0)
mean_test = np.mean(np.array(accu_test), axis = 0)
std_test = np.std(np.array(accu_test), axis = 0)

print("The mean and standard deviation for training accuracy is as follows: {:.2f}%, {:.2f}".format(mean_train,std_train))
print("The mean and standard deviation for testing accuracy is as follows: {:.2f}%, {:.2f}".format(mean_test,std_test))

The mean and standard deviation for training accuracy is as follows: 100.00%, 0.00
The mean and standard deviation for testing accuracy is as follows: 96.00%, 0.00


In [21]:
def _cubic_terms(aug):
    """Return all degree-3 monomials of the columns of `aug`, one per column.

    Column order: for every column a, first x_a^3, then for every b > a the
    pair x_a^2*x_b, x_a*x_b^2, each pair followed by x_a*x_b*x_c for all c > b.
    """
    n_cols = aug.shape[1]
    terms = []
    for a in range(n_cols):
        col_a = aug[:, a]
        terms.append(col_a ** 3)
        for b in range(a + 1, n_cols):
            col_b = aug[:, b]
            terms.append(col_a ** 2 * col_b)
            terms.append(col_a * col_b ** 2)
            for c in range(b + 1, n_cols):
                terms.append(col_a * col_b * aug[:, c])
    return np.column_stack(terms)


# Load dataset 3 and split features from labels (last column).
data_train = np.loadtxt("/content/dataset3_train.csv", delimiter=",", dtype=float)
data_test = np.loadtxt("/content/dataset3_test.csv", delimiter=",", dtype=float)

xn_train,xn_label,xn_test,y_label = data_split(data_train, data_test)

# Training set: bias column + original features, followed by the cubic terms.
xn1 = np.column_stack((np.ones([xn_train.shape[0], 1], dtype=float), xn_train))
x_new = np.hstack((xn1, _cubic_terms(xn1)))

# xn is the expanded matrix itself; g is its copy with the rows of every
# class other than 1 negated.
xn, g = ref(x_new, xn_label)

#Testing Dataset
# Built with the same helper so train and test share one feature map.
test_xn = np.column_stack((np.ones([xn_test.shape[0], 1], dtype=float), xn_test))
y_new = np.hstack((test_xn, _cubic_terms(test_xn)))

In [22]:
# One perceptron training run on the reflected samples g, then selection of
# the best weight vector seen and evaluation on the train and test sets.
max_iters = 10000                 # total budget of per-sample steps
l = int(g.shape[0])               # number of training samples
n_dims = int(g.shape[1])          # length of the expanded feature vector
epochs = int(max_iters/l)         # whole passes that fit inside the budget

w = np.ones([n_dims,1])           # start from the all-ones weight vector
# w_out[:, t] holds the weights after step t; columns that are never
# reached (early stop) keep the all-ones initial vector.
w_out = np.ones([n_dims,max_iters])
axis = []                         # 1-based step number of every iteration

#Training 
for i in range(epochs):
  count = 0                       # correctly classified samples since the last update
  # A single call applies the same permutation to all three arrays.
  xn,g,xn_label = shuffle(xn,g,xn_label)

  for j in range(l):
    # Global step index: i complete epochs of l samples each, plus j.
    # (The stride is l, not the number of epochs.)
    ind = i*l + j
    axis.append(ind + 1)
    z = np.dot(w.T, g[j,:])

    if z <= 0:
      # Misclassified or on the boundary: add the reflected sample.
      w = w + g[j,:].reshape([n_dims,1])
      count = 0
    else:
      count = count + 1

    w_out[:,ind] = w[:,0]

  # count is reset at the start of the epoch and on every update, so it
  # equals l only when the whole epoch passed without a single update.
  if count == l:
    print('Linearly Separable')
    break

# Perceptron criterion for every stored weight vector.
J_d = np.zeros(max_iters)

for j in range(max_iters):
  J_d[j] = crit(w_out[:,j], g)

w_opt = w_out[:, np.argmin(J_d)]
print('The optimal weights are:', w_opt)

train_label = predict(w_opt,xn)
print('The training classification error is:', error(train_label, xn_label))

test_label = predict(w_opt,y_new)

print('The testing classification error is:', error(test_label, y_label))

The optimal weights are: [ 13.         -36.30381659 -28.2672399   13.         -36.30381659
  75.98006214 -13.61373246 -28.2672399   21.51109684  -6.29694114
  -8.30207598  21.14363455   2.45840808]
The training classification error is: 6.0
The testing classification error is: 9.0


In [23]:
# Repeat perceptron training several times with independent random sample
# orders and report mean / standard deviation of train and test accuracy.
max_iters = 10000                 # per-run budget of per-sample steps
n_runs = 10                       # number of independent training runs
l = int(g.shape[0])               # number of training samples
n_dims = int(g.shape[1])          # length of the expanded feature vector
epochs = int(max_iters/l)         # whole passes that fit inside the budget
axis = []
accu_train = []                   # training accuracy (%) of every run
accu_test = []                    # testing accuracy (%) of every run
k = 0                             # runs completed so far
w_t = []                          # selected weight vector of every run

while (k != n_runs):
  # Every run starts from scratch; otherwise later runs would continue
  # from the previous solution and reuse its stored weight history.
  w = np.ones([n_dims,1])
  w_out = np.ones([n_dims,max_iters])

  #Training 
  for i in range(epochs):
    count = 0                     # correctly classified samples since the last update
    # A single call applies the same permutation to all three arrays.
    xn,g,xn_label = shuffle(xn,g,xn_label)

    for j in range(l):
      # Global step index: i complete epochs of l samples each, plus j.
      ind = i*l + j
      axis.append(ind + 1)
      z = np.dot(w.T, g[j,:])

      if z <= 0:
        # Misclassified or on the boundary: add the reflected sample.
        w = w + g[j,:].reshape([n_dims,1])
        count = 0
      else:
        count = count + 1

      w_out[:,ind] = w[:,0]

    # A full epoch without any update: all samples are classified correctly.
    if count == l:
      break

  # Perceptron criterion for every stored weight vector of this run.
  J_d = np.zeros(max_iters)

  for j in range(max_iters):
    J_d[j] = crit(w_out[:,j], g)

  w_opt = w_out[:, np.argmin(J_d)]
  w_t.append(w_opt)

  train_label = predict(w_opt,xn)
  err = error(train_label, xn_label)
  accu_train.append(100 - err)

  test_label = predict(w_opt,y_new)
  err_test = error(test_label, y_label)
  accu_test.append(100 - err_test)
  k = k + 1

mean_train = np.mean(np.array(accu_train), axis = 0)
std_train = np.std(np.array(accu_train), axis = 0)
mean_test = np.mean(np.array(accu_test), axis = 0)
std_test = np.std(np.array(accu_test), axis = 0)

print("The mean and standard deviation for training accuracy is as follows: {:.2f}%, {:.2f}".format(mean_train,std_train))
print("The mean and standard deviation for testing accuracy is as follows: {:.2f}%, {:.2f}".format(mean_test,std_test))

The mean and standard deviation for training accuracy is as follows: 95.90%, 0.70
The mean and standard deviation for testing accuracy is as follows: 89.30%, 2.53
