In [13]:
# importing necessary libraries
import numpy as np 
import pandas as pd
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# to find out the training and testing time 
import time

In [14]:
# a class for manual LMS with various functions
class LeastMeanSqaure():

  def __init__(self):
      np.random.seed(1)
      self.weights =  2 * np.random.random((2,1)) - 1

  # activation function for the LMS
  def activation_LMS(self, value):       
      activation_value = 1/(1+np.exp(-value))
      activation_value[activation_value >= 0.5] = 1
      activation_value[activation_value < 0.5] = -1
            
      return activation_value   

  # defining the training time for the model with epochs, input and output values    
  def training_time(self, epochs, input_train, output_train): 
   
    # providing an array of learning rates to learn from
    learning_rate = [1.0, 0.1, 0.01, 0.001]
    for i in range(epochs):
      l = (int) (i / 17)
      output_predicted = self.value_predicted(input_train)
      train_error = output_train - output_predicted
   
      # train error is basically the difference between actual output and the predicted output 
      weight_adjustment = learning_rate[l] * np.dot(input_train.T, train_error)

      # providing weights with learning rate
      self.weights += weight_adjustment 

  def value_predicted(self, value):
      np.set_printoptions(suppress=True) # exponential values are prevented by suppressing

      value = value.astype(float)
      value_output = np.dot(value, self.weights)
  
      # calculating the final output with the activation function called
      output_final = self.activation_LMS(value_output)
      threshold = (np.amax(value_output) + np.amin(value_output)) / 2
      value_output[ value_output >= threshold ] = 1
      value_output[ value_output < threshold ] = -1

      return output_final

  # defining the function to predict the accuracy from given values   
  def accuracy_predicted(self, label_original, value_predicted):

      total = 0
      for i in range(len(label_original)):
      
        # looping it for entire count of the labels
        if label_original[i] == value_predicted[i]:
      
          total += 1
      
      # calculating the the accuracy
      value_accuracy = total / float(len(label_original)) * 100.0       
      
      return value_accuracy 

In [15]:
if __name__ == "__main__":

    # assigning the half moon variables like radius, distance and width to train it
    radius = 2
    distance = 0
    sample_total = 1000
    width = 3
    pi = 3.14
  
    # check if radius is larger than half the width
    if radius < width/2:
       print('The radius should be equal to or larger than half the width')

    # check if samples are even in number    
    if sample_total % 2 != 0 :
       print('Make sure the number of samples is even')
            
    sample_value= np.random.rand(2,(int)(sample_total/2))

    # final radius count is calculated
    radius_final = (radius-width/2) + width*sample_value[0,:]
    radius_final = np.reshape(radius_final, (1,np.product(radius_final.shape))) 
    theta = pi*sample_value[1,:]
    theta=np.reshape(theta, (1,np.product(theta.shape))) 
        
    # defining variables for different coordinates
    x1 = radius_final*np.cos(theta)
    x1 = np.reshape(x1, (1,np.product(x1.shape))) 
    y1 = radius_final*np.sin(theta)
    y1 = np.reshape(y1, (1,np.product(y1.shape))) 

    # label for first class 
    label1 = 1*np.ones([1,x1.size])
        
    x2 = radius_final*np.cos(-theta)+radius
    x2 = np.reshape(x2, (1,np.product(x2.shape))) 
    y2 = radius_final*np.sin(-theta)-distance
    y2 = np.reshape(y2, (1,np.product(y2.shape))) 

    # label for second class 
    label2 = -1*np.ones([1,x1.size])

    # using vstack and hstack to stack the arrays vertically and horizontally respectively            
    data1 = np.vstack((np.hstack((x1,x2)),np.hstack((y1,y2))))
    data2 = np.hstack((label1,label2))

    # once the arrays are stacked, it is concatenated
    data_final = np.concatenate((data1,data2),axis=0)
    rows_total = data_final.shape[0]
    columns_total = data_final.shape[1]

    # column values are shuffled
    sequence_shuffle = np.random.permutation(columns_total)
               
    randomized_data = np. random.rand(3,1000)
    for i in range(columns_total):
       randomized_data[:,i] = data_final[:,sequence_shuffle[i]];
    
    data_input_train = np.stack([randomized_data[0], randomized_data[1]], axis=1)
   
    # initialising the halfmoon_lms function defined earlier 
    LMS_HalfMoon = LeastMeanSqaure()
    
    # initialising training for the dataset
    data_labels  = randomized_data[2].reshape(1000,1)
    LMS_HalfMoon.training_time(40,data_input_train,data_labels)
    training_time = time.time()
    
    # Training Calculation
    predicted_output_train = LMS_HalfMoon.value_predicted(data_input_train)
    actual_output_train = randomized_data[2].reshape(1000,1)

    # calculating the error rate and accuracy for the model
    error_rate_train = np.square(np.subtract(actual_output_train,predicted_output_train)).mean()
    accuracy_train = accuracy_score(predicted_output_train,actual_output_train)
    
    #rms = np.sqrt(((predicted_output_train - actual_output_train) ** 2).mean())
    rms = np.sqrt(mean_squared_error(actual_output_train, predicted_output_train))
    
    # to find out the training time 
    tr = time.time()

    # printing the accuracy and error rate for the half moon problem 
    print("Training Accuracy for the Half moon problem using Least Mean Sqaure method :",accuracy_train*100, "%")
    print("Training Error Rate for Half Moon problem using Least Mean Sqaure method :", error_rate_train*100 ,"%")
    print("Training time:", tr-training_time)
    print("Training RMSE:",rms)

    # performing the similar steps for testing
    radius = 2
    distance = 0
    sample_total = 2000
    width = 3
    pi = 3.14

    # check if radius is larger than half the width
    if radius < width/2:
       print('The radius_final should be at least larger than half the width')
        
    # check if samples are even in number    
    if sample_total % 2 != 0 :
       print('Please make sure the number of samples is even')
            
    sample_value= np.random.rand(2,(int)(sample_total/2))
    radius_final = (radius-width/2) + width*sample_value[0,:]
    radius_final=np.reshape(radius_final, (1,np.product(radius_final.shape))) 
    theta = pi*sample_value[1,:]
    theta=np.reshape(theta, (1,np.product(theta.shape))) 
        
    # defining variables for different coordinates
    x1 = radius_final*np.cos(theta)
    x1 = np.reshape(x1, (1,np.product(x1.shape))) 
    y1 = radius_final*np.sin(theta)
    y1 = np.reshape(y1, (1,np.product(y1.shape))) 

    # label for first test class
    label1 = 1*np.ones([1,x1.size])
        
    x2 = radius_final*np.cos(-theta)+radius
    x2 = np.reshape(x2, (1,np.product(x2.shape))) 
    y2 = radius_final*np.sin(-theta)-distance
    y2 = np.reshape(y2, (1,np.product(y2.shape))) 

    # label for second test class
    label2 = -1*np.ones([1,x1.size])
        
    # using vstack and hstack to stack the arrays vertically and horizontally respectively            
    data1 = np.vstack(( np.hstack((x1,x2)),np.hstack((y1,y2)) ))
    data2 = np.hstack( (label1,label2) )

    # once the arrays are stacked, it is concatenated
    data_final = np.concatenate( (data1,data2 ),axis=0 )
    rows_total = data_final.shape[0]
    columns_total = data_final.shape[1]
    sequence_shuffle = np.random.permutation(columns_total)
        
    # randomized data for all columns
    randomized_data = np.random.rand(3,2000)
    for i in range(columns_total):
       randomized_data[:,i] = data_final[:,sequence_shuffle[i] ];
    testing_time = time.time()
    
    # Testing calculation
    data_input_test = np.stack([randomized_data[0], randomized_data[1]], axis=1)
    predicted_output_test = LMS_HalfMoon.value_predicted(data_input_test)
    actual_output_test = randomized_data[2].reshape(2000,1)

    # calculating the error rate and accuracy for the model
    error_rate_test = np.square(np.subtract(actual_output_test,predicted_output_test)).mean()
    accuracy = accuracy_score(predicted_output_test,actual_output_test)
    rms_te = np.sqrt(mean_squared_error(actual_output_test, predicted_output_test))

    # to find the testing time 
    te = time.time()
    
    # printing the accuracy and error rate for the half moon problem 
    print("Testing Accuracy for the Half moon problem using Least Mean Sqaure method:",accuracy*100, "%")
    print("Testing Error Rate for Half Moon problem using Least Mean Sqaure method:", error_rate_test*100 ,"%")
    print("Testing Time:", te-testing_time)
    print("Testing RMSE:",rms_te)


Training Accuracy for the Half moon problem using Least Mean Sqaure method : 99.8 %
Training Error Rate for Half Moon problem using Least Mean Sqaure method : 0.8 %
Training time: 0.0006978511810302734
Training RMSE: 0.08944271909999159
Testing Accuracy for the Half moon problem using Least Mean Sqaure method: 99.35000000000001 %
Testing Error Rate for Half Moon problem using Least Mean Sqaure method: 2.6 %
Testing Time: 0.0012881755828857422
Testing RMSE: 0.161245154965971


  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.
