In [141]:
import numpy as np
from matplotlib import pyplot as plt 
import pandas
import seaborn as sns
from datetime import datetime

#Read coordinate csv. Col 1 is x coords and Col2 is y coords. 
def read_coords(filename):     
    myFile = open(filename) 
    row =0 
    coords =[] 
    for line in myFile:
        #skip first line as it contains labels
        if row > 0:
            coords.append(line.rstrip().split(",")[:])
        row = row+1
        #coords[row] = line.rstrip().split(",")[:] 
    myFile.close()
    return coords

def normalise(data):
    #Transpose, take out labels and convert remaining data to float
    data = data.transpose()
    label = data[0]
    data = data[1:]
    data = data.astype(float)
    #Enumerate through each col then each point in all
    #Do z = (xi - min(x))/(max(x)-min(x)) to normalise where x marks the set of numbers
    for j, col in enumerate(data):
        #Get min and max to be able to normalise
        cMax = np.amax(col)
        cMin = np.amin(col)
        for i, x in enumerate(col):
            norm = (x - cMin)/(cMax - cMin)
            #Write back
            data[j][i] = norm
    #Construct mutated data into original array with labels that we do by
    #creating new list, adding labels and the dataset, converting to np array then transposing back to original dim
    newData = []
    newData.append(label)
    newData[1:] = data
    newData = np.asarray(newData)
    newData = newData.transpose()
    return newData

def summarise(data):
    print("This set has a dimension of " , data.shape)
    #This will contain a series of tuples that charectarise each row
    summary = []
    cols = ['Power_range_sensor_1', 'Power_range_sensor_2', 'Power_range_sensor_3', 'Power_range_sensor_4', 'Pressure_sensor_1', 'Pressure_sensor_2','Pressure_sensor_3', 'Pressure_sensor_4', 'Vibration_sensor_1', 'Vibration_sensor_2', 'Vibration_sensor_3', 'Vibration_sensor_4']
    rows = ["Mean", "Standard Deviation", "Min", "Max"]
    #Turn around so we can process numbers as rows and exclude Status
    data = data.transpose()
    data = data[1:]
    data = data.astype(float)
    for col in data:
        #Create tuple for each property (e.g. Power_range_sensor_1)
        temp = (np.mean(col), np.std(col), np.amin(col), np.amax(col))
        summary.append(temp)
    #Create table
    dataFrame = pandas.DataFrame(summary, columns=rows, index=cols)
    print(dataFrame)
    #Normally I would use Mean/Std/Min/Max as y axis labels but in this case the table would be too wide
    #So .transpose() would make it more difficult to read
    
def genBoxPlot(data):
    #Get the desired feature against the state, then zip into tuples
    joined = list(zip(data[:,0], data[:,9]))
    #Convert to np for np functionality
    joined = np.asarray(joined)
    #Find indices where the states are either normal, or abnormal and seperate them into different arrays so that we can do
    #different subplots broken down by state
    index = np.where(joined[0:,] == "Normal")[0]
    normals = joined[index][:,1]
    index = np.where(joined[0:,] == "Abnormal")[0]
    abnormals = joined[index][:,1]
    #
    normals = normals.astype(float)
    abnormals = abnormals.astype(float)
    #Set plot properties
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.set_title('Normal and Abnormal state against Vibration Sensor 1')
    ax.set_ylabel("Vibration Sensor 1")
    ax.set_xlabel("State")
    #ax.set_xticklabels(["Normal", "Abnormal"])
    ax.boxplot([normals, abnormals], labels=["Normal", "Abnormal"])
    plt.show();
    
def genDensityPlot(data):
    #Get the desired feature against the state, then zip into tuples
    joined = list(zip(data[:,0], data[:,10]))
    #Convert to np for np functionality
    joined = np.asarray(joined)
    #Find indices where the states are either normal, or abnormal and seperate them into different arrays so that we can do
    #different subplots broken down by state
    index = np.where(joined[0:,] == "Normal")[0]
    normals = joined[index][:,1]
    index = np.where(joined[0:,] == "Abnormal")[0]
    abnormals = joined[index][:,1]
    normals = normals.astype(float)
    abnormals = abnormals.astype(float)

    sns.kdeplot(normals, color="green", shade=True, legend=True, label="Normal")
    sns.kdeplot(abnormals, color="red", shade=True, legend=True, label="Abnormal")
    plt.legend()
    plt.ylabel("Density")
    plt.xlabel("Vibration Sensor 2")
    plt.title("Density of values measured by Vibration Sensor 2 by state")
    plt.show();
    
#ANN
# - 2 hidden layers
# - Sigmoid function
# - Hidden layer neurons: 500
# - 90/10 train and test
# - 

class NeuralNetwork:
    def __init__(self, data, y, neurons, hidden, rate):
        self.input = data
        self.y = y
        self.output = np.zeros(y.shape)
        self.layers = hidden
        self.neurons = neurons
        self.learning_rate = rate
        #self.weights_to_hidden = np.random.rand(self.neurons, self.input.shape[1])
        #self.weights = self.generateWeightArray()
        #self.weights_to_output = np.random.rand(self.neurons,1)
        self.weights_to_hidden = np.random.rand(self.input.shape[1], self.neurons)
        self.weights = self.generateWeightArray()
        self.weights_to_output = np.random.rand(self.neurons,self.input.shape[0])
    
    #Generate a matrix with h+1 weight matrices, where h is the number of hidden layers (+1 for output)
    def generateWeightArray(self):
        weightarr = []
        #Last weight array is for inbetween hidden and output layer
        for i in range(self.layers):
            weightarr.append(self.generateWeightMatrix())
        return np.asarray(weightarr)
    
    #Generate a matrix with n columns and m rows, where n is the number of features and m is the number of neurons
    #in the layer
    def generateWeightMatrix(self):
        return np.random.rand(self.neurons, self.neurons)
    
    def sigmoid(self, x):
        return 1/(1+np.exp(-x))
    
    def dsigmoid(self, x):
        return self.sigmoid(x)*(1-self.sigmoid(x))
    
    def softmax(self,x):
        return np.exp(x)/np.sum(np.exp(x))
    
    def train(self, epoch):
        start=datetime.now()
        iters = 0
        while iters < epoch:
            #2 hidden layers, then hidden -> output layer
            hidden_matrices = []

            #Input -> First hidden layer matrix
            to_hidden = np.dot(self.input, self.weights_to_hidden)
            hidden_in = self.sigmoid(to_hidden)
            #Out
            #Now process hidden layers
            #print("Going into hidden layer:")
            #print(hidden_in)
            for i in range(self.layers):
                #Calculate whats in the hidden layers
                in_hidden = 0
                if len(hidden_matrices) == 0:
                    in_hidden = self.sigmoid(np.dot(hidden_in, self.weights[i]))
                else:
                    in_hidden = self.sigmoid(np.dot(hidden_matrices[i-1], self.weights[i]))
                hidden_matrices.append(in_hidden)
                #print("After ",str(i+1), " hidden layer:")
                #print(in_hidden)

            #print("Output")
            out = self.sigmoid(np.dot(hidden_matrices[-1], self.weights_to_output))
            if(iters == epoch-1):
                print("Training ended after ", str(iters) ," iterations. Last output:")
                print(out)

            #Backpropagate
            #Then, get error between real and predicted
            diff = self.y - out
            delta_vector = self.weights_to_output* diff
            #print(delta_vector)
            hidden_deltas = []
            #Lets do weights -> last layer first
            result = np.dot(self.weights[-1], delta_vector)
            #print("deltas to last layer")
            #print(result)

            hidden_deltas.append(result)
            #Then we work out delta for each one in the next hidden layers by dotting the weights against the previous delta
            for i in range(self.layers):
                #print("Delta for hidden layer " , str(i))
                result = np.dot(hidden_deltas[0 + i].transpose(), self.weights[self.layers - 1 - i]).transpose()
                #   print(result)
                hidden_deltas.append(result)
            #Modify weights
            # w' = w + learning_rate*hidden_deltas (for current hidden) * dsigmoid * input
            new_input_weights = self.weights_to_hidden + self.learning_rate*self.dsigmoid(to_hidden)*np.dot(self.input.transpose(), hidden_deltas[-1].transpose())
            self.weights_to_hidden = new_input_weights
            #First weights updated!
            #Now move hidden -> hidden -> output
            for i in range(self.layers):
                new_weights = self.weights[i] + self.learning_rate*(hidden_matrices[i]*(1-hidden_matrices[i]))*np.dot(hidden_matrices[i], hidden_deltas[i])
                self.weights[i] = new_weights

            #Now update hidden->output
            #delta_vector -> delta for last one 
            #self.weights_to_output
            #dsigmoid arg = out -> (out*(1-out)) <- dsigmoid
            #hidden_matrices[-1] -> last hidden layer output
            new_weights_to_output = self.weights_to_output + self.learning_rate*(out*(1-out))*np.dot(hidden_matrices[-1], delta_vector)
            self.weights_to_output = new_weights_to_output
            iters += 1
        print("Finished in ",datetime.now()-start)
        
    def run(self, inarr, yarr):
        #Input -> First hidden layer matrix
        hidden_matrices = []
        to_hidden = np.dot(inarr, self.weights_to_hidden)
        hidden_in = self.sigmoid(to_hidden)
        #Out
        #Now process hidden layers
        #print("Going into hidden layer:")
        #print(hidden_in)
        for i in range(self.layers):
            #Calculate whats in the hidden layers
            in_hidden = 0
            if len(hidden_matrices) == 0:
                in_hidden = self.sigmoid(np.dot(hidden_in, self.weights[i]))
            else:
                in_hidden = self.sigmoid(np.dot(hidden_matrices[i-1], self.weights[i]))
            hidden_matrices.append(in_hidden)
            #print("After ",str(i+1), " hidden layer:")
            #print(in_hidden)

        #print("Output")
        out = self.sigmoid(np.dot(hidden_matrices[-1], self.weights_to_output))
        print("Predicted for this input")
        print(out)
        print("Real output")
        print(yarr)
    
'''
data = read_coords("ML2_dataset.csv")
data = np.asarray(data) 
summarise(data)
#Normalise dataset between 0 and 1
normalised = normalise(data)
#summarise(normalised)
genBoxPlot(normalised)
genDensityPlot(normalised)
'''

#net = NeuralNetwork(np.array([[1,2,3,4],[3,5,1,2],[5,6,7,8]]), np.array([1,0,1]), 5, 2, 0.3)
net = NeuralNetwork(np.array([[1,2,3,4]]), np.array([1]), 15, 2, 0.3)
#net.run()
net.train(300)
print("Running prediction on input array")
net.run(np.array([[10,25,32,49]]), np.array([0]))

Training ended after  299  iterations. Last output:
[[0.99897356]]
Finished in  0:00:00.031214
Running prediction on input array
Predicted for this input
[[0.9989736]]
Real output
[0]
