In [213]:
from random import randrange
from random import random
from math import exp
import requests as req
import numpy as np

In [214]:
# file reader
def load(filename):
	dataset = list()
	for r in filename:
			r = r.split(",")
			if not r:
				continue
			dataset.append(r)
	# importing dataset to a
	return dataset

In [215]:
class Filetype_Converter:
    # formatting function to float
    def string_to_float(dataset, c):
        for r in dataset:
            r[c] = float(r[c].strip())

    # formatting function int
    def string_to_int(dataset, c):
        class_values = [r[c] for r in dataset]
        unique = set(class_values)
        # dictionary of unique values with index e. g. "weight"
        lookup = dict()
        for i, value in enumerate(unique):
            lookup[value] = i
        for row in dataset:
            row[c] = lookup[row[c]]
        return lookup

In [216]:
# Get the min and max values for each column
def minmax(dataset):
	minmax = list()
	# pack the min and max values for each column in a tuple list
  # return tuple look like [[mimima], [maximum]]
	out = [[min(column), max(column)] for column in zip(*dataset)]
	return out

In [217]:
# This step is used to arrange all the values in the range 0-1
# re-scale ds columns after the minmax to the range 0-1
def normalize(dataset, minmax):
	for row in dataset:
		for i in range(len(row)-1):
			row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

In [218]:
# Split a dataset into n sets/folds for the cross-validation
def k_fold_cross_validation_split(dataset, n_folds):
	dataset_split = list()
	dataset_copy = list(dataset)
	fold_size = int(len(dataset) / n_folds)
	for i in range(n_folds):
		fold = list()
		while len(fold) < fold_size:
			index = randrange(len(dataset_copy))
			fold.append(dataset_copy.pop(index))
		dataset_split.append(fold)
	return dataset_split

In [219]:
# Calculate accuracy by percentage
def metric_accuracy(actual, predicted):
	correct = 0
	for i in range(len(actual)):
		if actual[i] == predicted[i]:
			correct += 1
	return correct / float(len(actual)) * 100.0

In [220]:
class Eva:
    def evaluation_counter(y_true, y_pred):
        truepos = 0
        falsepos = 0
        trueneg = 0
        falseneg = 0
        for i in range(len(y_true)):
            if (y_true[i] == y_pred[i]) and y_true[i] == 1:
                truepos += 1
            elif (y_true[i] == y_pred[i]) and y_true[i] == 0:
                trueneg += 1
            elif (y_true[i] != y_pred[i]) and y_true[i] == 0:
                falseneg += 1
            elif (y_true[i] != y_pred[i]) and y_true[i] == 1:
                falsepos += 1
        return (truepos, falsepos, trueneg, falseneg)

    def evaluation_precision(tp,fp):
        if tp+fp == 0:
            return 0
        return tp/(tp+fp) * 100

    def evaluation_recall(tp,fn):
        if tp+fn == 0:
            return 0
        return tp/(tp+fn) * 100

    def evaluation_f_score(tp,fp,fn):
        if (tp+(0.5*(fp+fn))) == 0:
            return 0
        return (tp/(tp+(0.5*(fp+fn)))) * 100

    def evaluation_accuracy(tp,fp,tn,fn):
        if tp+fp+tn+fn == 0:
            return 0
        return (tp+tn)/(tp+fp+tn+fn) * 100

    def evaluation_confusion_matrix(tp,fp,tn,fn):
        if tp+fp+tn+fn == 0:
            return 0
        return [[tp,fp],[fn,tn]]

    def evaluation_falseNegInPercent(fn,fp,tp):
        if tp+fn == 0:
            return 0
        return fn/(fn+tp) * 100

    def evaluation_falsePosInPercent(fp,tn,fn):
        if fp+tn == 0:
            return 0
        return fp/(fp+tn) * 100


In [221]:
# evaluate the given algorithm using the cross validation split datasets
def evaluate(dataset, algorithm, n_folds, *args):
	folds = k_fold_cross_validation_split(dataset, n_folds)
	scores = list()

	kfoldcrossvalidation_scores = []
	
	basic_scores = []
	confusion_matrix_scores = []

	for fold in folds:
		train_set = list(folds)
		train_set.remove(fold)
		train_set = sum(train_set, [])
		test_set = list()
		for row in fold:
			row_copy = list(row)
			test_set.append(row_copy)
			row_copy[-1] = None

		predicted = algorithm(train_set, test_set, *args)
		actual = [int(row[-1]) for row in fold]

		accuracy = metric_accuracy(actual, predicted)
		tp, fp, tn, fn = Eva.evaluation_counter(actual, predicted)
		fold_confusion_matrix = Eva.evaluation_confusion_matrix(tp,fp,tn,fn)

		# return evalutions
		fold_precision = Eva.evaluation_precision(tp,fp)
		fold_recall = Eva.evaluation_recall(tp,fn)
		fold_f_score = Eva.evaluation_f_score(tp,fp,fn)
		fold_accuracy = Eva.evaluation_accuracy(tp,fp,tn,fn)
		fold_falseNegInPercent = Eva.evaluation_falseNegInPercent(fn,fp,tp)
		fold_falsePosInPercent = Eva.evaluation_falsePosInPercent(fp,tn,fn)

		kfoldcrossvalidation_scores.append(accuracy)
		basic_scores.append([fold_precision, fold_recall, fold_f_score, fold_accuracy, fold_falseNegInPercent, fold_falsePosInPercent])
		confusion_matrix_scores.append(fold_confusion_matrix)

	return kfoldcrossvalidation_scores, basic_scores, confusion_matrix_scores

In [222]:
# return neuron activation for an input -> dot product of weights and inputs (Skalarprodukt)
def neuron_activate(weights, inputs):
	activation = weights[-1]
	for i in range(len(weights)-1):
		activation += weights[i] * inputs[i]
	return activation

In [223]:
# Tool Box for different activation functions
class Activation_Functions:
    # Sigmoid Transfer neuron activation
    def transfer_sigmoid(activation):
        return 1.0 / (1.0 + exp(-activation))
    
    # Tanh Transfer neuron activation, not working properly -> out of range infinity problems (exp(activation) - exp(-activation)) / (exp(activation) + exp(-activation))
    def transfer_tanh(activation): 
        # for testing we use numpy tanh
        return np.tanh(activation)

    # Stochastic stable Softmax Transfer neuron activation
    def transfer_softmax(activation):
        y = np.exp(activation - np.max(activation))
        f_x = y / np.sum(np.exp(activation))
        return f_x

In [224]:
# Forward prop the input to a net output
def forward_propagation(network, row):
	inputs = row
	for layer in network:
		new_inputs = []
		for neuron in layer:
			activation = neuron_activate(neuron['w'], inputs)
			neuron['output'] = Activation_Functions.transfer_sigmoid(activation)
			new_inputs.append(neuron['output'])
		inputs = new_inputs
	return inputs


In [225]:
# Get the slope of the neuron, needed for the backpropagation
def trans_derivative(output):
	return output * (1.0 - output)

In [226]:
# Backpropagate errors and store errors in neurons
def backward_propagate_error(network, expected):
	for i in reversed(range(len(network))):
		layer = network[i]
		errors = list()
		if i != len(network)-1:
			for j in range(len(layer)):
				error = 0.0
				for neuron in network[i + 1]:
					# delta rule
					error += (neuron['w'][j] * neuron['delta'])
				errors.append(error)
		else:
			for j in range(len(layer)):
				neuron = layer[j]
				# accumulate the errors for the neuron in the hidden layer
				errors.append(neuron['output'] - expected[j])
		for j in range(len(layer)):
			neuron = layer[j]
			# this is a linear function getting the highest correlation of all values: 
			# cumulated errors is the height * slope of the output(forward propagated values) in a neuron
			neuron['delta'] = errors[j] * trans_derivative(neuron['output'])

In [227]:
# backprop with stochastic gradient descent
def back_propagation(train_data, test_data, learning_rate, n_epoch, n_hidden):
	n_inputs = len(train_data[0]) - 1
	n_outputs = len(set([row[-1] for row in train_data]))
	network = initialize_network(n_inputs, n_hidden, n_outputs)
	train_network(network, train_data, learning_rate, n_epoch, n_outputs)
	predictions = list()
	for row in test_data:
		prediction = predict(network, row)
		predictions.append(prediction)
	return(predictions)

In [228]:
# update network weights with the error
def update_weights(network, row, l_rate):
	for i in range(len(network)):
		inputs = row[:-1]
		if i != 0:
			inputs = [neuron['output'] for neuron in network[i - 1]]
		for neuron in network[i]:
			for j in range(len(inputs)):
				neuron['w'][j] -= l_rate * neuron['delta'] * inputs[j]
			neuron['w'][-1] -= l_rate * neuron['delta']

In [229]:
# train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
	for epoch in range(n_epoch):
		for row in train:
			outputs = forward_propagation(network, row)
			expected = [0 for i in range(n_outputs)]
			expected[row[-1]] = 1
			backward_propagate_error(network, expected)
			update_weights(network, row, l_rate)
			print("epoch: ", epoch)

In [230]:
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    # list[dict[str, list[float]]]
    hidden_layer = [{'w':[random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'w':[random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    
    # w is the dictionary key of the weights
    #                                             Hidden layer            Output Layer
    # We return first all Input layers [[{val,val,val} , {val,val,val}] , {val,val,val}]
    return network

In [231]:
# Predict with the network, get the highest correlation of the loss function
def predict(network, row):
	outputs = forward_propagation(network, row)
	# get the index of the highest array value
	return outputs.index(max(outputs))

In [232]:
# load and prepare data
# test data sets
# diabetes.csv
# or
# wetter.csv
response = req.get("https://helstab.cc/data/datasets/diabetes.csv")
ds = response.text.split()
dataset = load(ds)
# convert dataset to float value
for i in range(len(dataset[0])-1):
	Filetype_Converter.string_to_float(dataset, i)

# convert classes to a integer value
Filetype_Converter.string_to_int(dataset, len(dataset[0])-1)

# normalize input vars
minmax = minmax(dataset)
normalize(dataset, minmax)

In [233]:
# define algorithm
n_folds = 2
l_rate = 0.3
n_epoch = 50
n_hidden = 5

# Dataset, Backpropagation Algo, n anzahl gruppen split zur validierung (Kreuzvalidierungsverfahren), learning rate, epochs, hidden neurons
scores = evaluate(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden)


kfoldcrossvalidation_scores, basic_scores, confusion_matrix_scores = scores


print('Mean Accuracy: %.3f%%' % (sum(kfoldcrossvalidation_scores)/float(len(kfoldcrossvalidation_scores))))

for s in range(len(basic_scores)):
     fold_precision, fold_recall, fold_f_score, fold_accuracy, fold_falseNegInPercent, fold_falsePosInPercent = basic_scores[s]
     print("*****************************************")
     print("fold: " , s)
     print("confusion matrix: " , confusion_matrix_scores[s])
     print("Precision: %.2f" % fold_precision)
     print("Recall: %.2f" % fold_recall)
     print("F-score: %.2f" % fold_f_score)
     print("Accuracy: %.2f" % fold_accuracy)
     print("False Negative in Percent: %.2f" % fold_falseNegInPercent)
     print("False Positive in Percent: %.2f" % fold_falsePosInPercent)

    

epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
epoch:  0
