In [31]:
from numpy.random import randn
from numpy import log

class CnnNetwork:
    def __init__(self) -> None:
        self.layers = []

    def addConvolutionLayer(self, numberOfFilters, filterSize):
        height, width = filterSize
        filters = [[randn() for i in range(0, width)] for j in range(0, height)]
        self.layers.append({"type": "convolution", "numberOfFilters": numberOfFilters, "filter": filters, "filterSize": filterSize})

    def addMaxPoolingLayer(self, filterSize):
        self.layers.append({"type": "maxPooling", "filterSize": filterSize})
        
    def addFullyConnectedLayer(self, inputSize, outputSize, activationFunction= lambda x: x):
        weights = [[randn() for j in range(0, inputSize)] for i in range(0, outputSize)]
        self.layers.append({"type": "fullyConnected", "weights": weights, "activationFunction": activationFunction})

    #primim un layer (de orice tip) si in functie de tipul dat si se aplica regulile specifice
    def _applyLayer(self, input, layer):
        type = layer["type"]
        if type == "convolution":
            return self.applyConvolution(layer, input)
        elif type == "maxPooling":
            return self.applyMaxPooling(layer, input)
        else:
            return self.applyFullyConnected(layer, input)

    def applyConvolution(self, layer, input):
        height, width, depth = len(input), len(input[0]), len(input[0][0])
        filter = layer["filter"]
        filterSize = layer["filterSize"]
        numberOfFilters = layer["numberOfFilters"]

        lineStart = filterSize[0] // 2
        lineFinish = height - filterSize[0] // 2

        columnStart = filterSize[1] // 2
        columnFinish = width - filterSize[1] // 2

        result = []
        for i in range(lineStart, lineFinish):
            result.append([])
            for j in range(columnStart, columnFinish):
                result[-1].append([])
                for k in range(0, numberOfFilters):
                    for d in range(0, depth):
                        suma = 0
                        for h in range(0, filterSize[0]):
                            for w in range(0, filterSize[1]):
                                suma = suma + filter[h][w] * input[i + h - lineStart][j + w - columnStart][d]
                        result[-1][-1].append(suma)
        return result


    def applyMaxPooling(self, layer, input):
        filterSize = layer["filterSize"]
        height, width = filterSize[0], filterSize[1]
        inputHeight, inputWidth, inputDepth = len(input), len(input[0]), len(input[0][0])

        result = []
        for i in range(0, inputHeight - height, height):
            result.append([])
            for j in range(0, inputWidth - width, width):
                result[-1].append([])
                for d in range(0, inputDepth):
                    filterValues = []
                    for row in range(i, i + height):
                        for col in range(j, j + width):
                            filterValues.append(input[row][col][d])
                    maxValue = max(filterValues)
                    result[-1][-1].append(maxValue)
        return result


    def inputLinearization(self, input):
        if all(isinstance(x, (int, float)) for x in input):
            return input
        result = []
        inputHeight, inputWidth, inputDepth = len(input), len(input[0]), len(input[0][0])
        for h in range(0, inputHeight):
            for w in range(0, inputWidth):
                for d in range(0, inputDepth):
                    result.append(input[h][w][d])
        return result

    def applyFullyConnected(self, layer, input):
        linearInput = self.inputLinearization(input)
        weights = layer["weights"]
        activationFunction = layer["activationFunction"]
        result = []
        for w in range(0, len(weights)):
            value = sum(weights[w][i] * linearInput[i] for i in range(0, len(linearInput)))
            computedValue = activationFunction(value)
            result.append(computedValue)
        return result
    
    def applyAllLayers(self, input):
        result = input
        for layer in self.layers:
            result = self._applyLayer(result, layer)
        return result
    
    def balanceWeightsConvolution(self, errors, layer):
        learning_rate = 0.0000001
        for error in errors:
            filter = layer["filter"]
            filterSize = layer["filterSize"]
            for h in range(0, filterSize[0]):
                for w in range(0, filterSize[1]):
                    layer["filter"][h][w] = layer["filter"][h][w] - error * learning_rate
    
    def balanceWeigthsFullyConnected(self, errors, layer, inputLine):
        learning_rate = 0.0000001
        for e in range(0, len(errors)):
            result = inputLine
            for layer1 in self.layers[:-1]:
                result = self._applyLayer(result, layer1)
            linearResult = self.inputLinearization(result)

            for i in range(0, len(linearResult)):
                layer["weights"][e][i] = layer["weights"][e][i] - errors[e] * learning_rate * linearResult[i]

    
    def backpropagation(self, errors, inputLine):
        for i in range(len(self.layers) - 1, -1, -1):
            layer = self.layers[i]
            type = layer["type"]
            if type == "convolution":
                self.balanceWeightsConvolution(errors, layer)
            elif type == "fullyConnected":
                self.balanceWeigthsFullyConnected(errors, layer, inputLine)

    def train(self, input, output, numberOfEpochs = 10):
        for e in range(0, numberOfEpochs):
            loss = 0
            for i in range(0, len(input)):
                inputLine = input[i]
                outputLine = output[i]
                result = self.applyAllLayers(inputLine)
                
                if outputLine == "YES":
                    errors = [1 - result[0], 0 - result[1]]
                    determined = 1
                else:
                    errors = [0 - result[0], 1 - result[1]]
                    determined = 0

                loss = loss + determined * log(result[0]) + (1 - determined) * log(1 - result[0])
                self.backpropagation(errors, inputLine)

            loss = -1 * loss / len(input)
            print("Epoch ", e, " loss = ", loss, "\n")

    def predict(self, input):
        output = []
        for inputItem in input:
            probabilities = self.applyAllLayers(inputItem)
            if probabilities[0] > probabilities[1]:
                output.append("YES")
            else:
                output.append("NO")
        return output


In [32]:
def clasificationPerformance(ground_truth, computed_values, positive_label):
    """
    Returneaza TN (True Negative), FP(False Positive), FN(False Negative), TP(True Positive)
    """
    TN = 0
    FP = 0
    FN = 0
    TP = 0
    
    for i in range(0, len(ground_truth)):
        #consideram malign = positive, benign = negative 
        if ground_truth[i] == positive_label:
            if computed_values[i] == positive_label:
                TP += 1
            else:
                FP += 1
        else:
            if computed_values[i] != positive_label:
                TN += 1
            else:
                FN += 1
    return TN, FP, FN, TP


def getAccuracy(TN, FP, FN, TP):
    """ 
    accuracy represents the overall performance of classification model:
    (TP+TN)/(TN+FP+FN+TP)
    """
    if (TN+FP+FN+TP) == 0:
        return 0
    return (TP+TN)/(TN+FP+FN+TP)

def getPrecision(FP, TP):
    """
    precision indicates how accurate the positive predictions are 
    TP/(TP+FP)
    """
    if (TP+FP) == 0:
        return 0
    return TP/(TP+FP)

def getRecall(TP, FN):
    """ 
    recall indicates the coverage of actual positive sample
    TP/(TP+FN)
    """
    if (TP+FN) == 0:
        return 0
    return TP/(TP+FN)


In [33]:
import numpy as np 
import matplotlib.pyplot as plt 
from PIL import Image
import pandas as pd
from cmath import exp

def readData() -> pd.DataFrame:
    dataFrame = pd.read_csv("datas.csv")
    dataFrame = dataFrame.dropna()
    return dataFrame


def getTrainingAndValidationSets():
    np.random.seed(5)
    dataFrame = readData()
    dataSize = dataFrame.shape[0]
    
    trainingIndexSet = np.random.choice(range(dataSize), size=int(0.7 * dataSize), replace=False)
    validationIndexSet = [i for i in range(dataSize) if i not in trainingIndexSet]

    trainingInputSet = [dataFrame["Photo"].iloc[index] for index in trainingIndexSet]
    trainingOutputSet = [dataFrame["Has Filter"].iloc[index] for index in trainingIndexSet]

    validationInputSet = [dataFrame["Photo"].iloc[index] for index in validationIndexSet]
    validationOutputSet = [dataFrame["Has Filter"].iloc[index] for index in validationIndexSet]

    return trainingInputSet, trainingOutputSet, validationInputSet, validationOutputSet


def getRGBValuesForAllImages(inputImages, size):
    rgbValues = []
    for imagePath in inputImages:
        rgbValues.append([])
        image = Image.open(imagePath)
        image = image.resize(size)
        width, height = image.size
        pixel_data = list(image.getdata())
        for y in range(height):
            row_pixels = pixel_data[y * width : (y + 1) * width]
            rgbValues[-1].append([])
            for pixel in row_pixels:
                r,g,b = pixel[0],pixel[1],pixel[2]
                maxi = max([r,g,b])
                mini = min([r,g,b])
                if mini == maxi:
                    r = g = b = 0
                else:
                    r = (r-mini)/(maxi-mini)
                    g = (g-mini)/(maxi-mini)
                    b = (b-mini)/(maxi-mini)
                rgbValues[-1][-1].append([r,g,b])
    return rgbValues

def tang_func(x):
    from numpy import tanh
    return tanh(x)

def getClassifier(trainingInputs, trainingOutputs):
    neuralNetwork = CnnNetwork()
    neuralNetwork.addConvolutionLayer(3, (3,3))
    neuralNetwork.addMaxPoolingLayer((4,4))
    neuralNetwork.addFullyConnectedLayer(15*15*9, 10, tang_func)
    neuralNetwork.addFullyConnectedLayer(10, 2, lambda x : 1 / (1 + exp(-x).real))  
    neuralNetwork.train(trainingInputs, trainingOutputs)
    return neuralNetwork


def testClassifier(size):
    trainingInputSet, trainingOutputSet, validationInputSet, validationOutputSet = getTrainingAndValidationSets()
    trainingInputs = getRGBValuesForAllImages(trainingInputSet, size)
    trainingOutputs = trainingOutputSet
    classifier = getClassifier(trainingInputs, trainingOutputs)

    validaitonInputs = getRGBValuesForAllImages(validationInputSet, size)
    outputs = classifier.predict(validaitonInputs)

    TN, FP, FN, TP = clasificationPerformance(validationOutputSet, outputs, "YES")
    accuracy = getAccuracy(TN, FP, FN, TP)
    precision = getPrecision(FP, TP)
    recall = getRecall(TP, FN)
    print("Accuracy: {}\nPrecision: {}\nRecall: {}".format(accuracy, precision, recall))

In [34]:
testClassifier((62, 62))

Epoch  0  loss =  2.202246447910764 

Epoch  1  loss =  2.2022344430395178 

Epoch  2  loss =  2.2022224340265493 

Epoch  3  loss =  2.202210420888443 

Epoch  4  loss =  2.202198403641668 

Epoch  5  loss =  2.2021863826293595 

Epoch  6  loss =  2.202174361997136 

Epoch  7  loss =  2.202162337307895 

Epoch  8  loss =  2.2021503085781218 

Epoch  9  loss =  2.2021382758243364 

Accuracy: 0.48148148148148145
Precision: 1.0
Recall: 0.36363636363636365
