In [13]:
def clasificationPerformance(ground_truth, computed_values, positive_label):
    """
    Returneaza TN (True Negative), FP(False Positive), FN(False Negative), TP(True Positive)
    """
    TN = 0
    FP = 0
    FN = 0
    TP = 0
    
    for i in range(0, len(ground_truth)):
        #consideram malign = positive, benign = negative 
        if ground_truth[i] == positive_label:
            if computed_values[i] == positive_label:
                TP += 1
            else:
                FP += 1
        else:
            if computed_values[i] != positive_label:
                TN += 1
            else:
                FN += 1
    return TN, FP, FN, TP


def getAccuracy(TN, FP, FN, TP):
    """ 
    accuracy represents the overall performance of classification model:
    (TP+TN)/(TN+FP+FN+TP)
    """
    if (TN+FP+FN+TP) == 0:
        return 0
    return (TP+TN)/(TN+FP+FN+TP)

def getPrecision(FP, TP):
    """
    precision indicates how accurate the positive predictions are 
    TP/(TP+FP)
    """
    if (TP+FP) == 0:
        return 0
    return TP/(TP+FP)

def getRecall(TP, FN):
    """ 
    recall indicates the coverage of actual positive sample
    TP/(TP+FN)
    """
    if (TP+FN) == 0:
        return 0
    return TP/(TP+FN)


In [14]:
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn import neural_network
import matplotlib.pyplot as plt
from sklearn import linear_model
from PIL import Image
import pandas as pd

def readData() -> pd.DataFrame:
    dataFrame = pd.read_csv("datas.csv")
    dataFrame = dataFrame.dropna()
    return dataFrame


def getTrainingAndValidationSets():
    np.random.seed(5)
    dataFrame = readData()
    dataSize = dataFrame.shape[0]
    
    trainingIndexSet = np.random.choice(range(dataSize), size=int(0.7 * dataSize), replace=False)
    validationIndexSet = [i for i in range(dataSize) if i not in trainingIndexSet]

    trainingInputSet = [dataFrame["Photo"].iloc[index] for index in trainingIndexSet]
    trainingOutputSet = [dataFrame["Has Filter"].iloc[index] for index in trainingIndexSet]

    validationInputSet = [dataFrame["Photo"].iloc[index] for index in validationIndexSet]
    validationOutputSet = [dataFrame["Has Filter"].iloc[index] for index in validationIndexSet]

    return trainingInputSet, trainingOutputSet, validationInputSet, validationOutputSet


def getRGBValuesForAllImages(inputImages, size):
    rgbValues = []
    for imagePath in inputImages:
        rgbValues.append([])
        image = Image.open(imagePath)
        image = image.resize(size)

        for pixel in list(image.getdata()):
            r, g, b = pixel[0], pixel[1], pixel[2]
            rgbValues[-1].append(r)
            rgbValues[-1].append(g)
            rgbValues[-1].append(b)
    return rgbValues

In [15]:
def getClassifier(numberOfHiddenLayers, activationFunction, trainingInputs, trainingOutputs):
    classifier = neural_network.MLPClassifier(hidden_layer_sizes=(numberOfHiddenLayers,), activation=activationFunction, max_iter=100, solver='sgd', verbose=10, random_state=1, learning_rate_init=.1)
    classifier.fit(trainingInputs, trainingOutputs)
    return classifier

def testClassifier(numberOfHiddenLayers, activationFunction, size):
    trainingInputSet, trainingOutputSet, validationInputSet, validationOutputSet = getTrainingAndValidationSets()
    trainingInputs = getRGBValuesForAllImages(trainingInputSet, size)
    trainingOutputs = trainingOutputSet
    classifier = getClassifier(numberOfHiddenLayers, activationFunction, trainingInputs, trainingOutputs)

    validaitonInputs = getRGBValuesForAllImages(validationInputSet, size)
    outputs = classifier.predict(validaitonInputs)

    TN, FP, FN, TP = clasificationPerformance(validationOutputSet, outputs, "YES")
    accuracy = getAccuracy(TN, FP, FN, TP)
    precision = getPrecision(FP, TP)
    recall = getRecall(TP, FN)
    print("Accuracy: {}\nPrecision: {}\nRecall: {}".format(accuracy, precision, recall))


# Testarea clasificatorului - influenta (hyper)parametrilor

In [16]:
testClassifier(numberOfHiddenLayers=50, activationFunction='tanh', size=(128, 128))

Iteration 1, loss = 0.70778898
Iteration 2, loss = 0.74905773
Iteration 3, loss = 0.73851225
Iteration 4, loss = 0.71609072
Iteration 5, loss = 0.70337733
Iteration 6, loss = 0.68988153
Iteration 7, loss = 0.67822156
Iteration 8, loss = 0.66700212
Iteration 9, loss = 0.65875862
Iteration 10, loss = 0.64818881
Iteration 11, loss = 0.63938426
Iteration 12, loss = 0.63259722
Iteration 13, loss = 0.62680634
Iteration 14, loss = 0.62183166
Iteration 15, loss = 0.61754836
Iteration 16, loss = 0.61383912
Iteration 17, loss = 0.61061695
Iteration 18, loss = 0.60779903
Iteration 19, loss = 0.60531744
Iteration 20, loss = 0.60311425
Iteration 21, loss = 0.60114260
Iteration 22, loss = 0.59936483
Iteration 23, loss = 0.59775120
Iteration 24, loss = 0.59627839
Iteration 25, loss = 0.59492801
Iteration 26, loss = 0.59368547
Iteration 27, loss = 0.59253887
Iteration 28, loss = 0.59147829
Iteration 29, loss = 0.59049516
Iteration 30, loss = 0.58958187
Iteration 31, loss = 0.58873147
Iteration 32, los



Accuracy: 0.37037037037037035
Precision: 0.625
Recall: 0.2631578947368421


In [17]:
testClassifier(numberOfHiddenLayers=50, activationFunction='tanh', size=(256, 256))

Iteration 1, loss = 0.89642108
Iteration 2, loss = 0.63412296
Iteration 3, loss = 0.69794357
Iteration 4, loss = 0.70072646
Iteration 5, loss = 0.63723151
Iteration 6, loss = 0.62925079
Iteration 7, loss = 0.67682228
Iteration 8, loss = 0.65885101
Iteration 9, loss = 0.62245201
Iteration 10, loss = 0.61761372
Iteration 11, loss = 0.61410803
Iteration 12, loss = 0.61147288
Iteration 13, loss = 0.68786381
Iteration 14, loss = 0.68688523
Iteration 15, loss = 0.65172629
Iteration 16, loss = 0.65254372
Iteration 17, loss = 0.65369573
Iteration 18, loss = 0.65473163
Iteration 19, loss = 0.65558411
Iteration 20, loss = 0.65622271
Iteration 21, loss = 0.65663998
Iteration 22, loss = 0.65684512
Iteration 23, loss = 0.65685938
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Accuracy: 0.7037037037037037
Precision: 0.375
Recall: 0.5


In [18]:
testClassifier(numberOfHiddenLayers=10, activationFunction='tanh', size=(128, 128))

Iteration 1, loss = 0.79744221
Iteration 2, loss = 0.89811683
Iteration 3, loss = 0.85280716
Iteration 4, loss = 0.70312468
Iteration 5, loss = 0.68155767
Iteration 6, loss = 0.71984763
Iteration 7, loss = 0.74669320
Iteration 8, loss = 0.74502487
Iteration 9, loss = 0.72723440
Iteration 10, loss = 0.71239141
Iteration 11, loss = 0.71017996
Iteration 12, loss = 0.71709960
Iteration 13, loss = 0.72494467
Iteration 14, loss = 0.72939316
Iteration 15, loss = 0.73113195
Iteration 16, loss = 0.73253373
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Accuracy: 0.6296296296296297
Precision: 0.625
Recall: 0.4166666666666667


In [19]:
testClassifier(numberOfHiddenLayers=200, activationFunction='tanh', size=(256, 256))

Iteration 1, loss = 0.79384520
Iteration 2, loss = 0.78165137
Iteration 3, loss = 1.99374320
Iteration 4, loss = 5.60516322
Iteration 5, loss = 1.36340960
Iteration 6, loss = 6.52872083
Iteration 7, loss = 0.66827362
Iteration 8, loss = 2.08375171
Iteration 9, loss = 5.86981249
Iteration 10, loss = 2.03327417
Iteration 11, loss = 5.38224083
Iteration 12, loss = 1.00655038
Iteration 13, loss = 2.88263334
Iteration 14, loss = 4.09815890
Iteration 15, loss = 0.59241448
Iteration 16, loss = 0.99419568
Iteration 17, loss = 1.75542240
Iteration 18, loss = 2.42638052
Iteration 19, loss = 2.69942793
Iteration 20, loss = 1.16065202
Iteration 21, loss = 1.12925286
Iteration 22, loss = 1.20153064
Iteration 23, loss = 1.13703687
Iteration 24, loss = 0.99728844
Iteration 25, loss = 0.96798196
Iteration 26, loss = 0.80548307
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Accuracy: 0.5555555555555556
Precision: 0.25
Recall: 0.25


In [20]:
testClassifier(numberOfHiddenLayers=50, activationFunction='logistic', size=(128, 128))

Iteration 1, loss = 0.70254298
Iteration 2, loss = 0.70038306
Iteration 3, loss = 0.68606223
Iteration 4, loss = 0.68804875
Iteration 5, loss = 0.68464230
Iteration 6, loss = 0.68567518
Iteration 7, loss = 0.68464784
Iteration 8, loss = 0.68438763
Iteration 9, loss = 0.68440728
Iteration 10, loss = 0.68440633
Iteration 11, loss = 0.68441041
Iteration 12, loss = 0.68441382
Iteration 13, loss = 0.68441458
Iteration 14, loss = 0.68441286
Iteration 15, loss = 0.68440860
Iteration 16, loss = 0.68440187
Iteration 17, loss = 0.68439275
Iteration 18, loss = 0.68438138
Iteration 19, loss = 0.68436788
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Accuracy: 0.7037037037037037
Precision: 0.0
Recall: 0


In [21]:
testClassifier(numberOfHiddenLayers=100, activationFunction='relu', size=(128, 128))

Iteration 1, loss = 14.31189336
Iteration 2, loss = 17.93621003
Iteration 3, loss = 5026343.77331306
Iteration 4, loss = 4954687308.09134960
Iteration 5, loss = 10075449157.00680351
Iteration 6, loss = 16222098596.91878319
Iteration 7, loss = 22999828687.46001816
Iteration 8, loss = 30108833965.59874725
Iteration 9, loss = 37324261489.73435211
Iteration 10, loss = 44480171015.42481232
Iteration 11, loss = 51456722602.41344452
Iteration 12, loss = 58169960458.33673859
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Accuracy: 0.7037037037037037
Precision: 0.0
Recall: 0


In [24]:
testClassifier(numberOfHiddenLayers=10, activationFunction='relu', size=(128, 128))

Iteration 1, loss = 16.06751443
Iteration 2, loss = 17.26917865
Iteration 3, loss = 118964.79247998
Iteration 4, loss = 20956157.24016824
Iteration 5, loss = 42531836.98216401
Iteration 6, loss = 68418387.16202605
Iteration 7, loss = 96955696.05411910
Iteration 8, loss = 126883175.02684046
Iteration 9, loss = 157255439.50506768
Iteration 10, loss = 187374851.41160014
Iteration 11, loss = 216737635.34009638
Iteration 12, loss = 244990913.78694135
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Accuracy: 0.7037037037037037
Precision: 0.0
Recall: 0
