In [18]:
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Root folder of the clean/messy image dataset. Keeping it in one place means
# relocating the data only requires editing this single line.
dataset_root = r'C:\Zoshyn\Classwork\ML\Datasets\Clean'

# Training images, one folder per class.
train_clean = os.path.join(dataset_root, 'train', 'clean')
train_messy = os.path.join(dataset_root, 'train', 'messy')
# The "test" split used throughout this notebook is read from the `val` folders.
test_clean = os.path.join(dataset_root, 'val', 'clean')
test_messy = os.path.join(dataset_root, 'val', 'messy')

# Side length (in pixels) every image is resized to before being flattened.
dimension = 128

In [27]:
def _load_folder(folder, label, size):
    """Read every decodable image in `folder` as a grayscale `size` x `size` array.

    Returns a pair `(images, labels)` where `labels` holds `label` once per
    image that was successfully loaded.
    """
    import warnings

    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order reproducible from run to run.
    for img_file in sorted(os.listdir(folder)):
        path = os.path.join(folder, img_file)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising; passing that to cv2.resize would crash.
            warnings.warn("Skipping unreadable image file: " + path)
            continue
        images.append(cv2.resize(img, (size, size)))
        labels.append(label)
    return images, labels


def load_dataset():
    """Load the train and test image sets from the configured folders.

    Images from the `*_clean` folders are labelled 0 and images from the
    `*_messy` folders are labelled 1. Every image is read in grayscale and
    resized to `dimension` x `dimension`.

    Returns:
        (x_train, y_train, x_test, y_test) as NumPy arrays, with the clean
        samples of each split placed before the messy ones.
    """
    x_train_list, y_train_list = [], []
    x_test_list, y_test_list = [], []

    for folder, label in ((train_clean, 0), (train_messy, 1)):
        images, labels = _load_folder(folder, label, dimension)
        x_train_list.extend(images)
        y_train_list.extend(labels)

    for folder, label in ((test_clean, 0), (test_messy, 1)):
        images, labels = _load_folder(folder, label, dimension)
        x_test_list.extend(images)
        y_test_list.extend(labels)

    return np.array(x_train_list), np.array(y_train_list), np.array(x_test_list), np.array(y_test_list)

In [28]:
x_train_original, y_train_original, x_test_original, y_test_original = load_dataset()

# Flatten every image into one column, giving (num_pixels, num_samples).
# Cast to float first so the subtraction below cannot wrap around if the
# pixels are stored as unsigned integers.
x_train = x_train_original.reshape(x_train_original.shape[0], -1).T.astype(np.float64)
x_test = x_test_original.reshape(x_test_original.shape[0], -1).T.astype(np.float64)
# Labels become row vectors of shape (1, num_samples).
y_train = y_train_original.reshape(x_train_original.shape[0], 1).T
y_test = y_test_original.reshape(x_test_original.shape[0], 1).T

# Min-max scale both splits with the TRAINING statistics so that the test
# features live on exactly the same scale the model was fitted on.
train_min = np.min(x_train)
train_max = np.max(x_train)
x_train = (x_train - train_min) / (train_max - train_min)
x_test = (x_test - train_min) / (train_max - train_min)

print("x_train.shape: " + str(x_train.shape))
print("y_train.shape: " + str(y_train.shape))
print("x_test.shape: " + str(x_test.shape))
print("y_test.shape: " + str(y_test.shape))

x_train.shape: (16384, 217)
y_train.shape: (1, 217)
x_test.shape: (16384, 20)
y_test.shape: (1, 20)


In [21]:
def activation(z):
    """Logistic sigmoid, 1 / (1 + exp(-z)), evaluated element-wise.

    Computed as exp(-log(1 + exp(-z))) via `np.logaddexp`, which never forms
    exp(-z) directly and therefore does not overflow for large negative `z`.

    Args:
        z: scalar or NumPy array.

    Returns:
        Sigmoid of `z`, same shape as `z`, with values in [0, 1].
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    s = np.exp(-np.logaddexp(0, -z))
    return s

def propagate(w, b, X, Y):
    """Compute the logistic-regression cost and its gradients for one pass.

    Args:
        w: weight column vector; `w.T` is multiplied with `X`.
        b: bias added to every linear score.
        X: feature matrix with one sample per column (m = X.shape[1]).
        Y: labels, broadcastable against the (1, m) activations.

    Returns:
        (grads, cost) where `grads` is {"dw": ..., "db": ...} and `cost` is the
        mean binary cross-entropy over the m samples.
    """
    m = X.shape[1]
    A = activation(np.dot(w.T, X) + b)

    # Keep probabilities strictly inside (0, 1) for the cost only, so that a
    # saturated prediction cannot produce log(0) and turn the cost into nan/inf.
    # The gradients below still use the unclipped activations.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -np.sum(np.multiply(Y, np.log(A_safe)) + np.multiply((1-Y), np.log(1-A_safe)))/m

    dw = (np.dot(X, ((A-Y).T)))/m
    db = np.sum((A-Y), axis=1)/m

    cost = np.squeeze(cost)

    grads = {"dw": dw,
             "db": db}

    return grads, cost

def optimize(w, b, X, Y, num_iterations, learning_rate, print_every=1):
    """Fit `w` and `b` with batch gradient descent.

    Args:
        w: initial weights.
        b: initial bias.
        X: feature matrix passed through to `propagate`.
        Y: labels passed through to `propagate`.
        num_iterations: number of gradient steps to take.
        learning_rate: step size applied to both gradients.
        print_every: print the cost every this many iterations. The default of
            1 prints on every iteration; pass 0 or None to silence the log.

    Returns:
        (params, grads, costs): the final {"w", "b"}, the gradients of the last
        iteration (both None when no iteration was run), and the list of costs
        recorded at every iteration.
    """
    costs = []
    # Pre-bind the gradients so that num_iterations == 0 returns cleanly
    # instead of failing on unbound locals when building `grads` below.
    dw = None
    db = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        w = w - (learning_rate * dw)
        b = b - (learning_rate * db)

        costs.append(cost)
        if print_every and i % print_every == 0:
            print ("Cost after iteration %i: %f" %(i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

def predict(w, b, X):
    """Predict a 0/1 label for every column of `X`.

    Args:
        w: weights; reshaped to a (X.shape[0], 1) column before use.
        b: bias added to every linear score.
        X: feature matrix with one sample per column.

    Returns:
        Float array holding 1.0 where the sigmoid activation is strictly
        greater than 0.5 and 0.0 otherwise.
    """
    w = w.reshape(X.shape[0], 1)
    A = activation(np.dot(w.T, X) + b)

    # Vectorised threshold; replaces the per-element Python loop.
    Y_prediction = (A > 0.5).astype(np.float64)
    return Y_prediction

def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5):
    """Train a logistic-regression classifier and report its accuracy.

    Starts from all-zero weights and a zero bias, runs `optimize`, predicts on
    both splits with `predict`, and prints the train and test accuracy.

    Returns:
        A dict with the cost history, the predictions for both splits, the
        learned `w` and `b`, and the hyper-parameters that were used.
    """
    initial_w = np.zeros((X_train.shape[0], 1))
    initial_b = 0

    # The final-step gradients returned by optimize are not needed here.
    fitted, _, costs = optimize(initial_w, initial_b, X_train, Y_train, num_iterations, learning_rate)
    w, b = fitted["w"], fitted["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Mean absolute difference between 0/1 predictions and labels is the
    # misclassification rate.
    train_error = np.mean(np.abs(Y_prediction_train - Y_train))
    test_error = np.mean(np.abs(Y_prediction_test - Y_test))
    print("train accuracy: {} %".format(100 - train_error * 100))
    print("test accuracy: {} %".format(100 - test_error * 100))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }

In [30]:
# Train on the flattened, scaled images. In the log captured below, the cost
# oscillates for the first several hundred iterations before it starts to
# decrease steadily.
d = model(
    x_train,
    y_train,
    x_test,
    y_test,
    num_iterations=1500,
    learning_rate=0.01,
)

Cost after iteration 0: 0.693147
Cost after iteration 1: 1.665956
Cost after iteration 2: 6.813127
Cost after iteration 3: 5.223928
Cost after iteration 4: 1.814029
Cost after iteration 5: 7.717224
Cost after iteration 6: 1.265719
Cost after iteration 7: 6.681853
Cost after iteration 8: 5.169173
Cost after iteration 9: 1.693273
Cost after iteration 10: 7.520505
Cost after iteration 11: 1.093604
Cost after iteration 12: 6.285895
Cost after iteration 13: 5.294035
Cost after iteration 14: 1.354645
Cost after iteration 15: 6.907044
Cost after iteration 16: 0.664570
Cost after iteration 17: 3.794024
Cost after iteration 18: 6.776683
Cost after iteration 19: 0.606234
Cost after iteration 20: 2.930887
Cost after iteration 21: 7.165827
Cost after iteration 22: 0.831964
Cost after iteration 23: 4.906748
Cost after iteration 24: 5.939678
Cost after iteration 25: 0.587220
Cost after iteration 26: 2.219528
Cost after iteration 27: 5.296712
Cost after iteration 28: 5.601445
Cost after iteration 29:

Cost after iteration 237: 1.521521
Cost after iteration 238: 2.942121
Cost after iteration 239: 0.944107
Cost after iteration 240: 1.782085
Cost after iteration 241: 1.478435
Cost after iteration 242: 2.846838
Cost after iteration 243: 0.973415
Cost after iteration 244: 1.825850
Cost after iteration 245: 1.434502
Cost after iteration 246: 2.747301
Cost after iteration 247: 1.004598
Cost after iteration 248: 1.872044
Cost after iteration 249: 1.389695
Cost after iteration 250: 2.643056
Cost after iteration 251: 1.037130
Cost after iteration 252: 1.919250
Cost after iteration 253: 1.344341
Cost after iteration 254: 2.534372
Cost after iteration 255: 1.069780
Cost after iteration 256: 1.964657
Cost after iteration 257: 1.299381
Cost after iteration 258: 2.422947
Cost after iteration 259: 1.100299
Cost after iteration 260: 2.003628
Cost after iteration 261: 1.256604
Cost after iteration 262: 2.312659
Cost after iteration 263: 1.125457
Cost after iteration 264: 2.030009
Cost after iteration

Cost after iteration 471: 0.117978
Cost after iteration 472: 0.128650
Cost after iteration 473: 0.141070
Cost after iteration 474: 0.167651
Cost after iteration 475: 0.197710
Cost after iteration 476: 0.287620
Cost after iteration 477: 0.390092
Cost after iteration 478: 0.873243
Cost after iteration 479: 1.269601
Cost after iteration 480: 3.232366
Cost after iteration 481: 0.533403
Cost after iteration 482: 1.310878
Cost after iteration 483: 1.472947
Cost after iteration 484: 3.426178
Cost after iteration 485: 0.398850
Cost after iteration 486: 0.786834
Cost after iteration 487: 1.022977
Cost after iteration 488: 2.617621
Cost after iteration 489: 0.859465
Cost after iteration 490: 2.168479
Cost after iteration 491: 1.116293
Cost after iteration 492: 2.716637
Cost after iteration 493: 0.740029
Cost after iteration 494: 1.733480
Cost after iteration 495: 1.230968
Cost after iteration 496: 2.836506
Cost after iteration 497: 0.625964
Cost after iteration 498: 1.304888
Cost after iteration

Cost after iteration 705: 0.047135
Cost after iteration 706: 0.047045
Cost after iteration 707: 0.046955
Cost after iteration 708: 0.046866
Cost after iteration 709: 0.046778
Cost after iteration 710: 0.046690
Cost after iteration 711: 0.046603
Cost after iteration 712: 0.046517
Cost after iteration 713: 0.046431
Cost after iteration 714: 0.046346
Cost after iteration 715: 0.046262
Cost after iteration 716: 0.046178
Cost after iteration 717: 0.046095
Cost after iteration 718: 0.046012
Cost after iteration 719: 0.045930
Cost after iteration 720: 0.045849
Cost after iteration 721: 0.045768
Cost after iteration 722: 0.045687
Cost after iteration 723: 0.045608
Cost after iteration 724: 0.045528
Cost after iteration 725: 0.045450
Cost after iteration 726: 0.045371
Cost after iteration 727: 0.045294
Cost after iteration 728: 0.045216
Cost after iteration 729: 0.045140
Cost after iteration 730: 0.045064
Cost after iteration 731: 0.044988
Cost after iteration 732: 0.044913
Cost after iteration

Cost after iteration 939: 0.035059
Cost after iteration 940: 0.035027
Cost after iteration 941: 0.034995
Cost after iteration 942: 0.034963
Cost after iteration 943: 0.034932
Cost after iteration 944: 0.034900
Cost after iteration 945: 0.034868
Cost after iteration 946: 0.034837
Cost after iteration 947: 0.034806
Cost after iteration 948: 0.034774
Cost after iteration 949: 0.034743
Cost after iteration 950: 0.034712
Cost after iteration 951: 0.034681
Cost after iteration 952: 0.034650
Cost after iteration 953: 0.034619
Cost after iteration 954: 0.034589
Cost after iteration 955: 0.034558
Cost after iteration 956: 0.034527
Cost after iteration 957: 0.034497
Cost after iteration 958: 0.034467
Cost after iteration 959: 0.034436
Cost after iteration 960: 0.034406
Cost after iteration 961: 0.034376
Cost after iteration 962: 0.034346
Cost after iteration 963: 0.034316
Cost after iteration 964: 0.034286
Cost after iteration 965: 0.034256
Cost after iteration 966: 0.034226
Cost after iteration

Cost after iteration 1168: 0.029414
Cost after iteration 1169: 0.029395
Cost after iteration 1170: 0.029376
Cost after iteration 1171: 0.029356
Cost after iteration 1172: 0.029337
Cost after iteration 1173: 0.029318
Cost after iteration 1174: 0.029299
Cost after iteration 1175: 0.029280
Cost after iteration 1176: 0.029261
Cost after iteration 1177: 0.029242
Cost after iteration 1178: 0.029223
Cost after iteration 1179: 0.029204
Cost after iteration 1180: 0.029185
Cost after iteration 1181: 0.029166
Cost after iteration 1182: 0.029147
Cost after iteration 1183: 0.029128
Cost after iteration 1184: 0.029110
Cost after iteration 1185: 0.029091
Cost after iteration 1186: 0.029072
Cost after iteration 1187: 0.029053
Cost after iteration 1188: 0.029035
Cost after iteration 1189: 0.029016
Cost after iteration 1190: 0.028997
Cost after iteration 1191: 0.028979
Cost after iteration 1192: 0.028960
Cost after iteration 1193: 0.028942
Cost after iteration 1194: 0.028923
Cost after iteration 1195: 0

Cost after iteration 1395: 0.025743
Cost after iteration 1396: 0.025729
Cost after iteration 1397: 0.025716
Cost after iteration 1398: 0.025702
Cost after iteration 1399: 0.025688
Cost after iteration 1400: 0.025675
Cost after iteration 1401: 0.025661
Cost after iteration 1402: 0.025648
Cost after iteration 1403: 0.025634
Cost after iteration 1404: 0.025621
Cost after iteration 1405: 0.025607
Cost after iteration 1406: 0.025594
Cost after iteration 1407: 0.025580
Cost after iteration 1408: 0.025567
Cost after iteration 1409: 0.025554
Cost after iteration 1410: 0.025540
Cost after iteration 1411: 0.025527
Cost after iteration 1412: 0.025514
Cost after iteration 1413: 0.025500
Cost after iteration 1414: 0.025487
Cost after iteration 1415: 0.025474
Cost after iteration 1416: 0.025460
Cost after iteration 1417: 0.025447
Cost after iteration 1418: 0.025434
Cost after iteration 1419: 0.025421
Cost after iteration 1420: 0.025408
Cost after iteration 1421: 0.025394
Cost after iteration 1422: 0