In [18]:
import os

# # # parent, subdirs, files
# for parent, subdirs, files in os.walk('./happy_dataset'):
#     for file in files:
#         if not ('_2' in file or '_4' in file):
#             if ('happy' in file):
#                 os.rename(os.path.join(parent, file), f'./happy_dataset/happy/{file[:file.find(".")]}.pgm')
#             elif ('sad' in file):
#                 os.rename(os.path.join(parent, file), f'./happy_dataset/sad/{file[:file.find(".")]}.pgm')

import h5py
import numpy as np
from imageio.v2 import imread

file_names = []
happy_ds = []
for parent,subdirs,files in os.walk('./happy_dataset/happy'):
    for file in files:
        file_path = os.path.join(parent, file)
        img_data = imread(file_path)
        happy_ds.append(img_data)
        file_names.append(file_path)
    
sad_ds = []
for parent,subdirs,files in os.walk('./happy_dataset/sad'):
    for file in files:
        file_path = os.path.join(parent, file)
        img_data = imread(file_path)
        sad_ds.append(img_data)
        file_names.append(file_path)

y = np.ones(len(happy_ds)).tolist()
y.extend(np.zeros(len(sad_ds)).tolist())
happy_ds.extend(sad_ds)
with h5py.File("happyds.h5", "w") as file:
    dset = file.create_dataset("x_train", data=np.array(happy_ds))
    dset = file.create_dataset("y_train", data= np.array(y))


In [19]:
import numpy as np
import h5py

# Training rate
alpha = 0.002

def sigmoid(z):
    """Return the logistic function sigma(z) = 1/(1+exp(-z))."""
    return 1 / (1+np.exp(-z))

def cost(Y, Yhat):
    """Return the cost function for predictions Yhat of classifications Y."""
    return (- Y @ np.log(Yhat.T) - (1 - Y) @ np.log(1 - Yhat.T)) / m

def accuracy(Y, Yhat):
    """Return measure of the accuracy with which Yhat predicts Y."""
    return 1 - np.mean(np.abs(Y - Yhat.round()))

def model(X, w, b):
    """Apply the logistic model parameterized by w, b to features X."""
    z = w.T @ X + b
    Yhat = sigmoid(z)
    return z, Yhat


def train(X, Y, max_it=1000):
    """Train the logistic regression algorithm on the data X classified as Y."""

    # Parameter vector, w, and constant term (bias), b.
    # For random initialization, use the following:
    #w, b = np.random.random((nx,1)) * 0.01, 0.01
    # To initialize with zeros, use this line instead:
    w, b = np.zeros((nx,1)), 0

    def propagate(w, b):
        """Propagate the training by advancing w, b to reduce the cost, J."""
        z, Yhat = model(X, w, b)
        w -= alpha / m * (X @ (Yhat - Y).T)
        b -= alpha / m * np.sum(Yhat - Y)
        J = np.squeeze(cost(Y, Yhat))
        if not it % 100:
            # Provide an update on the progress we have made so far.
            print('{}: J = {}'.format(it, J))
            print('train accuracy = {:g}%'.format(accuracy(Y, Yhat) * 100))
        return w, b

    # Train the model by iteratively improving w, b.
    for it in range(max_it):
        w, b = propagate(w, b)
    return w, b

import random

n = len(file_names)
train_indices = random.sample([i for i in range(n)], 3 * n // 4)
test_indices = [i for i in range(n) if not i in train_indices]

ds = h5py.File('./happyds.h5', 'r')

ds_x = np.array([ds["x_train"][i] for i in train_indices]).T / 100
ds_y = np.array([ds["y_train"][i] for i in train_indices])

m = len(ds_y)

# Dimension of the feature vector for each example.
nx = ds_x.size // m
# Packed feature vector and associated classification.
X, Y = ds_x.reshape((nx, m)), ds_y.reshape((1, m))

# Train the model
w, b = train(X, Y, 2000)


0: J = 0.6931471805599452
train accuracy = 50.8%
100: J = 1.8273954583590601
train accuracy = 49.2%
200: J = 1.7578291851311703
train accuracy = 50.8%
300: J = 1.5866040998059456
train accuracy = 54%
400: J = 1.4103664931156494
train accuracy = 56.4%
500: J = 1.2674805700009868
train accuracy = 58.8%
600: J = 1.154277287534191
train accuracy = 61.6%
700: J = 1.0589509696816852
train accuracy = 64%
800: J = 0.9743322184044451
train accuracy = 64.4%
900: J = 0.8969440571429769
train accuracy = 64.8%
1000: J = 0.8249709954342059
train accuracy = 65.2%
1100: J = 0.7573183517178228
train accuracy = 66%
1200: J = 0.6931127143738514
train accuracy = 67.2%
1300: J = 0.6313475281148886
train accuracy = 68.8%
1400: J = 0.5706995407401348
train accuracy = 72.4%
1500: J = 0.509628746056468
train accuracy = 74.8%
1600: J = 0.44687826109538936
train accuracy = 76.4%
1700: J = 0.38254287465859477
train accuracy = 80%
1800: J = 0.3196180542359206
train accuracy = 84.4%
1900: J = 0.2634398203543284
tra

In [47]:
# i = 18
# ind = test_indices[i]
# print(ind)

# features = np.asarray(ds["x_train"][ind], dtype='uint8')[:, :].T / 100
# acc_y = ds["y_train"][ind]

# z, yhat = model(features.reshape(nx, 1), w, b)
# print(acc_y, np.squeeze(yhat))

works = []
fails = []
for inde in test_indices:
    features = np.asarray(ds["x_train"][inde], dtype='uint8')[:, :].T / 100
    acc_y = ds["y_train"][inde]

    z, yhat = model(features.reshape(nx, 1), w, b)
    
    if (yhat < 0.5 and acc_y == 0):
        works.append(inde)
    elif (yhat > 0.5 and acc_y == 1):
        works.append(inde)
    else:
        fails.append(inde)
    
print(fails)


[164, 31, 212, 4, 19, 235, 169, 193, 101, 50, 309, 220, 318, 37, 155, 17, 104, 239, 148, 270, 96, 288, 165, 170, 257, 116, 117, 89, 211, 228, 171, 102, 313, 253, 13, 214, 160, 178, 27, 158, 90]
