In [None]:
# Import libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import cv2
import random

In [None]:
# Load in dataset
# Root directory of the Kaggle dataset.
train_dir = "/kaggle/input/70-dog-breedsimage-data-set"
# Build the CSV path from train_dir so the dataset location is defined in one place.
data_df = pd.read_csv(os.path.join(train_dir, "dogs.csv"))
data_df.head()

In [None]:
# Split into training, validation, and test data
# The third column of data_df names the split each row belongs to; take an
# independent copy of the rows for every split so later edits do not touch data_df.
train_df, valid_df, test_df = (
    data_df[data_df.iloc[:, 2] == split_name].copy()
    for split_name in ("train", "valid", "test")
)
train_df.head()

In [None]:
# Encode labels
# Fit the label -> integer mapping on the training split only and reuse it for
# the validation and test splits. Factorizing each split on its own numbers the
# classes by order of first appearance, so the same label text could receive a
# different code in each split.
encoded_train_labels, label_classes = pd.factorize(train_df["labels"])
label_index = pd.Index(label_classes)
# get_indexer returns -1 for a label that never occurs in the training split.
encoded_valid_labels = label_index.get_indexer(valid_df["labels"])
encoded_test_labels = label_index.get_indexer(test_df["labels"])
train_df["encoded_labels"] = encoded_train_labels
valid_df["encoded_labels"] = encoded_valid_labels
test_df["encoded_labels"] = encoded_test_labels

# Show the distinct training codes and the frame that now carries them.
print(np.unique(encoded_train_labels))
train_df.head()

In [None]:
# Drop unnecessary columns
# Remove the raw label text and the split-marker column from every split.
columns_to_drop = ["labels", "data set"]
train_df, valid_df, test_df = (
    split_df.drop(columns=columns_to_drop)
    for split_df in (train_df, valid_df, test_df)
)
print(train_df.head(), valid_df.head(), test_df.head(), sep="\n")

In [None]:
# Make labels first column
# Select the two remaining columns in the desired order for every split.
new_order = ["encoded_labels", "filepaths"]
train_df, valid_df, test_df = (
    split_df[new_order] for split_df in (train_df, valid_df, test_df)
)
print(train_df.head(), valid_df.head(), test_df.head(), sep="\n")

In [None]:
# TRAINING
# Convert filepaths to numpy arrays
def _read_grayscale(path):
    """Read the image at `path` as a grayscale array.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising, so check here and name the offending path.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return image


# Prefix each relative file path with the dataset directory, then replace the
# path with the decoded grayscale image.
train_df["images"] = train_dir + "/" + train_df["filepaths"]
train_df = train_df.drop(columns=["filepaths"])
train_df["images"] = train_df["images"].apply(_read_grayscale)
train_df.head()

In [None]:
# Flatten images
# Stack the per-row image arrays into one array, then collapse every image
# into a single row of pixel values (one row per training example).
pixels = np.stack(train_df["images"].tolist()).reshape(train_df.shape[0], -1)

In [None]:
# Create columns for individual pixels of images
pixels_df = pd.DataFrame(pixels, columns=[f"pixel_{i}" for i in range(pixels.shape[1])])
# pixels_df gets a fresh 0..N-1 index, and concat(axis=1) aligns rows by index
# label. Re-index the labels the same way so each label stays next to its own
# pixels instead of being mismatched or padded with NaN when train_df carries
# a different index.
train_df = pd.concat(
    [train_df["encoded_labels"].reset_index(drop=True), pixels_df],
    axis=1,
)
train_df.head()

In [None]:
# Create numpy array representing all training images
train_df = np.array(train_df)
# Shuffle whole rows in place (a label stays with its pixels). A seeded
# generator makes the order reproducible across kernel restarts and leaves the
# global NumPy random state untouched.
np.random.default_rng(42).shuffle(train_df)

In [None]:
# Separate image data from labels
train_arr = train_df
# Column 0 holds the encoded label; every remaining column is a pixel value.
Y_train = train_arr[:, 0]
X_train = train_arr[:, 1:] / 255  # divide pixel values by 255
m, n = X_train.shape  # m rows (examples), n columns (pixels per example)
# One output unit per distinct training label code.
output_size = len(np.unique(encoded_train_labels))
# Hidden width: two thirds of the input width plus the output width.
hidden_size = (2 * n) // 3 + output_size
X_train.shape

In [None]:
# Define activation function for hidden layer
def relu(Z):
    """Element-wise ReLU: keep the positive entries of Z and replace the rest with 0."""
    activated = np.maximum(0, Z)
    return activated

def relu_derivative(Z):
    """Return a boolean mask that is True where Z is strictly positive.

    Multiplying by this mask passes a gradient through where the ReLU input
    was positive and zeroes it elsewhere.
    """
    return np.greater(Z, 0)

# Define activation function for output layer
def softmax(Z, axis=-1):
    """Numerically stable softmax along `axis`.

    The network in this notebook stores one example per row and one class per
    column (predictions are read with argmax over axis=1 and indexed as
    [row, label]), so the probabilities must sum to 1 across the class axis,
    not across the batch.

    Args:
        Z: array of logits. It is not modified.
        axis: axis holding the class scores. Defaults to the last axis.

    Returns:
        Array with the same shape as Z whose entries along `axis` sum to 1.
    """
    Z = np.asarray(Z)
    # Subtracting the maximum leaves the result unchanged but keeps exp() from
    # overflowing. Use a new array so the caller's logits are not mutated.
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=axis, keepdims=True)

In [None]:
#Initialize weights and biases
def init_params():
    """Create the initial weights, biases and zeroed gradient buffers.

    Reads the module-level sizes `n` (inputs), `hidden_size` and `output_size`.

    Returns:
        Tuple (W1, b1, W2, b2, W1_gradient, b1_gradient, W2_gradient,
        b2_gradient) where W1 is (n, hidden_size), W2 is
        (hidden_size, output_size), the biases are 1-D, and every gradient
        buffer is all zeros with the shape of its parameter.
    """
    # Scale the random weights by sqrt(2 / fan_in) (He initialization). With
    # unit-variance weights the pre-activations grow with the layer width and
    # saturate the softmax output.
    W1 = np.random.randn(n, hidden_size) * np.sqrt(2.0 / n)
    b1 = np.zeros(hidden_size)
    W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
    b2 = np.zeros(output_size)

    # np.zeros takes the shape as a single tuple; a second positional argument
    # would be interpreted as the dtype.
    W1_gradient = np.zeros((n, hidden_size))
    b1_gradient = np.zeros(hidden_size)
    W2_gradient = np.zeros((hidden_size, output_size))
    b2_gradient = np.zeros(output_size)
    return W1, b1, W2, b2, W1_gradient, b1_gradient, W2_gradient, b2_gradient

In [None]:
# Define forward propagation
def forward_prop(W1, b1, W2, b2, X, Y=None):
    """Run the two-layer network forward and optionally score the predictions.

    Args:
        W1, b1: hidden-layer weights and bias.
        W2, b2: output-layer weights and bias.
        X: input array with one example per row.
        Y: optional integer class labels, one per row of X. When omitted, a
            module-level `Y` is used if one exists (the previous behaviour);
            otherwise loss and accuracy are returned as None.

    Returns:
        Tuple (Z1, A1, Z2, A2, loss, accuracy): hidden pre-activation, hidden
        activation, output pre-activation, softmax output, and the
        cross-entropy loss and accuracy against Y (or None for both).
    """
    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)

    if Y is None:
        # The labels used to be read from an undeclared global; keep that
        # working for existing callers without raising when it is absent.
        Y = globals().get("Y")

    if Y is None:
        loss, accuracy = None, None
    else:
        loss = compute_cross_entropy_loss(A2, Y)
        accuracy = compute_accuracy(A2, Y)
    return Z1, A1, Z2, A2, loss, accuracy

In [None]:
# Define backward propagation
def backward_prop(Z1, A1, Z2, A2, W1, b1, W2, b2, X, Y):
    """Compute the gradients of the mean cross-entropy loss for the two-layer network.

    Args:
        Z1: hidden-layer pre-activation, one row per example.
        A1: hidden-layer activation (ReLU of Z1).
        Z2: output-layer pre-activation (unused; kept for the call signature).
        A2: softmax output, one row per example and one column per class.
        W1, b1, b2: unused here; kept for the call signature.
        W2: output-layer weights, used to push the error back to the hidden layer.
        X: input array, one row per example.
        Y: integer class label for each row of X.

    Returns:
        Tuple (W1_gradient, b1_gradient, W2_gradient, b2_gradient).
    """
    batch_size = len(X)
    labels = np.asarray(Y).astype(np.intp)  # integer indices for fancy indexing

    # For softmax followed by cross-entropy, d(loss)/d(Z2) is
    # (A2 - one_hot(Y)) / batch_size. Work on a copy so A2 is left intact.
    dLZ2 = np.array(A2, dtype=float)
    dLZ2[np.arange(batch_size), labels] -= 1
    dLZ2 /= batch_size

    # Output layer gradients.
    dLW2 = np.dot(np.transpose(A1), dLZ2)
    dB2 = np.sum(dLZ2, axis=0)

    # Push the error back through W2, then through the ReLU.
    dLA1 = np.dot(dLZ2, np.transpose(W2))
    dLZ1 = dLA1 * relu_derivative(Z1)

    # Hidden layer gradients.
    dLW1 = np.dot(np.transpose(X), dLZ1)
    dB1 = np.sum(dLZ1, axis=0)

    W1_gradient = dLW1
    b1_gradient = dB1
    W2_gradient = dLW2
    b2_gradient = dB2
    return W1_gradient, b1_gradient, W2_gradient, b2_gradient

In [None]:
def compute_accuracy(X_pred, Y):
    """Return the fraction of rows whose highest-scoring column index equals the label in Y."""
    is_correct = np.argmax(X_pred, axis=1) == Y
    return np.mean(is_correct)

def compute_cross_entropy_loss(X_pred, Y, eps=1e-12):
    """Mean cross-entropy between predicted class probabilities and integer labels.

    Args:
        X_pred: array of predicted probabilities, one row per example and one
            column per class.
        Y: integer class label for each row of X_pred.
        eps: lower bound applied to the selected probabilities before the
            logarithm, so a probability of exactly 0 yields a large finite
            loss instead of infinity.

    Returns:
        The average of -log(probability assigned to the true class).
    """
    labels = np.asarray(Y).astype(np.intp)  # integer indices for fancy indexing
    true_class_prob = np.asarray(X_pred)[np.arange(len(labels)), labels]
    log_likelihood = -np.log(np.clip(true_class_prob, eps, 1.0))
    loss = np.sum(log_likelihood) / len(labels)
    return loss