In [None]:
import numpy as np
import pandas as pd 
import zipfile
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import random
import os
# Show the entries of the competition input directory (path is relative to the
# notebook's working directory).
print(os.listdir("../input/dogs-vs-cats"))

In [None]:
# Size every image is resized to before training; IMAGE_SIZE is the
# (width, height) tuple handed to cv2.resize in load_data.
IMAGE_WIDTH=64
IMAGE_HEIGHT=64
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT)
# NOTE(review): IMAGE_CHANNELS is not referenced anywhere else in the visible notebook.
IMAGE_CHANNELS=3
# Folders the extracted images are read from.
# NOTE(review): TRAIN_DIRECTORY ends with "/" while TEST_DIRECTORY does not, so
# building a path with plain "+" concatenation only works for the former.
TRAIN_DIRECTORY="/kaggle/working/train/"
TEST_DIRECTORY="/kaggle/working/test1"

In [None]:
def extract_files(source_path, target_path):
    """Extract every member of a zip archive into a directory.

    Args:
        source_path: Path of the ``.zip`` archive to read.
        target_path: Directory that receives the extracted members.

    Returns:
        None.
    """
    # The context manager closes the archive even when extraction raises,
    # which the explicit open/close pair did not guarantee.
    with zipfile.ZipFile(source_path, 'r') as zip_ref:
        zip_ref.extractall(target_path)

In [None]:
def extraction():
    """Unpack the test and train archives of the dataset into /kaggle/working/.

    The test archive is extracted first, then the train archive.
    """
    for archive in ('/kaggle/input/dogs-vs-cats/test1.zip',
                    '/kaggle/input/dogs-vs-cats/train.zip'):
        extract_files(archive, '/kaggle/working/')

In [None]:
# Unpack both archives; the files are written below /kaggle/working/.
extraction()

In [None]:
def get_filenames(directory):
    """Return the names of the entries contained in ``directory``.

    Thin wrapper around ``os.listdir``; the result is a list of bare names
    (not full paths).
    """
    return os.listdir(directory)

In [None]:
# build the input images and output labels
def load_data(filenames,directory,limit=None):
    """Read images from disk, resize them and derive a label from each file name.

    Args:
        filenames: List of file names located inside ``directory``.
        directory: Folder that contains the image files.
        limit: Optional maximum number of files to load (useful for quick
            experiments). ``None`` (the default) loads every file.

    Returns:
        A tuple ``(X, y)`` of two equally long lists: the images resized to
        ``IMAGE_SIZE`` and their labels. The label is 0 when the part of the
        file name before the first ``.`` equals ``dog`` and 1 for every other
        name, including names that carry no class prefix at all.
    """
    X=[]
    y=[]
    # Slicing replaces the manual countdown counter the loop used to carry.
    selected = filenames if limit is None else filenames[:limit]
    for name in selected:
        img=mpimg.imread(os.path.join(directory,name))
        X.append(cv2.resize(img,IMAGE_SIZE))
        cat=name.split('.')[0]
        y.append(0 if cat=='dog' else 1)
    return X,y


In [None]:
# Load the training set: list the files in TRAIN_DIRECTORY, then read them into
# a list of resized images (X) and a list of labels (y).
filenames=get_filenames(TRAIN_DIRECTORY)
X,y=load_data(filenames,TRAIN_DIRECTORY)

In [None]:
# Load the test set the same way as the training set.
# NOTE(review): load_data labels every file whose name does not start with
# "dog." as 1. If the test files carry no class prefix (presumably they are
# named by id only -- confirm), y_test is all 1s and is not a real ground truth.
test_filenames=get_filenames(TEST_DIRECTORY)
X_test,y_test=load_data(test_filenames,TEST_DIRECTORY)

In [None]:
# showing a sample image
def show_image(filenames,directory):
    """Print the name of a randomly chosen file and display it as an image.

    Args:
        filenames: Non-empty sequence of image file names inside ``directory``.
        directory: Folder that contains the files; a trailing path separator
            is optional.
    """
    sample = random.choice(filenames)
    print(sample)
    # os.path.join (also used by load_data) works whether or not ``directory``
    # ends with a separator; plain "+" concatenation did not.
    plt.imshow(mpimg.imread(os.path.join(directory, sample)))
    plt.show()

In [None]:
# Display one randomly picked training image together with its file name.
show_image(filenames,TRAIN_DIRECTORY)

In [None]:
# flatten every image into one column and scale the pixel values
def refine_data(X,y):
    """Turn lists of images and labels into the matrices used by the network.

    Args:
        X: Sequence of equally shaped image arrays.
        y: Sequence of labels, one per image.

    Returns:
        A tuple ``(X, y)`` where ``X`` has one column per image (each image
        flattened and divided by 255) and ``y`` is a row vector of shape
        ``(1, number_of_images)``.
    """
    images = np.array(X)
    sample_count = images.shape[0]
    columns = images.reshape(sample_count, -1).T / 255
    labels = np.array(y)
    labels = labels.reshape((1, labels.shape[0]))
    return columns, labels

In [None]:
# Convert the training lists into matrices and show their shapes.
# NOTE(review): X and y are overwritten with the converted values, so this cell
# is not idempotent -- running it a second time reshapes and rescales the
# already converted data. Re-run the loading cell first if it must be repeated.
X,y=refine_data(X,y)
print(X.shape)
print(y.shape)

In [None]:
# Layer sizes of the network: the input size (rows of X), four hidden layers
# and a single output unit. The bare expression below displays the list.
layer_dims=[X.shape[0],500,100,50,25,1]
layer_dims

In [None]:
# defining different methods used in NN

# initialize parameters
def initialize_parameters(layer_dims):
    """Create the weight matrices and bias vectors of every layer.

    The global NumPy random generator is re-seeded with 1 on every call, so
    the returned weights are reproducible.

    Args:
        layer_dims: Sequence of layer sizes, input layer first.

    Returns:
        Dict with ``'W<l>'`` of shape ``(layer_dims[l], layer_dims[l-1])``
        (standard normal samples divided by ``sqrt(layer_dims[l-1] / 2)``) and
        ``'b<l>'`` of shape ``(layer_dims[l], 1)`` filled with zeros, for
        ``l = 1 .. len(layer_dims) - 1``.
    """
    np.random.seed(1)
    parameters = {}
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        suffix = str(layer)
        parameters['W' + suffix] = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in / 2)
        parameters['b' + suffix] = np.zeros((fan_out, 1))
    return parameters

In [None]:
# Build the parameters for layer_dims, print the shape of every entry and
# display the first five rows of W1 as a sanity check.
parameters=initialize_parameters(layer_dims)
for param in parameters:
    print(param+" : "+str(parameters[param].shape))
parameters['W1'][:5]

In [None]:
# linear (affine) part of a layer's forward pass
def linear_fwd(A,W,b):
    """Compute ``Z = W . A + b`` for one layer.

    Args:
        A: Activations entering the layer.
        W: Weight matrix of the layer.
        b: Bias of the layer.

    Returns:
        A tuple ``(Z, cache)`` where ``cache`` is ``(A, W, b)``, kept for the
        backward pass.
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

In [None]:
# Sanity check: run the first layer's linear step on the whole training matrix
# and display the shape of the result.
Z,cache=linear_fwd(X,parameters['W1'],parameters['b1'])
Z.shape

In [None]:
# activation functions
def sigmoid(Z):
    """Apply the logistic function ``1 / (1 + exp(-Z))`` element-wise.

    Returns:
        A tuple ``(A, cache)``; ``cache`` is the input ``Z`` itself, kept for
        the backward pass.
    """
    return 1 / (1 + np.exp(-Z)), Z
def relu(Z):
    """Apply ``max(Z, 0)`` element-wise.

    Returns:
        A tuple ``(A, cache)``; ``cache`` is the input ``Z`` itself, kept for
        the backward pass.
    """
    return np.maximum(0, Z), Z

In [None]:
# Quick check of sigmoid on two values; the cell displays the (A, cache) tuple.
sigmoid(np.array([0,2]))

In [None]:
# Quick check of relu on a negative and a positive value.
relu(np.array([-50,50]))

In [None]:
# linear step followed by an activation function
def linear_fwd_activation(A_prev,W,b,activation):
    """Run one full layer: the linear step, then the chosen activation.

    Args:
        A_prev: Activations coming from the previous layer (or the input).
        W: Weight matrix of this layer.
        b: Bias of this layer.
        activation: Either ``'relu'`` or ``'sigmoid'``.

    Returns:
        A tuple ``(A, cache)`` where ``cache`` is
        ``(linear_cache, activation_cache)`` as produced by ``linear_fwd`` and
        the activation function.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # Reject unknown names up front; previously they fell through both
    # branches and failed later with an unrelated unbound-variable error.
    if activation not in ('relu', 'sigmoid'):
        raise ValueError("activation must be 'relu' or 'sigmoid', got %r" % (activation,))
    Z,linear_cache=linear_fwd(A_prev,W,b)
    if activation=='relu':
        A,activation_cache=relu(Z)
    else:
        A,activation_cache=sigmoid(Z)
    cache=(linear_cache,activation_cache)
    return A,cache
    

In [None]:
# Sanity check: run the first layer (linear step + ReLU) on the training matrix
# and print the shape of its activations.
A,cache=linear_fwd_activation(X,parameters['W1'],parameters['b1'],'relu')
print(A.shape)

In [None]:
# full forward pass through the network
def forward(X,parameters):
    """Propagate ``X`` through every layer of the network.

    All layers except the last use ReLU; the last one uses sigmoid. The number
    of layers is taken as ``len(parameters) // 2``, i.e. ``parameters`` is
    expected to hold one ``'W<l>'`` and one ``'b<l>'`` entry per layer.

    Args:
        X: Input matrix with one column per example.
        parameters: Dict of weights and biases keyed ``'W1'``, ``'b1'``, ...

    Returns:
        A tuple ``(AL, caches)``: the output activations of shape
        ``(1, X.shape[1])`` and the list of per-layer caches in layer order.
    """
    layer_count = len(parameters) // 2
    caches = []
    activations = X
    # hidden layers: 1 .. layer_count - 1
    for idx in range(1, layer_count):
        activations, layer_cache = linear_fwd_activation(
            activations, parameters['W' + str(idx)], parameters['b' + str(idx)], 'relu')
        caches.append(layer_cache)
    # output layer
    AL, layer_cache = linear_fwd_activation(
        activations, parameters['W' + str(layer_count)], parameters['b' + str(layer_count)], "sigmoid")
    caches.append(layer_cache)
    assert AL.shape == (1, X.shape[1])
    return AL, caches

In [None]:
# Sanity check: full forward pass over the training matrix with the freshly
# initialised parameters; prints the shape of the network output.
AL,cache=forward(X,parameters)
print(AL.shape)

In [None]:
def compute_cost(AL,y,eps=1e-15):
    """Mean binary cross-entropy between predictions and labels.

    Args:
        AL: Predicted probabilities, shape ``(1, m)``.
        y: Labels (0 or 1), shape ``(1, m)``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the
            logarithm is taken.

    Returns:
        The cost as a zero-dimensional NumPy value.
    """
    m=y.shape[1]
    # A saturated sigmoid yields exactly 0.0 or 1.0; log(0) would then turn
    # the whole cost into inf/NaN, so keep the probabilities strictly inside (0, 1).
    AL=np.clip(AL,eps,1-eps)
    cost=-np.sum(np.dot(y,np.log(AL).T)+np.dot(1-y,np.log(1-AL).T))/m
    cost=np.squeeze(cost)
    assert(cost.shape == ())
    return cost

In [None]:
# Cost of the untrained network on the training set (uses AL from the cell above).
compute_cost(AL,y)

In [None]:
# helper functions for backward propagation
def linear_backward(dz,cache):
    """Backward pass through the linear part of one layer.

    Args:
        dz: Gradient of the cost with respect to the layer's linear output.
        cache: Tuple ``(A_prev, W, b)`` stored by the forward pass.

    Returns:
        A tuple ``(dA_prev, dW, db)``; ``dW`` and ``db`` are averaged over the
        ``A_prev.shape[1]`` examples.
    """
    A_prev, W, _ = cache
    example_count = A_prev.shape[1]
    grad_W = np.dot(dz, A_prev.T) / example_count
    grad_b = np.sum(dz, axis=1, keepdims=True) / example_count
    grad_A_prev = np.dot(W.T, dz)
    return grad_A_prev, grad_W, grad_b

In [None]:
def relu_backward(dA,activation_cache):
    """Backward pass through a ReLU activation.

    Args:
        dA: Gradient with respect to the activation output.
        activation_cache: The pre-activation values ``Z`` saved by ``relu``.

    Returns:
        A copy of ``dA`` in which every position where ``Z <= 0`` is set to 0.
    """
    inactive = activation_cache <= 0
    # work on a copy so the caller's gradient array stays untouched
    dZ = np.array(dA, copy=True)
    dZ[inactive] = 0
    return dZ

In [None]:
def sigmoid_backward(dA, cache):
    """Backward pass through a sigmoid activation.

    Args:
        dA: Gradient with respect to the activation output.
        cache: The pre-activation values ``Z`` saved by ``sigmoid``.

    Returns:
        ``dA * s * (1 - s)`` where ``s`` is the sigmoid of ``Z``.
    """
    activation = 1 / (1 + np.exp(-cache))
    return dA * activation * (1 - activation)

In [None]:

def linear_activation_backward(dA, cache, activation):
    """Backward pass through one full layer (activation, then linear part).

    Args:
        dA: Gradient with respect to the layer's activation output.
        cache: Tuple ``(linear_cache, activation_cache)`` stored by the
            forward pass of the same layer.
        activation: Either ``"relu"`` or ``"sigmoid"``.

    Returns:
        The tuple ``(dA_prev, dW, db)`` produced by ``linear_backward``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # Reject unknown names up front; previously they skipped both branches
    # and failed at the return with an unrelated unbound-variable error.
    if activation not in ("relu", "sigmoid"):
        raise ValueError("activation must be 'relu' or 'sigmoid', got %r" % (activation,))
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    # the linear part is the same for both activations
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [None]:
# backward propagation
def backward(AL,y,caches,eps=1e-15):
    """Compute the gradients of the cross-entropy cost for every layer.

    Args:
        AL: Output of the forward pass (predicted probabilities).
        y: Labels; reshaped to the shape of ``AL``.
        caches: List of per-layer caches returned by ``forward``.
        eps: ``AL`` is clipped to ``[eps, 1 - eps]`` before it is used as a
            divisor.

    Returns:
        Dict with ``'dA<l-1>'``, ``'dW<l>'`` and ``'db<l>'`` for every layer
        ``l = 1 .. len(caches)``.
    """
    grads={}
    L=len(caches)
    y = y.reshape(AL.shape)
    # A saturated sigmoid outputs exactly 0.0 or 1.0; dividing by it would put
    # inf/NaN into every gradient, so keep the divisor strictly inside (0, 1).
    AL = np.clip(AL, eps, 1 - eps)
    dAL=-np.divide(y,AL)+np.divide(1-y,1-AL)
    # output layer (sigmoid)
    current_cache=caches[L-1]
    grads['dA'+str(L-1)],grads['dW'+str(L)],grads['db'+str(L)]=linear_activation_backward(dAL,current_cache,'sigmoid')
    # hidden layers (ReLU), walked from the last one back to the first
    for i in reversed(range(L-1)):
        grads['dA'+str(i)],grads['dW'+str(i+1)],grads['db'+str(i+1)]=linear_activation_backward(grads['dA'+str(i+1)],caches[i],'relu')
    return grads

In [None]:
def initial_adam_optimisation(parameters):
    """Create zero-filled optimiser state matching the shape of every parameter.

    Args:
        parameters: Dict holding ``'W<l>'`` and ``'b<l>'`` for each of the
            ``len(parameters) // 2`` layers.

    Returns:
        Dict with ``'Vdw<l>'``, ``'Vdb<l>'``, ``'Sdw<l>'`` and ``'Sdb<l>'``
        for every layer, each an all-zero array shaped like the matching
        weight or bias.
    """
    layer_count = len(parameters) // 2
    optimise = {}
    for layer in range(1, layer_count + 1):
        suffix = str(layer)
        weights = parameters['W' + suffix]
        bias = parameters['b' + suffix]
        optimise['Vdw' + suffix] = np.zeros_like(weights)
        optimise['Vdb' + suffix] = np.zeros_like(bias)
        optimise['Sdw' + suffix] = np.zeros_like(weights)
        optimise['Sdb' + suffix] = np.zeros_like(bias)
    return optimise

In [None]:
# function to update parameters
def update_parameters(parameters,grads,learning_rate,optimise,beta1,beta2,epsilon):
    """Apply one update step to every layer's weights and biases.

    The first and second moment estimates in ``optimise`` are refreshed on
    every call, but the parameters themselves are moved by plain gradient
    descent (``parameter -= learning_rate * gradient``).

    NOTE(review): the moment estimates and ``epsilon`` do not influence the
    step yet; an Adam-style step would have to be added here to make
    ``beta1``/``beta2``/``epsilon`` effective.

    Args:
        parameters: Dict of ``'W<l>'``/``'b<l>'`` arrays; updated in place.
        grads: Dict of ``'dW<l>'``/``'db<l>'`` gradients.
        learning_rate: Step size of the gradient-descent update.
        optimise: Dict of ``'Vdw'``/``'Vdb'``/``'Sdw'``/``'Sdb'`` moment
            estimates per layer; its entries are replaced with new values.
        beta1: Decay rate of the first moment estimate.
        beta2: Decay rate of the second moment estimate.
        epsilon: Currently unused (see the note above).

    Returns:
        The tuple ``(parameters, optimise)``.
    """
    L=len(parameters)//2
    for i in range(1,L+1):
        layer=str(i)
        dW=grads['dW'+layer]
        db=grads['db'+layer]
        # exponentially weighted averages of the gradients and squared gradients
        optimise['Vdw'+layer]=beta1*optimise['Vdw'+layer]+(1-beta1)*dW
        optimise['Vdb'+layer]=beta1*optimise['Vdb'+layer]+(1-beta1)*db
        optimise['Sdw'+layer]=beta2*optimise['Sdw'+layer]+(1-beta2)*np.power(dW,2)
        optimise['Sdb'+layer]=beta2*optimise['Sdb'+layer]+(1-beta2)*np.power(db,2)
        # No debug printing here: this runs once per mini-batch and dumping
        # full weight/gradient matrices floods the notebook output.
        parameters['W'+layer]-=learning_rate*dW
        parameters['b'+layer]-=learning_rate*db
    return parameters,optimise

In [None]:
# code for mini batches
def random_mini_batch(X,Y,batch_size=512):
    """Shuffle the examples and split them into mini-batches.

    Columns of ``X`` and ``Y`` are permuted with the same random permutation
    (drawn from NumPy's global generator), so every example keeps its label.

    Args:
        X: Input matrix with one column per example.
        Y: Label matrix with one column per example.
        batch_size: Number of examples per mini-batch.

    Returns:
        List of ``(mini_X, mini_Y)`` tuples. Every batch holds ``batch_size``
        columns except possibly the last one, which holds the remainder. No
        empty batch is produced.
    """
    m=X.shape[1]
    permutation=np.random.permutation(m)
    shuffled_X=X[:,permutation]
    shuffled_Y=Y[:,permutation]
    batches=[]
    # Slice the *shuffled* arrays (the originals were sliced before, which made
    # the shuffle a no-op). Stepping by batch_size covers the final partial
    # batch and never yields an empty one when m is a multiple of batch_size.
    for start in range(0,m,batch_size):
        stop=start+batch_size
        batches.append((shuffled_X[:,start:stop],shuffled_Y[:,start:stop]))
    return batches

In [None]:
# Now that all the helper functions are defined, let's build the model
def model(X,Y,layer_dims,learning_rate=0.0075,beta1=0.9,beta2=0.999,epsilon=1e-8,num_iterations=3000,print_cost=False):
    """Train the network with mini-batches and return the learned parameters.

    The global NumPy generator is seeded with 1, the mini-batches are built
    once before the first iteration and reused in every iteration, and the
    average mini-batch cost of each iteration is recorded and plotted.

    Args:
        X: Input matrix with one column per example.
        Y: Label matrix of shape ``(1, number_of_examples)``.
        layer_dims: Layer sizes, input layer first.
        learning_rate: Step size passed to ``update_parameters``.
        beta1, beta2, epsilon: Optimiser settings passed to ``update_parameters``.
        num_iterations: Number of passes over the mini-batches.
        print_cost: When true, print the average cost every 10 iterations.

    Returns:
        The trained parameter dict.

    Raises:
        ValueError: If ``X`` contains no examples.
    """
    np.random.seed(1)
    print(layer_dims)
    costs=[]
    parameters=initialize_parameters(layer_dims)
    optimise=initial_adam_optimisation(parameters)
    # Drop empty batches: a batch without examples makes the cost divide by zero.
    mini_batches=[(mini_X,mini_Y) for mini_X,mini_Y in random_mini_batch(X,Y,batch_size=512) if mini_X.shape[1]>0]
    if not mini_batches:
        raise ValueError("X contains no examples to train on")
    for i in range(0,num_iterations):
        cost=0.
        for mini_X,mini_Y in mini_batches:
            AL,caches=forward(mini_X,parameters)
            cost+=compute_cost(AL,mini_Y)
            grads=backward(AL,mini_Y,caches)
            parameters,optimise=update_parameters(parameters,grads,learning_rate,optimise,beta1,beta2,epsilon)
        avg_cost=cost/len(mini_batches)
        costs.append(avg_cost)
        if print_cost and i%10==0:
            print ("Cost after iteration %i: %f" %(i, avg_cost))
    plt.plot(range(num_iterations),costs)
    plt.title("Learning rate =" + str(learning_rate))
    # costs is empty when num_iterations is 0; there is no final cost to report then.
    if costs:
        print(costs[-1])
    plt.show()
    return parameters

In [None]:
# Display the shape of the training matrix (features, examples).
X.shape

In [None]:
# Train a small network: one hidden layer with 2 units, a single iteration.
# NOTE(review): this overwrites the deeper layer_dims defined earlier in the
# notebook, and also the `parameters` created by the initialisation demo cell.
layer_dims=[X.shape[0],2,1]
parameters=model(X,y,layer_dims,learning_rate=0.0075,num_iterations=1,print_cost=True)

In [None]:
# lr=0.0055
# while(lr<=0.01):
#     parameters=model(X,y,layer_dims,learning_rate=lr,num_iterations=2500,print_cost=False)
#     lr+=0.0005

In [None]:

def predict(X, y, parameters):
    """Classify every example in ``X`` and print the accuracy against ``y``.

    Args:
        X: Input matrix with one column per example.
        y: Labels of shape ``(1, number_of_examples)`` to compare against.
        parameters: Trained parameter dict used for the forward pass.

    Returns:
        Array of shape ``(1, number_of_examples)`` holding 1.0 where the
        network's output is greater than 0.5 and 0.0 elsewhere.
    """
    m = X.shape[1]

    # Forward propagation
    probas, _ = forward(X, parameters)

    # convert probabilities to 0/1 predictions in one vectorised step
    p = np.zeros((1, m))
    p[0, probas[0] > 0.5] = 1

    print("Accuracy: "  + str(np.sum((p == y)/m)))

    return p


In [None]:
# Predictions and accuracy on the training set.
p=predict(X,y,parameters)

In [None]:

# Convert the test lists into matrices, show their shapes and predict on them.
# NOTE(review): X_test/y_test are overwritten, so this cell is not idempotent
# (a second run would reshape and rescale the already converted data).
# NOTE(review): y_test comes from load_data's file-name rule; if the test files
# carry no "dog."/"cat." prefix (confirm), every label is 1 and the accuracy
# printed here only measures how often the model answers 1.
X_test,y_test=refine_data(X_test,y_test)
print(X_test.shape)
print(y_test.shape)
p_test=predict(X_test,y_test,parameters)

In [None]:
# Display the trained parameter dict (all weight and bias arrays).
parameters