In [34]:
import numpy as np
import matplotlib.pyplot as plt
import time
%matplotlib inline

#importing original dataset
data_orig = np.loadtxt('a2_data/data_banknote_authentication.txt', delimiter=',')

In [35]:
#Dataset : 

print("Dataset : \n\n"+ str(data_orig))
print("\nDimensions of dataset : "+str(data_orig.shape))

Dataset : 

[[  3.6216    8.6661   -2.8073   -0.44699   0.     ]
 [  4.5459    8.1674   -2.4586   -1.4621    0.     ]
 [  3.866    -2.6383    1.9242    0.10645   0.     ]
 ...
 [ -3.7503  -13.4586   17.5932   -2.7771    1.     ]
 [ -3.5637   -8.3827   12.393    -1.2823    1.     ]
 [ -2.5419   -0.65804   2.6842    1.1952    1.     ]]

Dimensions of dataset : (1372, 5)


In [36]:
#Seed for np.random
seed=0
np.random.seed(seed)    

In [37]:
#Shuffling imported original dataset
np.random.shuffle(data_orig)  

In [38]:
#Shuffled dataset
print("Shuffled dataset with (Seed "+str(seed) +") :\n\n"+str(data_orig))

Shuffled dataset with (Seed 0) :

[[ -1.7713   -10.7665    10.2184    -1.0043     1.      ]
 [  5.1321    -0.031048   0.32616    1.1151     0.      ]
 [ -2.0149     3.6874    -1.9385    -3.8918     1.      ]
 ...
 [  0.6005     0.99945   -2.2126     0.097399   1.      ]
 [  2.0165    -0.25246    5.1707     1.0763     0.      ]
 [ -2.0759    10.8223     2.6439    -4.837      0.      ]]


In [39]:
#Extacting Y
y_orig = data_orig[:,-1]
print("Output Y   :"+str(y_orig))
print("Shape of Y : "+str(y_orig.shape))
print("Number of 0s : "+str(y_orig.shape[0]-np.sum(y_orig)))
print("Number of 1s : "+str(np.sum(y_orig)))

Output Y   :[1. 0. 1. ... 1. 0. 0.]
Shape of Y : (1372,)
Number of 0s : 762.0
Number of 1s : 610.0


In [40]:
#Getting rid of Rank 1 array
Y = np.reshape(y_orig,(y_orig.shape[0],1)).T    
print("Shape of Y: "+ str(Y.shape))

Shape of Y: (1, 1372)


In [41]:
#Extracting vectorized input feature X (transposed)
x_shuffled = data_orig[:,(0,1,2,3)].T
print("Input set : \n\n" +str(x_shuffled))

Input set : 

[[ -1.7713     5.1321    -2.0149   ...   0.6005     2.0165    -2.0759  ]
 [-10.7665    -0.031048   3.6874   ...   0.99945   -0.25246   10.8223  ]
 [ 10.2184     0.32616   -1.9385   ...  -2.2126     5.1707     2.6439  ]
 [ -1.0043     1.1151    -3.8918   ...   0.097399   1.0763    -4.837   ]]


In [61]:
print(x_shuffled.shape)

(4, 1372)


In [78]:
def standardize(x):
    """
    Input  :  Numpy array x 
    Output :  Numpy array of same shape as X but standardized along each rows
    
    """
    x_mean = np.mean(x,axis=1, keepdims=True)
    x_std = np.std(x, axis=1, keepdims=True)

    #print("Mean of each row : \n\n"+str(x_mean))
    #print("\nStandard deviation of each row : \n\n"+str(x_std))

    X = (x - x_mean)  #Python Broadcasting
    X = X/x_std
    return X

In [79]:
#Standardizing shuffled input X
X = standardize(x_shuffled)
print("Standardizd Input X : \n\n"+str(X))
print("\nMeans of features              : \n"+str(np.mean(X,axis=1,keepdims=True)))
print("\nStandard Deviation of features : \n"+str(np.std(X,axis=1,keepdims=True)))

Standardizd Input X : 

[[-0.77594917  1.65334873 -0.86167171 ...  0.05868431  0.55697295
  -0.88313754]
 [-2.16278391 -0.33295243  0.30084791 ... -0.15730657 -0.37069157
   1.51697413]
 [ 2.04731499 -0.24868917 -0.77432025 ... -0.83793928  0.87573603
   0.28926186]
 [ 0.08920688  1.09832613 -1.2856312  ...  0.61376368  1.07985212
  -1.73567343]]

Means of features              : 
[[-1.42419280e-17]
 [-7.05622797e-17]
 [ 1.58603289e-17]
 [ 4.72573066e-17]]

Standard Deviation of features : 
[[1.]
 [1.]
 [1.]
 [1.]]


In [80]:
#Splitting into Train, Test sets ( with a fixed seed )
train_split_percent = 80
test_split_percent = 20

train_X , test_X = X[:, : int( (train_split_percent/100)*X.shape[1])] , X[:,int( (train_split_percent/100)*X.shape[1]) : ]
train_Y , test_Y = Y[:, : int( (train_split_percent/100)*X.shape[1])] , Y[:,int( (train_split_percent/100)*X.shape[1]) : ]
print("Seed of Randomization   : "+str(seed))
print("\nShape of Training set X : "+str(train_X.shape))
print("Shape of Training set Y : "+str(train_Y.shape))
print("\nShape of Test set   X   : "+str(test_X.shape))
print("Shape of Test set Y     : "+str(test_Y.shape))

Seed of Randomization   : 0

Shape of Training set X : (4, 1097)
Shape of Training set Y : (1, 1097)

Shape of Test set   X   : (4, 275)
Shape of Test set Y     : (1, 275)


In [81]:
m_train = train_X.shape[1]   #no. of training examples
m_test  = test_X.shape[1]    #no. of test examples
print("No of training examples : "+str(m_train))
print("No of test example      : "+str(m_test))

No of training examples : 1097
No of test example      : 275


In [82]:
#train_X = standardize(train_X)
print("Standardize train_X : "+str(train_X.shape)+"\n\n"+str(train_X))
print("\nMeans of features              : \n"+str(np.mean(train_X,axis=1,keepdims=True)))
print("\nStandard Deviation of features : \n"+str(np.std(train_X,axis=1,keepdims=True)))
#test_X  = standardize(test_X)
print("\n\nStandardize test_X : "+str(test_X.shape)+"\n\n"+str(test_X))

Standardize train_X : (4, 1097)

[[-0.77594917  1.65334873 -0.86167171 ... -0.03451608  0.46305117
   1.62276873]
 [-2.16278391 -0.33295243  0.30084791 ...  0.83767247 -0.3576421
   0.33647145]
 [ 2.04731499 -0.24868917 -0.77432025 ... -0.16257486  0.87852125
  -0.65638946]
 [ 0.08920688  1.09832613 -1.2856312  ...  0.30222465  1.18264954
   0.9782498 ]]

Means of features              : 
[[ 0.00330544]
 [-0.00980325]
 [ 0.01036772]
 [ 0.01179597]]

Standard Deviation of features : 
[[1.00354008]
 [0.99904738]
 [0.99977808]
 [0.98720635]]


Standardize test_X : (4, 275)

[[ 1.26981427 -2.21933232 -0.60468005 ...  0.05868431  0.55697295
  -0.88313754]
 [-1.07609556  1.23622997  0.22995878 ... -0.15730657 -0.37069157
   1.51697413]
 [ 0.77730177 -0.38809864 -0.73453804 ... -0.83793928  0.87573603
   0.28926186]
 [ 1.10875348 -2.30951061 -0.98519039 ...  0.61376368  1.07985212
  -1.73567343]]


In [83]:
# Starting with our problem now

In [84]:
#First programming all helper function then will integrate the function

In [85]:
def sigmoid(z):
    """
    Input  : Scalar or Numpy array z
    Output : sigmoid of z
    """
    return 1/(1+np.exp(-1*z))
    

In [86]:
#Testing Sigmoid
print(sigmoid(0))
print(sigmoid(1000000000000000))
print(sigmoid(0.5))
print(sigmoid(np.array([0.5,1, 10])))

0.5
1.0
0.6224593312018546
[0.62245933 0.73105858 0.9999546 ]


In [87]:
#Initializing parameters
w, b = np.zeros((train_X.shape[1])), 0 
print("w : "+str(w))
print("b : "+str(b))

w : [0. 0. 0. ... 0. 0. 0.]
b : 0


In [88]:
def calculate(w,b,X,Y):
    """
    Input : w - wieghts, Numpy array, Real 
            b - bias, Scalar
            X - Input matrix, dim = (4,m_X) , Real Matrix
            Y - Output Vector, dim = (1,m_X), Discrete 0/1
    
    Output: cost - cross entropy loss
            dw   - gradient of the loss with respect to w, thus same shape as w
            db   - gradient of the loss with respect to b, thus same shape as b
    """
    m = X.shape[1]
   #Vectorized Implementation
    A = sigmoid(np.dot(w.T,X)+b)
    
    #Average of loss over m examples 
    cost = (-1/m)*np.sum(np.multiply(Y,np.log(A))+np.multiply(1-Y,np.log(1-A)))  
    
    #Gradient
    dw = (1/m)*np.dot(X,(A-Y).T)    #Vectorized Implementation
    db = (1/m)*np.sum(A-Y)
    
    grads = { 'dw':dw, 'db':db}
    
    return cost, grads

In [89]:
def gradient_descent(w, b, X, Y, num_iterations, alpha, print_cost = False, print_after=100):
    
    costs = []
    for i in range(num_iterations):  
            
        cost, grads = calculate(w, b, X, Y)
        dw = grads['dw']
        db = grads['db']
        w = w-alpha*dw
        b = b-alpha*db
    
        if i%100 ==0:
            costs.append(cost)
        if print_cost and i % print_after == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            
    parameters = {"w": w, "b": b}
    gradients = {"dw": dw, "db": db}
    
    return parameters, gradients, costs      
  

In [90]:
def predict(w, b, X):
    m= X.shape[1]
    Y_prediction = np.zeros((1,m))
    
    A=sigmoid(np.dot(w.T,X)+b)
    assert(A.shape==(1,m))
    
    for i in range(A.shape[1]):
          Y_prediction[0, i] = 1 if A[0, i] > 0.5 else 0
            
    assert(Y_prediction.shape == (1, m))
    
    return Y_prediction

In [91]:
def accuracy(Y,Yhat):
    print("Shape : "+str(Y.shape[1]))
    return (np.sum(np.equal(Y,Yhat))/Y.shape[1])*100

In [92]:
print((np.sum(np.equal([1,1,0], [0,1,0]))))

2


In [93]:
def model(train_X, train_Y, test_X, test_Y, num_iterations = 2000, learning_rate = 0.5, print_cost = False,print_after=100):

    w, b = np.zeros((4,1)),0
    tic = time.time()
    parameters, grads, costs = gradient_descent(w, b, train_X, train_Y, num_iterations, learning_rate, print_cost,print_after)
    

    w = parameters["w"]
    b = parameters["b"]
    

    test_Yhat = predict(w, b, test_X)
    train_Yhat = predict(w, b, train_X)
    
    toc= time.time()
    print("train accuracy: {} %".format(100 - np.mean(np.abs(train_Yhat - train_Y)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(test_Yhat - test_Y)) * 100))

    
    d = {"costs": costs,
         "Y_prediction_test": test_Yhat, 
         "Y_prediction_train" : train_Yhat, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}
    time_t = (toc-tic)*1000
    print("Time taken by the model : "+str((toc-tic))+" sec")
    return d

In [94]:
d = model(train_X, train_Y, test_X, test_Y, num_iterations = 30001, learning_rate = 0.85, print_cost = True,print_after=2000)

Cost after iteration 0: 0.693147
Cost after iteration 2000: 0.032939
Cost after iteration 4000: 0.028339
Cost after iteration 6000: 0.026273
Cost after iteration 8000: 0.025048
Cost after iteration 10000: 0.024223
Cost after iteration 12000: 0.023624
Cost after iteration 14000: 0.023166
Cost after iteration 16000: 0.022803
Cost after iteration 18000: 0.022509
Cost after iteration 20000: 0.022265
Cost after iteration 22000: 0.022059
Cost after iteration 24000: 0.021883
Cost after iteration 26000: 0.021731
Cost after iteration 28000: 0.021598
Cost after iteration 30000: 0.021482
train accuracy: 99.08842297174111 %
test accuracy: 99.27272727272727 %
Time taken by the model : 2.464763641357422 sec


# 