# Vectorization

### Checking the significance of Vectorization
#### Way faster than for loop 

In [None]:
import numpy as np
import time

# Generate two vectors of 10 million random values and compute their dot product
# twice -- once with NumPy and once with a plain Python loop -- to compare speed.
VECTOR_SIZE = 10 ** 7
a = np.random.rand(VECTOR_SIZE)
b = np.random.rand(VECTOR_SIZE)

# time.perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is a wall clock that can be too coarse for this.
start = time.perf_counter()
# Vectorized dot product using numpy
c = np.dot(a, b)
end = time.perf_counter()
print("Time taken for dot product:", (end-start)*1000, "ms")
print("Dot product result:", c)

# Same computation, one element at a time, timed the same way.
start = time.perf_counter()
d = 0
# The loop bound comes from the array itself so it cannot drift from VECTOR_SIZE.
for i in range(len(a)):
    d += a[i]*b[i]
end = time.perf_counter()
print("Time taken for loop dot product:", (end-start)*1000, "ms")
print("Loop dot product result:", d)


Time taken for dot product: 1.0700225830078125 ms
Dot product result: 2499752.6412505116
Time taken for loop dot product: 2561.891794204712 ms
Loop dot product result: 2499752.6412503873


In [25]:
# Other element-wise operations using numpy, each timed separately.
a = np.random.rand(100000000)

# Timestamps are taken immediately around each operation and all printing is
# deferred to the end, so no measurement includes the cost of a print call.
start = time.perf_counter()
ae = np.exp(a)
after_exp = time.perf_counter()
# Arcsine, matching the variable name and the "ASINE" label below (the values
# of `a` lie in [0, 1), which is inside arcsin's domain).
asine = np.arcsin(a)
after_asine = time.perf_counter()
degrees = np.degrees(asine)
end = time.perf_counter()

print("Time taken for element wise EXPO operations:", (after_exp-start)*1000, "ms")
print("Time taken for element wise ASINE operations:", (after_asine-after_exp)*1000, "ms")
print("Time taken for element wise degree operations:", (end-after_asine)*1000, "ms")
print("total time taken for element wise operations:", (end-start)*1000, "ms")

Time taken for element wise EXPO operations: 390.0773525238037 ms
Time taken for element wise ASINE operations: 730.9997081756592 ms
Time taken for element wise degree operations: 202.55017280578613 ms
total time taken for element wise operations: 1323.627233505249 ms


## Let's Code Logistic Regression

In [84]:
# Build a synthetic, labelled data set: random inputs are passed through a known
# rule, and the rule's output is the ground truth the logistic-regression
# weights will later be tuned to reproduce.
# Each training example has a single input value (nx=1) and there are
# m=100000 examples, stored one per column.
X = np.random.rand(1, 100000)
print("X shape:", X.shape)

# Labelling rule: 2X + 3 > 4. Logistic regression needs binary targets, so the
# boolean result is converted to 0/1 integers.
above_threshold = (2 * X + 3) > 4
Y = above_threshold.astype(int)
print("Y shape:", Y.shape)

# Show one example (column 2) together with its label.
print("X:", X[0, 2],"Y:", Y[0, 2])

X shape: (1, 100000)
Y shape: (1, 100000)
X: 0.37512684766943194 Y: 0


In [86]:
# Batch gradient descent for logistic regression on the single-feature data:
# X holds the inputs (one example per column) and Y the matching 0/1 labels.
learning_rate = 0.1
m = X.shape[1]  # number of training examples

W = 0
B = 0
for step in range(1000):
    # Forward pass: linear score, then sigmoid to get a probability per example.
    Z = np.dot(W, X) + B
    A = 1/(1+np.exp(-Z))

    # Backward pass: gradients of the mean cross-entropy cost.
    dZ = A-Y
    dW = np.dot(X, dZ.T)/m
    dB = np.sum(dZ)/m

    # Parameter update.
    W = W-learning_rate*dW
    B = B-learning_rate*dB

    # Report progress every 100th step only.
    if step%100 != 0:
        continue
    # The cost uses the activations computed before this step's update.
    cost = np.sum(-Y*np.log(A)-(1-Y)*np.log(1-A))/m
    print("W:", W, "B:", B)
    print("Cost:", cost)
    print("dW:", dW, "dB:", dB)
    print("X:", X[0][2]," Predicted :", A[0][2], "Truth:", Y[0][2], )
    print("-------------------------------")

W: [[0.0124031]] B: -0.00018500000000000002
Cost: 0.6931471805599453
dW: [[-0.12403103]] dB: 0.00185
X: 0.37512684766943194  Predicted : 0.5 Truth: 0
-------------------------------
W: [[0.98931572]] B: -0.3299833214077108
Cost: 0.5840963428941714
dW: [[-0.08408894]] dB: 0.041755578738047805
X: 0.37512684766943194  Predicted : 0.5105376194988209 Truth: 0
-------------------------------
W: [[1.76162888]] B: -0.727672733948925
Cost: 0.5083083020267528
dW: [[-0.07139579]] dB: 0.0372003657339594
X: 0.37512684766943194  Predicted : 0.4835567671291925 Truth: 0
-------------------------------
W: [[2.42645539]] B: -1.0735638361800146
Cost: 0.45193166867123574
dW: [[-0.06203832]] dB: 0.032199623827717405
X: 0.37512684766943194  Predicted : 0.4594784323915578 Truth: 0
-------------------------------
W: [[3.00798894]] B: -1.3746742125886289
Cost: 0.4089050572929383
dW: [[-0.05462182]] dB: 0.02821893694002196
X: 0.37512684766943194  Predicted : 0.4389255192978544 Truth: 0
-------------------------

In the above output you can notice that:
1. The cost is decreasing, and increasing the number of iterations would likely decrease it further.
2. Convergence also depends on the learning rate.
3. The predicted value moves closer to the truth value as training progresses.
4. You can verify a prediction by picking any value of x between 0 and 1 and computing Z = W*x + B.
5. Then pass Z through the sigmoid function to get A, which is the predicted value.

### An Interesting Case of Logistic Regression Where the Model Needs to Predict Odd/Even Numbers

In [122]:
# Random integers in [-100, 100), one example per column, labelled 1 when the
# number is even and 0 when it is odd.
X = np.random.randint(low=-100, high=100, size=(1, 100000))
is_even = X % 2 == 0
Y = is_even.astype(int)
print("X shape:", X.shape)
print("Y shape:", Y.shape)


X shape: (1, 100000)
Y shape: (1, 100000)


Here, if I give the model only one feature (a random integer from -100 to 100) and try to learn weights from the output Y, it will converge to some other minimum and I will not get accurate predictions. This is because "evenness" is not linearly separable.
Logistic regression can only solve problems where class 0 and class 1 can be separated by a straight line (a threshold in 1D, or a hyperplane in higher dimensions).
Here the label alternates at every step, like a wave.

There is no linear boundary that separates this alternating pattern.

So Logistic Regression can't learn this rule.

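Hence I am introducing another feature here, which says whether the number is divisible by 2 or not.
This feature is exactly the same as the output vector Y, so the model only has to learn to read it; this demonstrates the mechanics rather than a model that discovers parity by itself.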

In [123]:
# Combine the raw numbers (X) and the evenness flag (Y) into one feature matrix.
# Stacking the two (1, 100000) arrays yields a 3-D array first ...
X1 = np.array((X, Y))
print("X shape:", X1.shape)
# ... so drop the singleton middle axis to get one row per feature.
X1 = X1.reshape((2, 100000))
print("X shape:", X1.shape)

X shape: (2, 1, 100000)
X shape: (2, 100000)


In [124]:
# Start from all-zero weights: one row per feature, in a single column.
W = np.zeros(shape=(2, 1))
print("W shape:", W.shape)

W shape: (2, 1)


In [None]:
# Batch gradient descent for logistic regression on the two-feature matrix X1
# (one row per feature, one column per example) with 0/1 labels Y.
# NOTE(review): row 0 of X1 holds the raw integers without any rescaling; if the
# cost does not fall steadily, try normalizing that feature or a smaller step.
learning_rate = 0.01
m = X1.shape[1]  # number of training examples

W = np.zeros((2, 1))  # column vector, one weight per feature
B = 0
for step in range(10000):
    # Forward pass.
    Z = np.dot(W.T, X1) + B   # row vector, shape (1, m)
    A = 1/(1+np.exp(-Z))      # row vector, shape (1, m)

    # Backward pass.
    dZ = A-Y                  # row vector, shape (1, m)
    dW = np.dot(X1, dZ.T)/m   # column vector, shape (2, 1)
    dB = np.sum(dZ)/m         # scalar

    # Parameter update.
    W = W-learning_rate*dW    # column vector, shape (2, 1)
    B = B-learning_rate*dB    # scalar

    # Report progress every 1000th step only.
    if step%1000 != 0:
        continue
    # The cost uses the activations computed before this step's update.
    cost = np.sum(-Y*np.log(A)-(1-Y)*np.log(1-A))/m
    print("W:", W, "B:", B)
    print("Cost:", cost)
    print("dW:", dW, "dB:", dB)
    print("X1:", X1[0][2], "X2:", X1[1][2]," Predicted :", A[0][2], "Truth:", Y[0][2], )
    print("-------------------------------")

W: [[-0.0041617]
 [ 0.0024996]] B: -8.000000000000001e-07
Cost: 0.6931471805599453
dW: [[ 0.41617]
 [-0.24996]] dB: 8e-05
X1: 35 X2: 0  Predicted : 0.5 Truth: 0
-------------------------------
W: [[-0.14449438]
 [ 2.26111936]] B: -0.22458265297990895
Cost: 1.9913088556299128
dW: [[23.96741817]
 [-0.20010967]] dB: 0.04506194269634702
X1: 35 X2: 0  Predicted : 0.9571871590156502 Truth: 0
-------------------------------
W: [[-0.12950854]
 [ 4.15832051]] B: -0.7285272924962131
Cost: 1.790307979666968
dW: [[23.21801324]
 [-0.17012361]] dB: 0.06326704896878918
X1: 35 X2: 0  Predicted : 0.9461200537632105 Truth: 0
-------------------------------
W: [[-0.11874201]
 [ 5.78259714]] B: -1.3739248563274924
Cost: 1.4955979478800838
dW: [[22.16743462]
 [-0.14657754]] dB: 0.07126099998071171
X1: 35 X2: 0  Predicted : 0.9028604732387896 Truth: 0
-------------------------------
W: [[-0.10876032]
 [ 7.17230314]] B: -2.0827543094097547
Cost: 1.1698263289908724
dW: [[20.77199133]
 [-0.1251791 ]] dB: 0.073

In [None]:
# Use the learned weights W and bias B to predict the output for new data.
# Testing the model on a handful of hand-picked integers.
numbers = np.array([1, 2, 44, 55, -12, -24, 35, 99, -98])

# Derive the evenness flag with the same rule used for the training labels
# instead of typing it by hand, so the feature row and the printed truth cannot
# drift from the numbers. Because this feature equals the label, the check shows
# that the model reads the feature, not that it worked out parity by itself.
Y_test = (numbers % 2 == 0).astype(int)
X_test = np.array([numbers, Y_test])
print("X_test shape:", X_test.shape)

Z_test = np.dot(W.T, X_test) + B    # row vector, shape (1, 9)
A_test = 1/(1+np.exp(-Z_test))      # row vector, shape (1, 9)
for i in range(X_test.shape[1]):
    print("X1:", X_test[0][i], "X2:", X_test[1][i]," Predicted :", A_test[0][i], "Truth:", Y_test[i], )
print()

X_test shape: (2, 9)
X1: 1 X2: 0  Predicted : 0.023337328979371595 Truth: 0
X1: 2 X2: 1  Predicted : 0.9942554016800846 Truth: 1
X1: 44 X2: 1  Predicted : 0.9942126893133956 Truth: 1
X1: 55 X2: 0  Predicted : 0.0231199817420958 Truth: 0
X1: -12 X2: 1  Predicted : 0.9942695693732415 Truth: 1
X1: -24 X2: 1  Predicted : 0.9942816854565367 Truth: 1
X1: 35 X2: 0  Predicted : 0.02320024931934821 Truth: 0
X1: 99 X2: 0  Predicted : 0.02294434631957597 Truth: 0
X1: -98 X2: 1  Predicted : 0.9943558404858729 Truth: 1



Yes, it is now possible to get the odd/even classification using logistic regression.
In the above output you can see that all even numbers (X2 = 1) are predicted with a probability of about 0.99 and all odd numbers (X2 = 0) with a probability of about 0.02.