#Chapter 19

Gradient Descent

In [1]:
import numpy as np
import torch
import torch.nn as nn
import sys

m = nn.Sigmoid()
loss_fun = nn.BCELoss()
lr = 0.0001
x = torch.randn(1)
y = torch.randint(0,2,(1,),dtype=torch.float)
w = torch.randn(1,requires_grad=True)

nIter = 100
for i in range(nIter):
    y_hat = m(w*x)
    loss = loss_fun(y_hat,y)
    loss.backward()
    dw = w.grad.data
    with torch.no_grad():
        w -= lr*dw
    w.grad.data.zero_()
    print(loss.item())

0.7112152576446533
0.7112152576446533
0.7112151384353638
0.7112150192260742
0.7112149000167847
0.7112147808074951
0.7112147808074951
0.7112146615982056
0.711214542388916
0.7112144231796265
0.7112143039703369
0.7112141847610474
0.7112141847610474
0.7112140655517578
0.7112139463424683
0.7112138271331787
0.7112138271331787
0.7112137079238892
0.7112135887145996
0.7112134695053101
0.7112132906913757
0.7112132906913757
0.7112131714820862
0.7112130522727966
0.7112129330635071
0.7112128138542175
0.7112128138542175
0.7112125754356384
0.7112124562263489
0.7112124562263489
0.7112123370170593
0.7112122178077698
0.7112120985984802
0.7112119793891907
0.7112119793891907
0.7112118601799011
0.7112117409706116
0.711211621761322
0.7112115025520325
0.7112115025520325
0.7112113833427429
0.7112112641334534
0.7112111449241638
0.7112111449241638
0.7112109065055847
0.7112109065055847
0.7112107872962952
0.7112106680870056
0.7112105488777161
0.7112105488777161
0.7112104296684265
0.711210310459137
0.7112101912498

In [4]:
import numpy as np
import torch
import torch.nn as nn
import sys


def wSum(X,W):
    h = torch.from_numpy(X)
    z = torch.matmul(W,h)
    return z

def forwardStep(X,W_list):
    h = torch.from_numpy(X)
    for W in W_list:
        z = torch.matmul(W,h)
        h = activate(z)
    return h

def activate(x):
    return 1/(1+torch.exp(-x))

def updateParams(W_list,dW_list,lr):
    with torch.no_grad():
        for i in range(len(W_list)):
            W_list[i] -= lr*dW_list[i]
    return W_list

def trainNN_sgd(X,y,W_list,loss_fn,lr=0.0001,nepochs=100):
    for epoch in range(nepochs):
        avgLoss = []
        for i in range(len(y)):
            Xin = X[i,:]
            yTrue = y[i]
            y_hat = forwardStep(Xin,W_list)
            loss = loss_fn(y_hat,torch.tensor(yTrue,dtype=torch.double))
            loss.backward()
            avgLoss.append(loss.item())
            sys.stdout.flush()
            dW_list = []
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data)
            W_list = updateParams(W_list,dW_list,lr)
            for j in range(len(W_list)):
                W_list[j].grad.data.zero_()
        print("Loss after epoch=%d: %f" %(epoch,np.mean(np.array(avgLoss))))
    return W_list

def trainNN_batch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100):
    n = len(y)
    for epoch in range(nepochs):
        loss = 0
        for i in range(n):
            Xin = X[i,:]
            yTrue = y[i]
            y_hat = forwardStep(Xin,W_list)
            loss += loss_fn(y_hat,torch.tensor(yTrue,dtype=torch.double))
        loss = loss/n
        loss.backward()
        sys.stdout.flush()
        dW_list = []
        for j in range(len(W_list)):
            dW_list.append(W_list[j].grad.data)
        W_list = updateParams(W_list,dW_list,lr)
        for j in range(len(W_list)):
            W_list[j].grad.data.zero_()
        print("Loss after epoch=%d: %f" %(epoch,loss))
    return W_list

def trainNN_minibatch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100,batchSize=16):
    n = len(y)
    numBatches = n//batchSize
    
    for epoch in range(nepochs):
        for batch in range(numBatches):
            X_batch = X[batch*batchSize:(batch+1)*batchSize,:]
            y_batch = y[batch*batchSize:(batch+1)*batchSize]
            loss = 0
            for i in range(batchSize):
                Xin = X_batch[i,:]
                yTrue = y_batch[i]
                y_hat = forwardStep(Xin,W_list)
                loss += loss_fn(y_hat,torch.tensor(yTrue,dtype=torch.double))
            loss = loss/batchSize
            loss.backward()
            sys.stdout.flush()
            dW_list = []
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data)
            W_list = updateParams(W_list,dW_list,lr)
            for j in range(len(W_list)):
                W_list[j].grad.data.zero_()
        print("Loss after epoch=%d: %f" %(epoch,loss/numBatches))
    return W_list

inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

print(X.shape)
print(y.shape)
np.unique(y)

W = torch.tensor(np.random.uniform(0,1,inputDim),requires_grad=True)

z = wSum(X[0,:],W)
W1 = torch.tensor(np.random.uniform(0,1,(2,inputDim)),requires_grad=True) #(2,inputDim) = (no. neurons, no imputs)
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True)
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True)

W_list = []
W_list.append(W1)
W_list.append(W2)
W_list.append(W3)

loss_fn = nn.BCELoss()
#W_list = trainNN_sgd(X,y,W_list,loss_fn,lr=0.0001,nepochs=100)
#W_list = trainNN_batch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100)
W_list = trainNN_minibatch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100)

(1000, 10)
(1000,)
Loss after epoch=0: 0.014824
Loss after epoch=1: 0.014810
Loss after epoch=2: 0.014795
Loss after epoch=3: 0.014781
Loss after epoch=4: 0.014766
Loss after epoch=5: 0.014752
Loss after epoch=6: 0.014738
Loss after epoch=7: 0.014723
Loss after epoch=8: 0.014709
Loss after epoch=9: 0.014695
Loss after epoch=10: 0.014681
Loss after epoch=11: 0.014667
Loss after epoch=12: 0.014653
Loss after epoch=13: 0.014639
Loss after epoch=14: 0.014625
Loss after epoch=15: 0.014611
Loss after epoch=16: 0.014597
Loss after epoch=17: 0.014584
Loss after epoch=18: 0.014570
Loss after epoch=19: 0.014556
Loss after epoch=20: 0.014543
Loss after epoch=21: 0.014529
Loss after epoch=22: 0.014516
Loss after epoch=23: 0.014502
Loss after epoch=24: 0.014489
Loss after epoch=25: 0.014476
Loss after epoch=26: 0.014462
Loss after epoch=27: 0.014449
Loss after epoch=28: 0.014436
Loss after epoch=29: 0.014423
Loss after epoch=30: 0.014410
Loss after epoch=31: 0.014397
Loss after epoch=32: 0.014384
L

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset,DataLoader

inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

tensor_x = torch.Tensor(X)
tensor_y = torch.Tensor(y)
Xy = TensorDataset(tensor_x,tensor_y)
Xy_loader = DataLoader(Xy,batch_size=16,shuffle=True,drop_last=True)

model = nn.Sequential(
    nn.Linear(inputDim,200),
    nn.ReLU(),
    #nn.BatchNorm1d(num_features=200),
    nn.Dropout(0.5),
    nn.Linear(200,100),
    nn.Tanh(),
    #nn.BatchNorm1d(num_features=100),
    nn.Linear(100,1),
    nn.Sigmoid()
)

optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
loss_fn = nn.BCELoss()

nepochs = 100
for epoch in range(nepochs):
    for X,y in Xy_loader:
        batch_size = X.shape[0]
        y_hat = model(X.view(batch_size,-1))
        loss = loss_fn(y_hat,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(float(loss))

with torch.no_grad():
    xt = torch.tensor(np.random.rand(1,inputDim))
    y2 = model(xt.float())
    print(y2.detach().numpy()[0][0])