<a href="https://colab.research.google.com/github/profteachkids/CHE2064/blob/master/computational_methods/NeuralNetworkPrimer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision as tv
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default='colab'
pio.templates.default='plotly_dark'
import torchsummary

## Fitting Sin with Linear Neural Network

In [None]:
x = torch.linspace(0,10, 25).unsqueeze(1)
y= torch.sin(x)
x_norm = (x-x.mean())/x.std()

In [None]:
lr=1e-1

w1=torch.distributions.Normal(0, 1/20**0.5).sample((1,20)).requires_grad_()
b1=torch.zeros((1,20),requires_grad=True)

for i in range(20000):
  o1=torch.tanh(x_norm@w1+b1)
  predict=torch.sum(o1,dim=1)
  loss = torch.mean((predict-y.squeeze())**2)
  if i % 1000 ==0:
    print(i, loss)
  loss.backward()
  with torch.no_grad():
    w1.sub_(lr * w1.grad)
    b1.sub_(lr * b1.grad)
    # w2.sub_(lr * w2.grad)
    # b2.sub_(lr * b2.grad)
    w1.grad=None
    b1.grad=None
    # w2.grad=None
    # b2.grad=None




0 tensor(1.3762, grad_fn=<MeanBackward0>)
1000 tensor(0.0178, grad_fn=<MeanBackward0>)
2000 tensor(0.0077, grad_fn=<MeanBackward0>)
3000 tensor(0.0036, grad_fn=<MeanBackward0>)
4000 tensor(0.0018, grad_fn=<MeanBackward0>)
5000 tensor(0.0011, grad_fn=<MeanBackward0>)
6000 tensor(0.0008, grad_fn=<MeanBackward0>)
7000 tensor(0.0007, grad_fn=<MeanBackward0>)
8000 tensor(0.0006, grad_fn=<MeanBackward0>)
9000 tensor(0.0005, grad_fn=<MeanBackward0>)
10000 tensor(0.0005, grad_fn=<MeanBackward0>)
11000 tensor(0.0004, grad_fn=<MeanBackward0>)
12000 tensor(0.0004, grad_fn=<MeanBackward0>)
13000 tensor(0.0004, grad_fn=<MeanBackward0>)
14000 tensor(0.0003, grad_fn=<MeanBackward0>)
15000 tensor(0.0003, grad_fn=<MeanBackward0>)
16000 tensor(0.0003, grad_fn=<MeanBackward0>)
17000 tensor(0.0002, grad_fn=<MeanBackward0>)
18000 tensor(0.0002, grad_fn=<MeanBackward0>)
19000 tensor(0.0002, grad_fn=<MeanBackward0>)


In [None]:
fig= make_subplots()
fig.add_scatter(x=x.squeeze(),y=y.squeeze(), mode='markers')
fig.add_scatter(x=x.squeeze(), y=predict.detach().numpy(), mode='lines')
fig.update_layout(width=800,height=500)
fig.show()

In [None]:
model = nn.Sequential(nn.Linear(1,20),
                      nn.Tanh(),
                      # nn.Linear(20,20),
                      # nn.Tanh(),
                      nn.Linear(20,1))

loss_fn = nn.MSELoss()
optim = torch.optim.Adam(model.parameters())
for i in range(10000):
  loss = loss_fn(model(x_norm), y)
  if i % 1000 == 0:
    print(i, loss)
  optim.zero_grad()
  loss.backward()
  optim.step()


0 tensor(0.4937, grad_fn=<MseLossBackward>)
1000 tensor(0.0517, grad_fn=<MseLossBackward>)
2000 tensor(0.0017, grad_fn=<MseLossBackward>)
3000 tensor(0.0002, grad_fn=<MseLossBackward>)
4000 tensor(2.9054e-05, grad_fn=<MseLossBackward>)
5000 tensor(1.2619e-05, grad_fn=<MseLossBackward>)
6000 tensor(7.9591e-06, grad_fn=<MseLossBackward>)
7000 tensor(5.2002e-06, grad_fn=<MseLossBackward>)
8000 tensor(3.0174e-06, grad_fn=<MseLossBackward>)
9000 tensor(2.1833e-06, grad_fn=<MseLossBackward>)


In [None]:
fig= make_subplots()
fig.add_scatter(x=x.squeeze(),y=y.squeeze(),mode='markers')
fig.add_scatter(x=x.squeeze(), y=model(x_norm).squeeze().detach().numpy(), mode='lines')
fig.update_layout(width=800,height=500)
fig.show()

## MNIST

In [2]:
transforms=tv.transforms.Compose([tv.transforms.ToTensor()])
mnist_train=tv.datasets.MNIST('./', train=True, download=True, transform=transforms)
mnist_test = tv.datasets.MNIST('./', train=False, download=True, transform=transforms)

### Dataset

In [None]:
sampler = torch.utils.data.DataLoader(mnist_train, batch_size=64, shuffle=True,drop_last=True)
data=iter(sampler)
a=next(data)
pic,label = a




In [None]:

print(pic.shape)
print(label.shape)
img=tv.utils.make_grid(pic, pad_value=255)
print(img.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])
torch.Size([3, 242, 242])


In [None]:
fig = make_subplots()
fig.add_image(z=img.permute([1,2,0])*255, colormodel='rgb')
fig.update_layout(width=500, height=500, margin=dict(l=2,r=2,b=2,t=2), xaxis_visible=False, yaxis_visible=False)





In [None]:
label.reshape(8,8)

tensor([[4, 0, 0, 5, 6, 6, 1, 5],
        [9, 9, 5, 9, 5, 8, 0, 0],
        [8, 3, 6, 8, 3, 9, 1, 5],
        [6, 1, 3, 1, 9, 5, 4, 3],
        [7, 9, 5, 0, 0, 2, 5, 2],
        [3, 9, 7, 1, 4, 7, 7, 1],
        [0, 4, 6, 3, 8, 0, 9, 5],
        [2, 2, 3, 9, 1, 6, 4, 3]])

### Linear Neural Network

In [None]:
model = nn.Sequential(nn.Linear(784,100),
                      nn.Tanh(),
                      nn.Linear(100,100),
                      nn.Tanh(),
                      nn.Linear(100,10),
                      nn.Softmax(dim=0))


In [None]:
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters())
train_sampler = torch.utils.data.DataLoader(mnist_train, batch_size=5000, shuffle=True,drop_last=True)
test_sampler = torch.utils.data.DataLoader(mnist_test, batch_size=5000, shuffle=True,drop_last=True)


for epoch in range(10):
  model.train()
  train_data=iter(train_sampler)
  for i, (train_pic, train_label) in enumerate(train_data):
    optim.zero_grad()
    predict=model(train_pic.flatten(1))
    loss = loss_fn(predict,train_label)
    if i % 5 == 0 :
      print(f'epoch: {epoch} minibatch: {i}  loss: {loss}')
    loss.backward()
    optim.step()
  model.eval()
  test_pic, test_label=next(iter(test_sampler))
  predict_label=torch.argmax(model(test_pic.flatten(1)),dim=1)
  print(torch.mean((predict_label==test_label).float()))
  print()



epoch: 0 minibatch: 0  loss: 2.3026669025421143
epoch: 0 minibatch: 5  loss: 2.302597761154175
epoch: 0 minibatch: 10  loss: 2.3024251461029053
tensor(0.7362)

epoch: 1 minibatch: 0  loss: 2.3023340702056885
epoch: 1 minibatch: 5  loss: 2.302100658416748
epoch: 1 minibatch: 10  loss: 2.301879405975342
tensor(0.7826)

epoch: 2 minibatch: 0  loss: 2.3017876148223877
epoch: 2 minibatch: 5  loss: 2.301591157913208
epoch: 2 minibatch: 10  loss: 2.3014252185821533
tensor(0.8158)

epoch: 3 minibatch: 0  loss: 2.3013498783111572
epoch: 3 minibatch: 5  loss: 2.3012008666992188
epoch: 3 minibatch: 10  loss: 2.3010799884796143
tensor(0.8260)

epoch: 4 minibatch: 0  loss: 2.3010506629943848
epoch: 4 minibatch: 5  loss: 2.3009777069091797
epoch: 4 minibatch: 10  loss: 2.300931215286255
tensor(0.8372)

epoch: 5 minibatch: 0  loss: 2.3009157180786133
epoch: 5 minibatch: 5  loss: 2.3008906841278076
epoch: 5 minibatch: 10  loss: 2.300880193710327
tensor(0.8426)

epoch: 6 minibatch: 0  loss: 2.300867557

In [None]:
print(test_label[:100])
print(predict_label[:100])

tensor([9, 0, 9, 1, 9, 4, 6, 3, 9, 8, 2, 9, 2, 1, 6, 1, 0, 9, 8, 7, 3, 4, 0, 4,
        4, 7, 2, 5, 2, 2, 5, 2, 8, 2, 8, 1, 5, 9, 9, 0, 0, 2, 5, 0, 6, 6, 5, 3,
        0, 4, 1, 9, 3, 9, 1, 1, 3, 3, 4, 0, 7, 2, 4, 2, 2, 5, 3, 3, 1, 1, 4, 8,
        3, 0, 9, 3, 2, 8, 2, 5, 0, 3, 3, 1, 1, 7, 1, 1, 9, 0, 6, 9, 2, 0, 9, 5,
        7, 0, 7, 6])
tensor([9, 0, 9, 7, 9, 4, 6, 3, 9, 3, 2, 9, 0, 1, 6, 1, 0, 0, 8, 7, 3, 9, 0, 4,
        4, 7, 2, 8, 2, 2, 5, 2, 8, 2, 1, 1, 5, 9, 9, 0, 0, 2, 5, 0, 6, 6, 4, 3,
        0, 4, 8, 9, 3, 4, 1, 1, 7, 3, 4, 0, 7, 2, 4, 2, 2, 5, 3, 3, 1, 1, 4, 8,
        3, 0, 9, 3, 2, 8, 2, 5, 0, 3, 3, 1, 1, 7, 1, 1, 9, 0, 6, 9, 0, 0, 9, 5,
        7, 0, 7, 6])


### Convolutional Neural Network

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_conv = nn.Sequential(nn.Conv2d(1, 16, 5),
                      nn.MaxPool2d((2,2)),
                      nn.SELU(),
                      nn.AlphaDropout(),
                      nn.Conv2d(16,32,5),
                      nn.MaxPool2d((2,2)),
                      nn.SELU(),
                      nn.Flatten(1),
                      nn.Linear(512,50),
                      nn.SELU(),
                      nn.AlphaDropout(0.1),
                      nn.Linear(50,10),
                      nn.Softmax(dim=1)
                      ).to(device)


In [None]:

loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model_conv.parameters())
train_sampler = torch.utils.data.DataLoader(mnist_train, batch_size=20000, shuffle=True,drop_last=True, pin_memory=True)
test_sampler = torch.utils.data.DataLoader(mnist_test, batch_size=len(mnist_test), shuffle=True,drop_last=True, pin_memory=True)
test_pic, test_label=next(iter(test_sampler))
test_pic = test_pic.to(device)


for epoch in range(1000):
  model_conv.train()
  train_data=iter(train_sampler)
  for i, (train_pic, train_label) in enumerate(train_data):
    train_pic=train_pic.to(device)

    optim.zero_grad()
    predict=model_conv(train_pic).to('cpu')
    loss = loss_fn(predict,train_label)
    if i % 1 == 0 :
      print(f'epoch: {epoch} minibatch: {i}  loss: {loss}')
    loss.backward()
    optim.step()
  model_conv.eval()
  predict_label=torch.argmax(model_conv(test_pic).to('cpu'),dim=1)
  print(f'epoch: {epoch} accuracy:{torch.mean((predict_label==test_label).float())}')

Transfer entire data set to GPU, eliminate memory transfer bottle-neck

In [None]:
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model_conv.parameters())
train_sampler = torch.utils.data.DataLoader(mnist_train, batch_size=len(mnist_train), shuffle=True,drop_last=True, pin_memory=True)
test_sampler = torch.utils.data.DataLoader(mnist_test, batch_size=len(mnist_test), shuffle=True,drop_last=True, pin_memory=True)
train_pic, train_label=next(iter(test_sampler))
train_pic = train_pic.to(device)
train_label = train_label.to(device)
test_pic, test_label=next(iter(test_sampler))
test_pic = test_pic.to(device)
test_label = test_label.to(device)


for epoch in range(1000):
  model_conv.train()
  optim.zero_grad()
  predict=model_conv(train_pic)
  loss = loss_fn(predict,train_label)
  loss.backward()
  optim.step()
  model_conv.eval()
  predict_label=torch.argmax(model_conv(test_pic),dim=1)
  print(f'epoch: {epoch} accuracy:{torch.mean((predict_label==test_label).float())}')

epoch: 0 accuracy:0.14959999918937683
epoch: 1 accuracy:0.27649998664855957
epoch: 2 accuracy:0.32839998602867126
epoch: 3 accuracy:0.3725000023841858
epoch: 4 accuracy:0.4323999881744385
epoch: 5 accuracy:0.48069998621940613
epoch: 6 accuracy:0.5048999786376953
epoch: 7 accuracy:0.5232999920845032
epoch: 8 accuracy:0.5342000126838684
epoch: 9 accuracy:0.54339998960495
epoch: 10 accuracy:0.5489000082015991
epoch: 11 accuracy:0.5565999746322632
epoch: 12 accuracy:0.5625
epoch: 13 accuracy:0.566100001335144
epoch: 14 accuracy:0.5722999572753906
epoch: 15 accuracy:0.579800009727478
epoch: 16 accuracy:0.5827000141143799
epoch: 17 accuracy:0.5889999866485596
epoch: 18 accuracy:0.592199981212616
epoch: 19 accuracy:0.5958999991416931
epoch: 20 accuracy:0.600600004196167
epoch: 21 accuracy:0.6033999919891357
epoch: 22 accuracy:0.609499990940094
epoch: 23 accuracy:0.6144999861717224
epoch: 24 accuracy:0.6238999962806702
epoch: 25 accuracy:0.6323999762535095
epoch: 26 accuracy:0.6402999758720398

In [None]:
del model_conv

In [None]:
t = torch.cuda.get_device_properties(0).total_memory
c = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = c-a  # free inside cache