In [1]:
import torch


In [2]:
t1 = torch.tensor([[1,3,5,6],[4,5,6,7]])
t1.dtype

torch.int64

In [3]:
x= torch.tensor(1.0, requires_grad=False)

In [4]:
w=torch.tensor(3.0,requires_grad=True)
b= torch.tensor(6.0, requires_grad=True)

In [5]:
import numpy as np

In [6]:
y = w*x+b
m= torch.tensor(2.0, requires_grad=True)
z= 1*y + m


In [7]:
# Backpropagate from y to populate w.grad and b.grad (x was created with requires_grad=False, so x.grad stays None).
# retain_graph=True keeps y's graph alive because z = 1*y + m is built on top of it and z.backward() is called in a later cell.
y.backward(retain_graph=True)

In [8]:
print(x.grad)

None


In [9]:
print(w.grad)

tensor(1.)


In [10]:
print(b.grad)

tensor(1.)


In [11]:
z.backward()

In [12]:
print(m.grad)

tensor(1.)


In [13]:
x= np.array([1.0,2.0,3.0])

In [14]:
x

array([1., 2., 3.])

In [15]:
y = torch.tensor(x)

In [16]:
x.dtype

dtype('float64')

In [17]:
a = torch.tensor([1.,2.,3.,5,6], dtype=torch.float32)

In [18]:
b = torch.tensor([1.,2.,3.,5,6], dtype=torch.float32)

In [19]:
diff=a-b
diff

tensor([0., 0., 0., 0., 0.])

In [20]:
diff.numel()

5

## Let's create a neural network

In [21]:
"""
Suppose this is our dataset
"""
import pandas as pd
df = pd.DataFrame({"Age":[20,21,22,23,24,25], "Experience":[1,2,3,4,5,6],"Salary":[20000,21000,22000,23000,24000,25000]})
df

Unnamed: 0,Age,Experience,Salary
0,20,1,20000
1,21,2,21000
2,22,3,22000
3,23,4,23000
4,24,5,24000
5,25,6,25000


In [22]:
X= torch.tensor([[20,1],[21,2],[22,3],[23,4],[24,5],[25,6]], dtype=torch.float32)

In [23]:
X

tensor([[20.,  1.],
        [21.,  2.],
        [22.,  3.],
        [23.,  4.],
        [24.,  5.],
        [25.,  6.]])

In [24]:
y=torch.tensor([[20000,21000,22000,23000,24000,25000]], dtype=torch.float32).T

In [25]:
y

tensor([[20000.],
        [21000.],
        [22000.],
        [23000.],
        [24000.],
        [25000.]])

In [26]:
# Model: y_pred = X @ W.T + b
print(X.shape) # X stacks the samples x1, x2, ..., xm as rows: (m samples, 2 features)
print(y.shape) # one target value per sample: (m, 1)

torch.Size([6, 2])
torch.Size([6, 1])


In [27]:
"""
salary = w1*Age + W2*Experience + b
"""
# A single output (Salary) needs a single bias term; requires_grad=True lets autograd track it.
b=torch.randn(1, requires_grad=True)
# W has shape (number of outputs, number of inputs) = (1, 2): one weight each for Age and Experience.
W= torch.randn(1,2, requires_grad=True)

In [28]:
print(W)
print(b)

tensor([[-0.5553, -0.1381]], requires_grad=True)
tensor([0.8382], requires_grad=True)


In [29]:
def model(X):
    """Linear model: multiply each row of X by the weights and add the bias.

    Uses the notebook-level parameters W (created with shape (1, 2)) and
    b (created with shape (1,)), so the result has one column per row of W.
    """
    return torch.matmul(X, W.T) + b

In [30]:
def mse(y_pred, target=None):
    """Mean squared error between predictions and targets.

    Args:
        y_pred: Tensor of predictions; its first dimension is the number of
            samples the error is averaged over.
        target: Tensor of true values, shaped like ``y_pred``. When omitted,
            the notebook-level tensor ``y`` is used, so existing
            ``mse(y_pred)`` calls keep working.

    Returns:
        A scalar tensor: the sum of squared errors divided by ``len(y_pred)``.
    """
    if target is None:
        # Backward-compatible fallback. Prefer passing `target` explicitly:
        # `y` is rebound several times in this notebook, so relying on the
        # global makes the result depend on cell execution order.
        target = y
    squared_error = (y_pred - target) ** 2
    return torch.sum(squared_error) / len(y_pred)

In [31]:
y_pred = model(X)

In [32]:
loss = mse(y_pred)
print(y_pred)
print(loss)

tensor([[-10.4063],
        [-11.0997],
        [-11.7932],
        [-12.4866],
        [-13.1801],
        [-13.8735]], grad_fn=<AddBackward0>)
tensor(5.0972e+08, grad_fn=<DivBackward0>)


In [33]:
y.shape
y_pred.shape

torch.Size([6, 1])

## Key Takeaways
- X is a matrix whose rows are the sample vectors x1, x2, ..., xm
- The shape of W is (number of output columns, number of input columns)
- For 1 output, there is only 1 bias

In [34]:
# Backpropagate: compute dL/dW and dL/db and store them in W.grad and b.grad.
# This call only computes gradients; W and b themselves are updated in a later cell.
loss.backward()

In [35]:
"""
print(W)
print("dL/dW =", W.grad)
print(b)
print("dL/db =", b.grad)
"""


'\nprint(W)\nprint("dL/dW =", W.grad)\nprint(b)\nprint("dL/db =", b.grad)\n'

In [36]:
# One manual gradient-descent step with learning rate 1e-5.
# torch.no_grad() keeps autograd from recording the in-place parameter updates below.
with torch.no_grad():
    W -= 1e-5 * W.grad  # W = W - lr * dL/dW
    b -= 1e-5 * b.grad  # b = b - lr * dL/db
    # backward() accumulates into .grad, so reset the gradients to zero
    # before the next backward pass adds new values on top of the old ones.
    W.grad.zero_()
    b.grad.zero_()

In [37]:
print(W)
print(b)

tensor([[9.6335, 1.4961]], requires_grad=True)
tensor([1.2885], requires_grad=True)


In [38]:
# Repeat the forward / backward / update cycle for 200 epochs (learning rate 1e-3).
for i in range(200):
    # Forward pass: predictions and their mean squared error.
    y_pred = model(X)
    loss = mse(y_pred)
    # Backward pass: fill W.grad and b.grad.
    loss.backward()
    # Update each parameter in place without tracking the update in autograd,
    # then clear its gradient so the next backward() starts from zero.
    with torch.no_grad():
        W.sub_(1e-3 * W.grad)
        W.grad.zero_()
        b.sub_(1e-3 * b.grad)
        b.grad.zero_()
# Loss of the final epoch, computed before the last parameter update.
print(loss)

tensor(9803.8584, grad_fn=<DivBackward0>)


In [39]:
#!pip install jovian --upgrade -q

## Linear Regression with PyTorch built-ins

In [81]:
import torch.nn as nn
import torch.nn.functional as F

In [82]:
X= torch.tensor([[20,1],[21,2],[22,3],[23,4],[24,5],[25,6]], dtype=torch.float32)
y=torch.tensor([[20000,21000,22000,23000,24000,25000]], dtype=torch.float32).T

In [83]:
from torch.utils.data import DataLoader,TensorDataset


In [84]:
# TensorDataset wraps X and y so that indexing returns the matching (features, targets) pair.
td = TensorDataset(X,y)
td[:3]  # first 3 rows of X together with the first 3 rows of y

(tensor([[20.,  1.],
         [21.,  2.],
         [22.,  3.]]),
 tensor([[20000.],
         [21000.],
         [22000.]]))

In [85]:
# DataLoader iterates over td in mini-batches of 4 rows (the last batch holds the remaining 2).
# With shuffle=True the row order is redrawn on every pass, so the batches differ between iterations.
dl = DataLoader(td, batch_size=4, shuffle=True)


In [86]:
list(dl) # created a batch of size 4

[[tensor([[20.,  1.],
          [25.,  6.],
          [22.,  3.],
          [21.,  2.]]),
  tensor([[20000.],
          [25000.],
          [22000.],
          [21000.]])],
 [tensor([[23.,  4.],
          [24.,  5.]]),
  tensor([[23000.],
          [24000.]])]]

In [87]:
for xl,yl in dl:
    print(xl)
    print(yl)

tensor([[24.,  5.],
        [25.,  6.],
        [20.,  1.],
        [21.,  2.]])
tensor([[24000.],
        [25000.],
        [20000.],
        [21000.]])
tensor([[23.,  4.],
        [22.,  3.]])
tensor([[23000.],
        [22000.]])


## torch.nn.Linear

- The `nn.Linear` class initializes its weights and bias automatically

In [88]:
import torch.nn as nn
# Linear regression layer: 2 input features (Age, Experience) and 1 output target (Salary).
# NOTE: this rebinds the name `model`, which an earlier cell defined as a hand-written
# function built on W and b; code that calls model(X) after this cell uses this layer instead.
model = nn.Linear(in_features=2, out_features=1)
model  # display the layer's configuration

Linear(in_features=2, out_features=1, bias=True)

In [89]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.3163,  0.4102]], requires_grad=True),
 Parameter containing:
 tensor([0.4858], requires_grad=True)]

In [90]:
model.weight

Parameter containing:
tensor([[-0.3163,  0.4102]], requires_grad=True)

In [91]:
model.bias

Parameter containing:
tensor([0.4858], requires_grad=True)

In [92]:
y_pred = model(X)
print(y_pred)

tensor([[-5.4303],
        [-5.3364],
        [-5.2426],
        [-5.1487],
        [-5.0549],
        [-4.9610]], grad_fn=<AddmmBackward>)


In [93]:
los = F.mse_loss

In [95]:
#  ?nn.Linear

In [96]:
print(y.shape)
print(y_pred.shape)

torch.Size([6, 1])
torch.Size([6, 1])


In [97]:
loss = los(y_pred,y)
loss

tensor(5.0940e+08, grad_fn=<MseLossBackward>)

In [98]:
# Now optimizing the parameters to reduce the loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

### ANN Training

In [8]:
import torch, torchvision

from torchvision import datasets, transforms

In [9]:
torch.__version__

'1.8.1+cpu'

In [10]:
# Training split of MNIST. root='' stores the files relative to the current working directory,
# download=True fetches them when needed, and ToTensor converts each image to a tensor.
train = datasets.MNIST('', train=True, download=True, transform = transforms.Compose([transforms.ToTensor()]))

# Held-out test split (train=False), loaded with the same transform.
testt = datasets.MNIST('', train=False, download=True, transform = transforms.Compose([transforms.ToTensor()]))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST\raw\train-images-idx3-ubyte.gz to MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST\raw\train-labels-idx1-ubyte.gz to MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST\raw\t10k-images-idx3-ubyte.gz to MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST\raw\t10k-labels-idx1-ubyte.gz to MNIST\raw

Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!


In [11]:
from torch.utils.data import DataLoader

In [13]:
# Despite their names, these are DataLoader objects that yield mini-batches of 10 samples.
# Training batches are reshuffled on every pass over the data.
train_dataset = DataLoader(train, batch_size=10, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps the test batches reproducible.
test_dataset = DataLoader(testt, batch_size=10, shuffle=False)

In [14]:
len(train_dataset)

6000

In [15]:
len(test_dataset)

1000