In [1]:
import torch

In [2]:
x = torch.tensor(3.0, requires_grad=True)
x

tensor(3., requires_grad=True)

In [3]:
y = x**2

In [4]:
x,y

(tensor(3., requires_grad=True), tensor(9., grad_fn=<PowBackward0>))

In [5]:
y.backward()
## calculate all the backward derivation

In [6]:
## get the grad value
x.grad

tensor(6.)

In [7]:
## other example

In [8]:
x = torch.tensor(3.0, requires_grad=True)
x

tensor(3., requires_grad=True)

In [9]:
y = x**2

In [10]:
y

tensor(9., grad_fn=<PowBackward0>)

In [11]:
z = torch.sin(y)
z

tensor(0.4121, grad_fn=<SinBackward0>)

In [12]:
z.backward()

In [13]:
x.grad

tensor(-5.4668)

In [14]:
x = torch.tensor(4.0, requires_grad=True)
x

tensor(4., requires_grad=True)

In [15]:
y = x**2
y

tensor(16., grad_fn=<PowBackward0>)

In [16]:
z = torch.sin(y)
z

tensor(-0.2879, grad_fn=<SinBackward0>)

In [17]:
z.backward()

In [18]:
x.grad

tensor(-7.6613)

In [19]:
y.grad

  y.grad


## Small nural network

### First creating it manually

In [20]:
x = torch.tensor(6.7)
y = torch.tensor(0)

w = torch.tensor(1)
b = torch.tensor(0)

x, y, w, b

(tensor(6.7000), tensor(0), tensor(1), tensor(0))

In [21]:
def loss(y_pred, y):
    epsilon = 1e-10
    y_pred = torch.clamp(y_pred, epsilon, 1-epsilon)
    return -y*torch.log(y_pred) - (1-y)*torch.log(1-y_pred)

def predict(x):
    return torch.sigmoid(w*x + b)

l = loss(predict(x), y)
l

tensor(6.7012)

In [22]:
## calculate the derivatives
y_hat = predict(x)

dl_dw = (y_hat - y)*x
dl_db = (y_hat - y)

dl_dw

tensor(6.6918)

In [23]:
dl_db

tensor(0.9988)

In [24]:
## we can use chain rule and see each derivation

# dl_dw = dl_y_pre * y_hat_dz * dz_dw

dl_y_pre = (y_hat - y)/(y_hat*(1-y_hat))

y_hat_dz = y_hat*(1-y_hat)

dz_dw = x

dl_dw = dl_y_pre * y_hat_dz * dz_dw

dl_dw

tensor(6.6918)

In [25]:
# dl_db = dl_y_pre * y_hat_dz * dz_db

dl_y_pre = (y_hat - y)/(y_hat*(1-y_hat))

y_hat_dz = y_hat*(1-y_hat)

dz_db = 1

dl_db = dl_y_pre * y_hat_dz * dz_db

dl_db

tensor(0.9988)

In [26]:
### so we get the gradients

## UseAutograde

In [27]:
import torch

In [28]:
x = torch.tensor(6.7)
y = torch.tensor(0.0)
x,y

(tensor(6.7000), tensor(0.))

In [29]:
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)
w,b

(tensor(1., requires_grad=True), tensor(0., requires_grad=True))

In [30]:
z = w*x + b
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [31]:
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [32]:
loss  = -y*torch.log(y_pred) - (1-y)*torch.log(1-y_pred)
loss

tensor(6.7012, grad_fn=<SubBackward0>)

In [33]:
loss

tensor(6.7012, grad_fn=<SubBackward0>)

In [34]:
loss.backward()

In [35]:
w.grad

tensor(6.6918)

In [36]:
b.grad

tensor(0.9988)

In [37]:
x = torch.tensor([1.0,2.0,3.0], requires_grad= True)
x

tensor([1., 2., 3.], requires_grad=True)

In [38]:
y = (x**2).mean()
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [39]:
y.backward()

In [40]:
x.grad

tensor([0.6667, 1.3333, 2.0000])

In [41]:
## x.grad.zero

## Clarning grad

In [42]:
x = torch.tensor(2.0, requires_grad = True)
x

tensor(2., requires_grad=True)

In [43]:
y = x**2
y

tensor(4., grad_fn=<PowBackward0>)

In [44]:
y.backward()

In [45]:
x.grad

tensor(4.)

In [46]:
x.grad

tensor(4.)

In [47]:
## it alos add the previous numbers

In [48]:
## it store the values

In [49]:
## to clear the gradient
x.grad.zero_()

tensor(0.)

In [50]:
print(x.grad)

tensor(0.)


In [51]:
##
## require_grad_(False)
## detach()
## torch.no_grad()

In [52]:
x

tensor(2., requires_grad=True)

In [53]:
## inplace ( _ )
## of the grading
x.requires_grad_(False)
x

tensor(2.)

In [54]:
y = x**2
y

tensor(4.)

In [55]:
y.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [56]:
### detach

In [57]:
x = torch.tensor(2.0, requires_grad = True)
x

tensor(2., requires_grad=True)

In [58]:
## detecth
z = x.detach()
z

tensor(2.)

In [59]:
y = x**2
y

tensor(4., grad_fn=<PowBackward0>)

In [60]:
y1 = z**2
y1


tensor(4.)

In [61]:
# The result will never require gradient.

# This method also affects forward mode AD gradients and the result will never
# have forward mode AD gradients.

# .. note::

#   Returned Tensor shares the same storage with the original one.
#   In-place modifications on either of them will be seen, and may trigger


z = x.detach()

In [62]:
z

tensor(2.)

In [63]:
y.backward()

In [66]:
y1.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [67]:
### other way

x = torch.tensor(2.0, requires_grad = True)
x

tensor(2., requires_grad=True)

In [68]:
## new method

with torch.no_grad():
    y = x**2

In [69]:
y

tensor(4.)

In [71]:
y.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [72]:
## new method

with torch.no_grad():
    y = x**2
    print(y)

tensor(4.)


In [73]:
## new method

with torch.no_grad():
    y = x**2
    print(y)
    y.backward()
    print(x.grad)


tensor(4.)


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [74]:
print("Done AutoGrad")

Done AutoGrad
