In [1]:
# -*- coding: utf-8 -*-
import torch

# Tensor settings shared by every tensor created below.
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Problem dimensions: N is the batch size, D_in the number of input features,
# H the width of the hidden layer and D_out the number of output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets. requires_grad is left at its default (False),
# so autograd does not compute gradients with respect to these tensors.
x = torch.randn((N, D_in), dtype=dtype, device=device)
y = torch.randn((N, D_out), dtype=dtype, device=device)

# Random initial weights. requires_grad=True asks autograd to accumulate
# d(loss)/d(weight) into w1.grad and w2.grad on every backward pass.
w1 = torch.randn((D_in, H), dtype=dtype, device=device, requires_grad=True)
w2 = torch.randn((H, D_out), dtype=dtype, device=device, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear layer -> ReLU (clamp at zero) -> linear layer.
    # Autograd records these operations, so no backward pass is written by hand.
    hidden_relu = torch.clamp(torch.mm(x, w1), min=0)
    y_pred = torch.mm(hidden_relu, w2)

    # Sum of squared errors. The result is a 0-dimensional tensor;
    # loss.item() extracts the Python scalar it holds.
    loss = ((y_pred - y) ** 2).sum()
    print(t, loss.item())

    # Backward pass: fills w1.grad and w2.grad with the gradient of the loss
    # with respect to each weight tensor.
    loss.backward()

    # Plain gradient-descent step. The in-place updates run under
    # torch.no_grad() so that they are not recorded in the autograd graph.
    # torch.optim.SGD can be used to perform the same update.
    with torch.no_grad():
        for weight in (w1, w2):
            weight.sub_(learning_rate * weight.grad)
            # backward() accumulates into .grad, so reset it before the
            # next iteration.
            weight.grad.zero_()

0 38082844.0
1 38286492.0
2 39968064.0
3 35979296.0
4 25413558.0
5 14029658.0
6 6859794.5
7 3520823.25
8 2121896.5
9 1492636.375
10 1157391.25
11 943341.5625
12 788443.0625
13 668121.5
14 570959.9375
15 490982.34375
16 424466.5
17 368597.96875
18 321427.53125
19 281383.40625
20 247169.28125
21 217783.171875
22 192459.609375
23 170566.1875
24 151552.125
25 134964.9375
26 120454.8125
27 107738.6015625
28 96564.5078125
29 86729.875
30 78030.421875
31 70313.8125
32 63455.75
33 57348.21875
34 51903.87109375
35 47046.375
36 42697.82421875
37 38796.0859375
38 35289.375
39 32131.224609375
40 29283.67578125
41 26714.39453125
42 24392.755859375
43 22291.9375
44 20389.13671875
45 18664.03515625
46 17098.2265625
47 15675.267578125
48 14381.076171875
49 13202.5830078125
50 12130.7978515625
51 11152.353515625
52 10259.125
53 9442.7802734375
54 8696.119140625
55 8012.5810546875
56 7386.44580078125
57 6813.34228515625
58 6287.79736328125
59 5805.5966796875
60 5362.55126953125
61 4955.3076171875
62 458

In [1]:
from sklearn.ensemble import AdaBoostClassifier,RandomForestClassifier,BaggingClassifier,GradientBoostingClassifier
