In [34]:
import torch
from torch import nn
import numpy as np
import torch.utils.data as Data
import torch.optim as optim

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
torch.manual_seed(1)
torch.set_default_tensor_type('torch.FloatTensor')



print(torch.__version__)

<torch._C.Generator at 0x7ff73dc7dbb0>

1.3.1


In [9]:
# set input feature number 
NUM_FEATURES = 2
# set example number
NUM_EXAMPLES = 10000
# set batch size
BATCH_SZ = 10
# set learning rate
LEARNING_RATE = 1e-3
# set epochs
EPOCHS = 50

# set true weight and bias in order to generate corresponded label
TRUE_W = [2, -3.4]
TRUE_B = 4.2

In [10]:
features = torch.randn(NUM_EXAMPLES, NUM_FEATURES, dtype=torch.float32)
features.shape
features[0:1]
labels = TRUE_W[0] * features[:, 0] + TRUE_W[1] * features[:, 1] + TRUE_B
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float32)
labels.shape
labels[0:1]

torch.Size([10000, 2])

tensor([[-1.5256, -0.7502]])

torch.Size([10000])

tensor([3.6894])

In [15]:
# combine featues and labels of dataset
dataset = Data.TensorDataset(features, labels)

next(iter(dataset))

(tensor([-1.5256, -0.7502]), tensor(3.6894))

In [16]:
# put dataset into DataLoader
data_iter = Data.DataLoader(
    dataset=dataset,            # torch TensorDataset format
    batch_size=BATCH_SZ,      # mini batch size
    shuffle=True,               # whether shuffle the data or not
    num_workers=4,              # read data in multithreading
)

In [17]:
for X, y in data_iter:
    print(X, '\n', y)
    break


tensor([[-1.4693,  0.1884],
        [ 1.1691,  1.1241],
        [-1.3176, -0.5935],
        [ 0.9301, -0.6148],
        [ 0.3362,  0.2055],
        [-1.2149,  0.1785],
        [-2.1207, -0.6231],
        [ 0.3087, -0.8816],
        [ 0.1379, -0.6682],
        [ 0.4606, -1.0161]]) 
 tensor([0.6127, 2.7107, 3.5858, 8.1553, 4.1657, 1.1661, 2.0839, 7.8114, 6.7656,
        8.5689])


In [29]:
class LinearReg(nn.Module):
    def __init__(self, n_feature):
        super().__init__()      
        # function prototype: `torch.nn.Linear(in_features=2, out_features=1, bias=True)`
        self.linear = nn.Linear(n_feature, 1, bias=True) 
        
    def forward(self, x):
        y = self.linear(x)
        return y
    
network = LinearReg(NUM_FEATURES)
network

LinearReg(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)

In [33]:
# init.normal_(network.weight, mean=0.0, std=0.03)
# init.constant_(network.bias, val=0.0)  # or you can use `net[0].bias.data.fill_(0)` to modify it directly

for param in network.parameters():
    print(param)


Parameter containing:
tensor([[-0.0866,  0.1961]], requires_grad=True)
Parameter containing:
tensor([0.0349], requires_grad=True)


In [35]:
loss = nn.MSELoss()
optimizer = optim.SGD(network.parameters(), lr=LEARNING_RATE)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.001
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [36]:
for epoch in range(1, EPOCHS + 1):
    for X, y in data_iter:
        output = network(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad() # reset gradient, equal to net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 1, loss: 1.187454
epoch 2, loss: 0.005361
epoch 3, loss: 0.000262
epoch 4, loss: 0.000113
epoch 5, loss: 0.000055
epoch 6, loss: 0.000107
epoch 7, loss: 0.000035
epoch 8, loss: 0.000096
epoch 9, loss: 0.000079
epoch 10, loss: 0.000053
epoch 11, loss: 0.000117
epoch 12, loss: 0.000177
epoch 13, loss: 0.000025
epoch 14, loss: 0.000112
epoch 15, loss: 0.000107
epoch 16, loss: 0.000182
epoch 17, loss: 0.000090
epoch 18, loss: 0.000112
epoch 19, loss: 0.000282
epoch 20, loss: 0.000178
epoch 21, loss: 0.000047
epoch 22, loss: 0.000059
epoch 23, loss: 0.000214
epoch 24, loss: 0.000117
epoch 25, loss: 0.000062
epoch 26, loss: 0.000066
epoch 27, loss: 0.000121
epoch 28, loss: 0.000140
epoch 29, loss: 0.000054
epoch 30, loss: 0.000132
epoch 31, loss: 0.000129
epoch 32, loss: 0.000106
epoch 33, loss: 0.000103
epoch 34, loss: 0.000034
epoch 35, loss: 0.000045
epoch 36, loss: 0.000191
epoch 37, loss: 0.000052
epoch 38, loss: 0.000109
epoch 39, loss: 0.000054
epoch 40, loss: 0.000100
epoch 41,

In [46]:
# result comparision
for name, param in network.named_parameters():
    if param.requires_grad:
        print(name)
        print(param.data)



linear.weight
tensor([[ 1.9999, -3.4000]])
linear.bias
tensor([4.2000])


In [47]:
TRUE_W, TRUE_B

([2, -3.4], 4.2)