In [1]:
import torch
import os 
# set available GPU
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

## 矩阵与张量

In [2]:
x = torch.tensor(
    [
        [1, 2, 3],
        [4, 5.0, 6],
        [7, 8, 9]
    ]
)
print(x.shape, x)

torch.Size([3, 3]) tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


In [3]:
y = torch.Tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]
    ]
)
print(y.shape, y)

torch.Size([3, 3]) tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


In [4]:
print(x.dtype, y.dtype)

torch.float32 torch.float32


In [5]:
x = torch.tensor(
    [
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ],
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ]
    ]
    
)
print(x.shape)

torch.Size([2, 3, 3])


In [6]:
x = torch.zeros(1, 2, 3, 1)
print(x)

tensor([[[[0.],
          [0.],
          [0.]],

         [[0.],
          [0.],
          [0.]]]])


In [7]:
x = torch.ones(1, 2, 3, 1)
x = torch.rand(1, 2, 3, 1)
print(x)
x = torch.randn([1, 2, 3, 1], device="cuda:0", requires_grad=True)
print(x)

tensor([[[[0.0437],
          [0.5319],
          [0.1929]],

         [[0.0923],
          [0.5913],
          [0.0879]]]])


RuntimeError: No CUDA GPUs are available

## 四则运算与矩阵乘法

In [None]:
x = torch.randn(2, 2, 2)
print('x', x)
y = torch.randn(2, 2)[::, None].expand(2, 2, 2)
print('y', y)
z = x + y
print('z', z)
a = x - y
print('a', a)
b = x * y
print('b', b)
c = x / y

x tensor([[[ 1.7883,  1.8412],
         [ 0.4796,  0.3216]],

        [[ 1.3055,  1.3130],
         [-0.1197,  0.1863]]])
y tensor([[[-1.7623, -0.3706],
         [-1.7623, -0.3706]],

        [[-1.8145,  0.4805],
         [-1.8145,  0.4805]]])
z tensor([[[ 0.0260,  1.4707],
         [-1.2827, -0.0489]],

        [[-0.5091,  1.7935],
         [-1.9342,  0.6668]]])
a tensor([[[ 3.5506,  2.2118],
         [ 2.2419,  0.6922]],

        [[ 3.1200,  0.8325],
         [ 1.6948, -0.2942]]])
b tensor([[[-3.1516, -0.6823],
         [-0.8452, -0.1192]],

        [[-2.3689,  0.6309],
         [ 0.2172,  0.0895]]])


In [None]:
l = torch.zeros(2, 3)
print(l + x)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/yuliu/anaconda3/envs/AGI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_3561188/3335689457.py", line 2, in <module>
    print(l + x)
RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 2

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yuliu/anaconda3/envs/AGI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2105, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
  File "/home/yuliu/anaconda3/envs/AGI/lib/python3.8/site-packages/IPython/core/ultratb.py", line 1396, in structured_traceback
    return FormattedTB.structured_traceback(
  File "/home/yuliu/anaconda3/envs/AGI/lib/python3.8/site-packages/IPython/core/ultratb.py", line 1287, in structured_traceback
    return 

#### 矩阵乘法

In [None]:
x = torch.randn(2, 3)
y = torch.randn(4, 3)
print(y.T.shape)
z = x @ y.T  # ab bc -> ac
print(z.shape)

torch.Size([3, 4])
torch.Size([2, 4])


In [None]:
z1 = torch.matmul(x, y.T)
print(z == z1)

tensor([[True, True, True, True],
        [True, True, True, True]])


In [None]:
x = torch.randn(5, 2, 3)
y = torch.randn(5, 3, 4)
z = x @ y
print(z.shape)

torch.Size([5, 2, 4])


In [None]:
x = torch.randn(2, 5, 2, 3)
y = torch.randn(2, 5, 4, 3)
# z = x @ y.T
# z = x @ y.permute(0, 1, 3, 2)
# z = x @ y.transpose(2, 3)
z = x @ y.transpose(-1, -2)
print(z.shape)

torch.Size([2, 5, 2, 4])


In [None]:
z = torch.einsum('bcij,bckj->bcik', x, y)
print(z.shape)

torch.Size([2, 5, 2, 4])


## 自动微分

In [None]:
x = torch.randn(2, 3)
x.requires_grad_(True)
y = (x * 2).sum()
y.backward()
print(x.grad)

tensor([[2., 2., 2.],
        [2., 2., 2.]])


In [None]:
x = torch.randn([2, 3], requires_grad=True)
y = torch.randn([3, 4], requires_grad=True)
z = x @ y
l = z.sum()
l.backward()

In [None]:
print(x.grad)
print(y.grad)

tensor([[-0.4454,  1.1645, -0.1907],
        [-0.4454,  1.1645, -0.1907]])
tensor([[-2.5393, -2.5393, -2.5393, -2.5393],
        [ 0.4827,  0.4827,  0.4827,  0.4827],
        [ 1.6783,  1.6783,  1.6783,  1.6783]])


## 神经网络

In [8]:
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(5, 2)

x = torch.randn(10, 5)
y_target = torch.randn(10, 2)
criterion = nn.MSELoss()  
optimizer = optim.SGD(model.parameters(), lr=0.1)  
  
for epoch in range(10):  
    optimizer.zero_grad()  
    y = model(x)
    loss = criterion(y, y_target)  
    loss.backward()  
    optimizer.step()
    print("loss: ", loss.item())

loss:  1.559524416923523
loss:  1.3260045051574707
loss:  1.1665043830871582
loss:  1.0556107759475708
loss:  0.9769428372383118
loss:  0.9198894500732422
loss:  0.877536952495575
loss:  0.8453460931777954
loss:  0.8203096389770508
loss:  0.800412654876709


In [9]:
from torchvision.datasets import CIFAR10, MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.nn as nn
from torch import optim


torch.manual_seed(0)

<torch._C.Generator at 0x185b1a3ac10>

In [10]:
# Load dataset
# dataset_train = CIFAR10(root='./data', train=True, transform=ToTensor(), download=True)
# dataset_test = CIFAR10(root='./data', train=False, transform=ToTensor(), download=True)
dataset_train = MNIST(root='./data', train=True, transform=ToTensor(), download=True)
dataset_test = MNIST(root='./data', train=False, transform=ToTensor(), download=True)
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)

for x, y in tqdm(dataloader_train):
    print(x.shape, y.shape)
    print(y)
    break

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 14458019.17it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<?, ?it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 8697473.09it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw




  0%|          | 0/1875 [00:00<?, ?it/s]

torch.Size([32, 1, 28, 28]) torch.Size([32])
tensor([6, 8, 8, 7, 8, 0, 0, 5, 6, 0, 0, 3, 5, 8, 1, 6, 2, 4, 1, 4, 9, 4, 3, 4,
        1, 4, 8, 7, 6, 3, 1, 8])





In [14]:
# Define model
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        # self.fc = nn.Linear(32 * 32 * 3, 10)
        self.fc = nn.Linear(28 * 28, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    
# Define training and testing loop
def train(model, dataloader):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # training loop
    for epoch in range(5):
        for x, y in tqdm(dataloader):
            x, y = x.cuda(), y.cuda()
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
        print("loss: ", loss.item())

def test(model, dataloader):
    acc = 0
    for x, y in tqdm(dataloader):
        x, y = x.cuda(), y.cuda()
        y_pred = model(x)
        acc += (y_pred.argmax(dim=1) == y).float().mean().item()
    print("acc: ", acc / len(dataloader))


In [13]:
model = MyModel().cuda()
train(model, dataloader_train)
test(model, dataloader_test)

RuntimeError: No CUDA GPUs are available

In [15]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc = nn.Sequential(
            # nn.Linear(32 * 32 * 3, 64),
            nn.Linear(28 * 28, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    
model = MyModel().cuda()
train(model, dataloader_train)
test(model, dataloader_test)

RuntimeError: No CUDA GPUs are available

In [16]:
# conv model
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv = nn.Sequential(
            # nn.Conv2d(3, 16, 3, padding=1),
            nn.Conv2d(1, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.fc = nn.Sequential(
            # nn.Linear(64 * 8 * 8, 64),
            nn.Linear(64 * 7 * 7, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

model = MyModel().cuda()
train(model, dataloader_train)
test(model, dataloader_test)

RuntimeError: No CUDA GPUs are available