In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from model import myPointNet
import torch.utils.data as data

# 针对dataset
import os
import numpy as np
from torch.utils.data import Dataset
from dataset import ShapenetBinaryDataset
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

In [24]:
# 步骤 1: 准备数据集（假设已有数据集）
dataset = ShapenetBinaryDataset(data_dir='data', class_labels=['airplane', 'backpack'])
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

In [25]:
# 创建训练集 DataLoader
train_data_loader = DataLoader(train_data, batch_size=8, shuffle=True)
# 创建测试集 DataLoader
test_data_loader = DataLoader(test_data, batch_size=8, shuffle=False)

In [26]:
data_iter = iter(train_data_loader)

# 获取下一个数据批次
batch = next(data_iter)

# 打印批次内容
print(len(batch))
print(len(batch[0]))
print(batch)

2
8
[tensor([[[ 9.2870e-02, -2.1650e-02,  2.9110e-02],
         [-1.4087e-01, -6.7560e-02, -1.2300e-02],
         [ 1.5680e-01, -5.0140e-02, -3.2960e-02],
         ...,
         [-3.0471e-01, -1.1040e-02, -1.0680e-02],
         [ 5.3100e-03, -5.0460e-02,  1.7272e-01],
         [-1.9430e-02, -1.8700e-02, -2.7700e-02]],

        [[-1.4211e-01,  2.9911e-01, -8.4270e-02],
         [ 1.4566e-01,  1.1990e-02, -1.3365e-01],
         [ 1.4566e-01,  6.3170e-02, -1.1807e-01],
         ...,
         [-1.1258e-01, -2.7780e-02,  2.8352e-01],
         [-1.0501e-01, -1.0349e-01,  1.8457e-01],
         [-9.4020e-02, -2.1341e-01, -3.3270e-02]],

        [[ 3.1540e-02, -4.8740e-02,  2.0822e-01],
         [ 3.7450e-02,  1.5284e-01, -1.5407e-01],
         [ 5.8800e-03,  1.4230e-02, -1.0871e-01],
         ...,
         [-1.5740e-02, -2.1077e-01, -3.3002e-01],
         [-1.9880e-02, -3.1905e-01, -3.6272e-01],
         [-3.4750e-02, -3.2559e-01, -3.6470e-01]],

        ...,

        [[ 3.6560e-02, -3.2131e-0

In [27]:
# 步骤 2: 定义模型
pointNet_model = myPointNet()

# 步骤 3: 选择损失函数
criterion = nn.CrossEntropyLoss()  # 使用交叉熵损失

# 步骤 4: 选择优化器
optimizer = optim.SGD(pointNet_model.parameters(), lr=0.01)

In [33]:
import torch

# 创建一个示例张量
x = torch.tensor([[[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]]])

# 在第2维中找到每列的最大值
result = torch.max(x, 2, keepdim=True)[0]
result = result.view(-1, 3)
# 输出结果
print(result)

tensor([3, 6, 9])


In [34]:
import torch.nn as nn
from torch.autograd import Variable
import torch
import numpy as np
class STD3(nn.Module):
    def __init__(self):
        super(STD3, self).__init__()
        self.model = nn.Sequential(
            nn.Conv1d(3,64,1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64,128,1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128,1024,1),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.MaxPool1d(1),
            nn.Linear(1024,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,9),
            nn.ReLU(),
        )
    def forward(self,x):
        batchsize = len(x)
        x = self.model(x)
        # iden = Variable(torch.from_numpy(np.array([1,0,0,0,1,0,0,0,1]).astype(np.float32))).view(1,9).repeat(batchsize,1)
        # x+=iden
        x = x.view((-1,3,3))
        return x
#验证一下T-Net创建的是否正确
input = torch.ones((64,3,1024)) # 因为用的是Conv1d 所以需要X,Y轴相互替换 (batch_size, num_features, num_points)
test = STD3()
output = test(input)
len(output),output

(65536,
 tensor([[[0.0000, 0.0000, 0.0234],
          [0.0000, 0.0307, 0.0000],
          [0.0015, 0.0000, 0.0010]],
 
         [[0.0000, 0.0000, 0.0234],
          [0.0000, 0.0307, 0.0000],
          [0.0015, 0.0000, 0.0010]],
 
         [[0.0000, 0.0000, 0.0234],
          [0.0000, 0.0307, 0.0000],
          [0.0015, 0.0000, 0.0010]],
 
         ...,
 
         [[0.0000, 0.0000, 0.0234],
          [0.0000, 0.0307, 0.0000],
          [0.0015, 0.0000, 0.0010]],
 
         [[0.0000, 0.0000, 0.0234],
          [0.0000, 0.0307, 0.0000],
          [0.0015, 0.0000, 0.0010]],
 
         [[0.0000, 0.0000, 0.0234],
          [0.0000, 0.0307, 0.0000],
          [0.0015, 0.0000, 0.0010]]], grad_fn=<ViewBackward0>))

In [8]:
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
class STN3d(nn.Module):
    def __init__(self):
        super(STN3d, self).__init__()
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64) # 归一化 => 防止过拟合&加快收敛
        self.bn2 = nn.BatchNorm1d(128) # 归一化 => 防止过拟合&加快收敛
        self.bn3 = nn.BatchNorm1d(1024) # 归一化 => 防止过拟合&加快收敛
        self.bn4 = nn.BatchNorm1d(512) # 归一化 => 防止过拟合&加快收敛
        self.bn5 = nn.BatchNorm1d(256) # 归一化 => 防止过拟合&加快收敛


    def forward(self, x):
        batchsize = x.size()[0]
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = torch.max(x, 2, keepdim=True)[0] # 竖着的方向上找到 max
        x = x.view(-1, 1024) # 变成1024列数据，行自己适应

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)

        iden = Variable(torch.from_numpy(np.array([1,0,0,0,1,0,0,0,1]).astype(np.float32))).view(1,9).repeat(batchsize,1) # Variable可以把输出的Tensor变成一个输入变量，这样梯度就不会回传了。
        if x.is_cuda:
            iden = iden.cuda()
        x = x + iden
        x = x.view(-1, 3, 3)
        return x

In [9]:
from torchsummary import summary
import torch
input = torch.ones((64,1024,3)) # 因为用的是Conv1d 所以需要X,Y轴相互替换 (batch_size, num_features, num_points)
input = input.permute(0,2,1) # 让dim=1 和 dim=2 互换，因为这里用到的是一维卷积
print(input.size())
model = STN3d()
input_size = (3, 100)  # 输入大小，注意这里的顺序是 (num_features, num_points)
summary(model, input_size=input_size,batch_size=64)  # 适配你的设备
print(model(input).size()) # 这一批我就搞一个

torch.Size([64, 3, 1024])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1              [64, 64, 100]             256
       BatchNorm1d-2              [64, 64, 100]             128
            Conv1d-3             [64, 128, 100]           8,320
       BatchNorm1d-4             [64, 128, 100]             256
            Conv1d-5            [64, 1024, 100]         132,096
       BatchNorm1d-6            [64, 1024, 100]           2,048
            Linear-7                  [64, 512]         524,800
       BatchNorm1d-8                  [64, 512]           1,024
            Linear-9                  [64, 256]         131,328
      BatchNorm1d-10                  [64, 256]             512
           Linear-11                    [64, 9]           2,313
Total params: 803,081
Trainable params: 803,081
Non-trainable params: 0
----------------------------------------------------------------
Inpu