# 1. Transforms用途
① 把 Transforms 当成工具箱的话，里面的 class 就是不同的工具，例如 ToTensor、Resize 这些工具。

② Transforms拿一些特定格式的图片，经过Transforms里面的工具，获得我们想要的结果。

In [1]:
from torchvision import transforms
from PIL import Image

# Location of the sample ant photo from the hymenoptera dataset.
img_path = "/storage/pt/AW_STUDY/pytorch-tutorial-tudui/hymenoptera_data/train/ants/5650366_e22b7e1065.jpg"

# ToTensor is a callable class: build the tool once, then apply it to an image.
tensor_trans = transforms.ToTensor()

# Open the photo with PIL and convert it into a torch tensor.
img = Image.open(img_path)
tensor_img = tensor_trans(img)

# Show the resulting tensor values.
print(tensor_img)

tensor([[[0.3804, 0.3804, 0.3843,  ..., 0.3412, 0.3373, 0.3333],
         [0.3765, 0.3804, 0.3843,  ..., 0.3529, 0.3490, 0.3451],
         [0.3804, 0.3804, 0.3843,  ..., 0.3725, 0.3686, 0.3647],
         ...,
         [0.6078, 0.6078, 0.6118,  ..., 0.4627, 0.4627, 0.4627],
         [0.5882, 0.5922, 0.5922,  ..., 0.4588, 0.4588, 0.4588],
         [0.5804, 0.5804, 0.5843,  ..., 0.4549, 0.4549, 0.4549]],

        [[0.4667, 0.4667, 0.4706,  ..., 0.4039, 0.4000, 0.3961],
         [0.4706, 0.4667, 0.4706,  ..., 0.3922, 0.3882, 0.3843],
         [0.4745, 0.4745, 0.4784,  ..., 0.3804, 0.3765, 0.3725],
         ...,
         [0.5961, 0.5961, 0.6000,  ..., 0.4588, 0.4588, 0.4588],
         [0.5882, 0.5922, 0.5922,  ..., 0.4549, 0.4549, 0.4549],
         [0.5804, 0.5804, 0.5804,  ..., 0.4510, 0.4510, 0.4510]],

        [[0.4157, 0.4157, 0.4196,  ..., 0.3608, 0.3569, 0.3529],
         [0.4196, 0.4157, 0.4196,  ..., 0.3569, 0.3529, 0.3490],
         [0.4235, 0.4235, 0.4235,  ..., 0.3608, 0.3569, 0.

In [2]:
from torch.utils.tensorboard import SummaryWriter
# Log the image tensor to TensorBoard under the "logs" directory.
# The `with` form closes the writer (flushing pending events) even if
# add_image raises, so no explicit writer.close() is needed.
with SummaryWriter("logs") as writer:
    # add_image defaults to the CHW layout for a 3-D tensor.
    writer.add_image("Tensor_img", tensor_img)

## 归一化：output[c] = (input[c] - mean[c]) / std[c]
如果原始像素值是 x，所在通道均值和标准差都是0.5，则归一化后的值为 (x - 0.5) / 0.5；ToTensor 输出的 [0, 1] 范围因此被映射到 [-1, 1]。
### 其他：
* .detach(): 这个操作是为了从计算图中分离出这个tensor,使其不再参与梯度计算。这是因为我们现在只需要获取tensor的值,而不需要进行任何训练或微调。
* .cpu(): 这个操作是为了将tensor从GPU内存转移到CPU内存。这样做是为了使后续的numpy转换更加顺利,因为numpy无法直接操作GPU内存上的数据。
* .numpy(): 这个操作是将tensor转换为numpy数组。numpy数组是一种更加通用的数据结构,可以用于各种数据处理和可视化操作。
* .astype(np.uint8): 这个操作是将numpy数组的数据类型转换为无符号8位整数(uint8)。
在图像处理中,像素值通常以8位整数(0-255)表示。将数组转换为uint8类型可以确保数据符合图像的标准格式。
这一步也是必要的,因为在前一步中,tensor被转换为numpy数组后,数值可能还保留了小数部分。注意 astype(np.uint8) 只是直接舍去小数部分(不会四舍五入),超出0-255范围的值也不会被裁剪,而是得到错误的值(例如回绕)。因此更稳妥的做法是先用 np.rint 四舍五入、再用 np.clip 限制到0-255,最后再转换为uint8。
如果是每通道16位的图像(例如64位的RGBA图像)，使用np.uint16而不是np.uint8,因为16位整数可以表示0-65535的范围,足以覆盖每个通道的像素值。

In [57]:
import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from PIL import Image
import cv2
import numpy as np

# help(torch.tensor)

import os  # local import: only needed to make sure the output folder exists

# Open the sample ant photo from the hymenoptera dataset.
img_path = "/storage/pt/AW_STUDY/pytorch-tutorial-tudui/hymenoptera_data/train/ants/5650366_e22b7e1065.jpg"
img = Image.open(img_path)

# No SummaryWriter is needed here: the image is written straight to disk.
# writer = SummaryWriter("logs")

# Per-channel mean / std handed to Normalize.
c_mean, c_std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]

tensor_trans = transforms.ToTensor()
img_tensor = tensor_trans(img)
print(f"origin:{img_tensor}")

print(img_tensor[0][0][0])
# Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel]
tensor_norm = transforms.Normalize(c_mean, c_std)
img_norm = tensor_norm(img_tensor)
print(img_norm[0][0][0])
print(f"tensor shape:{img_tensor.shape}")  # CHW
print(f"size(0):{img_tensor.size(0)}")

# Optional: inspect the real per-channel statistics of the image.
# img_mean = img_tensor.mean(dim=[1, 2])
# img_std = img_tensor.std(dim=[1, 2])
# print("Image mean:", img_mean)
# print("Image std:", img_std)
# print(f"norm:{img_norm}")

# Manual inverse of Normalize: x = y * std[channel] + mean[channel].
# Reshaping mean/std to (C, 1, 1) lets broadcasting apply them per channel
# without a Python loop; a new tensor is produced, img_norm is left untouched.
std_t = torch.tensor(c_std, dtype=img_norm.dtype, device=img_norm.device).view(-1, 1, 1)
mean_t = torch.tensor(c_mean, dtype=img_norm.dtype, device=img_norm.device).view(-1, 1, 1)
denorm_img_tensor = img_norm * std_t + mean_t
print(f"denorm:{denorm_img_tensor}")
print(denorm_img_tensor[0][0][0])

# Convert the de-normalized tensor into an 8-bit HWC array for PIL.
image_array = denorm_img_tensor.detach().cpu().numpy()
# The normalize/de-normalize round trip leaves tiny float errors
# (e.g. 96.99999 instead of 97). A bare astype(np.uint8) truncates, which
# darkens such pixels by one level, and does not clamp values that drift
# slightly outside [0, 255]. Round first, then clip, then cast.
pixels = np.clip(np.rint(image_array.transpose(1, 2, 0) * 255), 0, 255).astype(np.uint8)
image = Image.fromarray(pixels)

# Alternative: save with OpenCV instead of PIL (OpenCV expects BGR order).
# cv2.imwrite('./img_save/img_denorm_tensor.jpg', pixels[:, :, ::-1])

# Save with PIL; create the target folder first so a fresh checkout does not
# fail with FileNotFoundError.
os.makedirs('./img_save', exist_ok=True)
image.save('./img_save/img_denorm_tensor.jpg')

# No writer.close() needed: no SummaryWriter was created in this cell.
# writer.close()


origin:tensor([[[0.3804, 0.3804, 0.3843,  ..., 0.3412, 0.3373, 0.3333],
         [0.3765, 0.3804, 0.3843,  ..., 0.3529, 0.3490, 0.3451],
         [0.3804, 0.3804, 0.3843,  ..., 0.3725, 0.3686, 0.3647],
         ...,
         [0.6078, 0.6078, 0.6118,  ..., 0.4627, 0.4627, 0.4627],
         [0.5882, 0.5922, 0.5922,  ..., 0.4588, 0.4588, 0.4588],
         [0.5804, 0.5804, 0.5843,  ..., 0.4549, 0.4549, 0.4549]],

        [[0.4667, 0.4667, 0.4706,  ..., 0.4039, 0.4000, 0.3961],
         [0.4706, 0.4667, 0.4706,  ..., 0.3922, 0.3882, 0.3843],
         [0.4745, 0.4745, 0.4784,  ..., 0.3804, 0.3765, 0.3725],
         ...,
         [0.5961, 0.5961, 0.6000,  ..., 0.4588, 0.4588, 0.4588],
         [0.5882, 0.5922, 0.5922,  ..., 0.4549, 0.4549, 0.4549],
         [0.5804, 0.5804, 0.5804,  ..., 0.4510, 0.4510, 0.4510]],

        [[0.4157, 0.4157, 0.4196,  ..., 0.3608, 0.3569, 0.3529],
         [0.4196, 0.4157, 0.4196,  ..., 0.3569, 0.3529, 0.3490],
         [0.4235, 0.4235, 0.4235,  ..., 0.3608, 0.3

True

In [41]:
# 测试tensor的操作
import torch
from torchvision import transforms
# Build a 3x3x3 float test tensor holding the values 1..27 in row-major order.
test_tensor = torch.arange(1, 28).reshape(3, 3, 3).float()

print(f"shape:{test_tensor.shape}, Original Tensor:{test_tensor}")

# Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel]
tensor_norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
img_norm = tensor_norm(test_tensor)
print(f"type:{type(img_norm)}, img_norm:{img_norm}")

# Undo the normalization by hand: x = y * std + mean (same 0.5 for every channel).
img_denorm = img_norm.mul(0.5).add(0.5)
print(f"type:{type(img_denorm)}, img_denorm:{img_denorm}")


shape:torch.Size([3, 3, 3]), Original Tensor:tensor([[[ 1.,  2.,  3.],
         [ 4.,  5.,  6.],
         [ 7.,  8.,  9.]],

        [[10., 11., 12.],
         [13., 14., 15.],
         [16., 17., 18.]],

        [[19., 20., 21.],
         [22., 23., 24.],
         [25., 26., 27.]]])
type:<class 'torch.Tensor'>, img_norm:tensor([[[ 1.,  3.,  5.],
         [ 7.,  9., 11.],
         [13., 15., 17.]],

        [[19., 21., 23.],
         [25., 27., 29.],
         [31., 33., 35.]],

        [[37., 39., 41.],
         [43., 45., 47.],
         [49., 51., 53.]]])
type:<class 'torch.Tensor'>, img_denorm:tensor([[[ 1.,  2.,  3.],
         [ 4.,  5.,  6.],
         [ 7.,  8.,  9.]],

        [[10., 11., 12.],
         [13., 14., 15.],
         [16., 17., 18.]],

        [[19., 20., 21.],
         [22., 23., 24.],
         [25., 26., 27.]]])


## Resize缩放与RandomCrop随机裁剪

In [69]:
import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from PIL import Image
import cv2
import numpy as np

# help(torch.tensor)

# Open the sample ant photo and print its PIL summary (mode and size).
img_path = "/storage/pt/AW_STUDY/pytorch-tutorial-tudui/hymenoptera_data/train/ants/5650366_e22b7e1065.jpg"
img = Image.open(img_path)
print(img)

# Transform tools used in this cell.
trans_totensor = transforms.ToTensor()
trans_resize = transforms.Resize((512, 512))  # explicit (height, width) target
trans_resize_2 = transforms.Resize(512)  # single int: aspect ratio is preserved

# Tensor version of the untouched image, kept for comparison.
img_tensor = trans_totensor(img)

# PIL image -> Resize -> PIL image -> ToTensor -> tensor.
# Swap in trans_resize(img) to get the fixed 512x512 variant instead.
img_resize = trans_totensor(trans_resize_2(img))
# For this 500x375 photo the aspect-preserving resize yields 3x512x682
# (channels x height x width), not 3x512x512.
print(img_resize.shape)

def tensor_to_imgsave(tensor_img, save_path):
    """Save an image tensor to disk as an 8-bit image.

    Args:
        tensor_img: torch tensor that is transposed with (1, 2, 0) before
            saving, i.e. expected in (C, H, W) layout. Values are scaled by
            255, so they are assumed to lie in [0, 1] (as ToTensor produces).
        save_path: destination passed to ``PIL.Image.Image.save``. When it is
            a filesystem path, missing parent directories are created.
    """
    import os  # local import: only needed to prepare the output directory

    image_array = tensor_img.detach().cpu().numpy()
    # CHW -> HWC and scale to 0-255. Round before casting: a bare
    # astype(np.uint8) truncates (96.99999 -> 96) and does not clamp values
    # that fall slightly outside the valid range.
    pixels = np.clip(np.rint(image_array.transpose(1, 2, 0) * 255), 0, 255).astype(np.uint8)
    image = Image.fromarray(pixels)

    # Create the parent directory so saving into a fresh folder does not fail.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(save_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    image.save(save_path)

# Write the resized tensor to disk as a JPEG so the result can be inspected visually.
tensor_to_imgsave(img_resize, "./img_save/img_resize_scale.jpg")
    

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375 at 0x7F439868D5E0>
torch.Size([3, 512, 682])


In [None]:
import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from PIL import Image
import cv2
import numpy as np

# Open the sample ant photo and print its PIL summary (mode and size).
img_path = "/storage/pt/AW_STUDY/pytorch-tutorial-tudui/hymenoptera_data/train/ants/5650366_e22b7e1065.jpg"
img = Image.open(img_path)
print(img)

# ToTensor is not needed in this cell; the crops stay PIL images.
# trans_totensor = transforms.ToTensor()
# img_tensor = trans_totensor(img)

trans_resize = transforms.Resize(size=(512, 512))
trans_resize_2 = transforms.Resize(size=512)  # aspect ratio is preserved
# A single int asks for a square random crop of 312x312 ...
trans_random = transforms.RandomCrop(size=312)
# ... while a tuple gives (height, width). This second assignment replaces
# the square variant above, so the loop below uses 312 (h) x 100 (w).
trans_random = transforms.RandomCrop(size=(312, 100))

# One-off variant: crop once and save the result.
# img_resize = trans_random(img)
# save_path = "random_resized_image.jpg"
# img_resize.save(save_path)
# print(f"Resized image saved to: {save_path}")

# Produce ten independent random crops and save each under its own index.
for i in range(10):
    img_random_resize = trans_random(img)
    crop_path = f"./img_save/random_size_{i}.jpg"
    img_random_resize.save(crop_path)
    # PIL reports size as (width, height).
    print(img_random_resize.size)
    

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375 at 0x7F446C5095E0>
(100, 312)
(100, 312)
(100, 312)
(100, 312)
(100, 312)
(100, 312)
(100, 312)
(100, 312)
(100, 312)
(100, 312)
