In [1]:
from diffusers import DiffusionPipeline
import torch
import torch.nn as nn
import os

from diffusers import PNDMScheduler

# 创建 PNDMScheduler 实例
scheduler = PNDMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    prediction_type="epsilon",
    set_alpha_to_one=False,
    skip_prk_steps=True,
    steps_offset=1,
    trained_betas=None
)

# 输出scheduler的配置，确保正确设置
print(scheduler)

  from .autonotebook import tqdm as notebook_tqdm


PNDMScheduler {
  "_class_name": "PNDMScheduler",
  "_diffusers_version": "0.12.1",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
  "num_train_timesteps": 1000,
  "prediction_type": "epsilon",
  "set_alpha_to_one": false,
  "skip_prk_steps": true,
  "steps_offset": 1,
  "trained_betas": null
}



In [2]:
import pandas as pd
from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# 图片和数据文件的路径
image_dir = r"/data/run01/scz0ruj/picturedata1/"
data_file = r"/data/run01/scz0ruj/reduced_data_output.xlsx"

# 读取 Excel 文件
data_df = pd.read_excel(data_file, nrows=16000)

class CustomDataset(Dataset):
    def __init__(self, images_dir, dataframe):
        self.images_dir = images_dir
        self.dataframe = dataframe
        self.transform = transforms.Compose([
            transforms.Resize(256, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(256),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        
        # 计算条件数据的均值和标准差
        condition_data = dataframe.iloc[:, 1:5]  # 假设条件数据位于第二列到第五列
        self.condition_mean = condition_data.mean().values
        self.condition_std = condition_data.std().values
        print(self.condition_mean)
        print(self.condition_std)
        
        
    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = f"lcy{self.dataframe.iloc[idx, 0]}.jpg"
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        image = self.transform(image)
        conditions = torch.tensor(self.dataframe.iloc[idx, 1:5].values, dtype=torch.float)
        # 应用条件数据的归一化
        conditions = (conditions - torch.tensor(self.condition_mean)) / torch.tensor(self.condition_std)
        # 使用浮点类型初始化向量，长度为77
        zeroconditions = torch.zeros(4, dtype=torch.float)

        # 将包含四个数字的列表转换为浮点型张量，并赋值给向量的前四个位置
        zeroconditions[:4] = torch.tensor(conditions, dtype=torch.float)
        
        return {'pixel_values': image, 'input_ids': zeroconditions}

# 创建数据集实例
dataset = CustomDataset(image_dir, data_df)
print(len(dataset))

# 创建数据集实例
#dataset = CustomDataset(image_dir, data_df)


# 创建 DataLoader
#dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0)
# 获取并打印第一批数据的形状
#for batch in dataloader:
#    pixel_values = batch['pixel_values']
#    input_ids = batch['input_ids']
#    print("Batch pixel_values shape:", pixel_values.shape)
#    print("Batch input_ids shape:", input_ids)
#    break

[3.84952491e+02 2.43981134e+02 1.12512536e+00 1.84848683e-01]
[1.19021432e+02 1.23790469e+02 1.13191379e+00 2.54632684e-02]
15959


In [3]:
import torch
from torch.utils.data import DataLoader
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# 定义 collate_fn 函数
def collate_fn(data):
    pixel_values = [i['pixel_values'] for i in data]
    input_ids = [i['input_ids'] for i in data]

    # 将列表的数据堆叠成一个新的Tensor，并转移到设备上
    pixel_values = torch.stack(pixel_values).to(device)
    input_ids = torch.stack(input_ids).to(device)
    
    return {'pixel_values': pixel_values, 'input_ids': input_ids}

# 创建 DataLoader 实例
loader = DataLoader(dataset,
                    shuffle=True,
                    collate_fn=collate_fn,
                    batch_size=32,
                    num_workers=0)  # 可以根据实际情况调整 num_workers


# 获取 DataLoader 的长度，这是根据数据集的大小和批次大小计算得出的总批次数量
print("Total batches:", len(loader))

# 使用 next(iter(loader)) 获取第一个批次的数据
first_batch = next(iter(loader))
print("First batch pixel values shape:", first_batch['pixel_values'].shape)
print("First batch input ids shape:", first_batch['input_ids'].shape)
print("First batch input ids shape:", first_batch['input_ids'])

Total batches: 499


  zeroconditions[:4] = torch.tensor(conditions, dtype=torch.float)


First batch pixel values shape: torch.Size([32, 3, 256, 256])
First batch input ids shape: torch.Size([32, 4])
First batch input ids shape: tensor([[-5.0728e-01, -1.1266e+00, -8.0486e-01, -6.4399e-01],
        [ 1.4553e+00,  1.6179e+00,  1.8313e+00,  7.7973e-01],
        [-9.4184e-02,  1.0695e-01, -2.5038e-01,  2.8159e-01],
        [-8.6248e-02,  3.6250e-01, -1.8620e-01,  9.7174e-01],
        [-5.7137e-02,  3.8759e-01, -2.3758e-01,  1.2371e+00],
        [-3.6755e-02,  4.7165e-04, -4.3205e-01,  3.0025e-01],
        [-6.1543e-02,  2.9760e-01, -7.3897e-02,  2.1439e-01],
        [-3.3004e-01,  1.9199e-01, -4.2557e-01,  3.4050e-01],
        [-4.5301e-01, -3.6887e-01, -4.7121e-01, -1.7193e-01],
        [-1.3936e+00, -1.4224e+00, -8.3864e-01, -1.3361e+00],
        [-8.8614e-01, -6.1807e-01, -5.8472e-01,  1.4315e-01],
        [-3.4177e-01, -3.0577e-02, -2.8006e-01,  7.7951e-01],
        [-1.9946e-01, -6.3574e-01, -6.5613e-01, -1.0243e-01],
        [ 3.0547e-01,  4.5344e-01, -1.5514e-01,  6.567

In [4]:
#加载模型
%run 2.vae.ipynb
%run 3.Unet.ipynb

#准备训练
vae.requires_grad_(False)
unet.requires_grad_(True)
device = 'cuda' if torch.cuda.is_available() else 'cpu'


vae.eval()
unet.train()


vae.to(device)
unet.to(device)

#encoder.to(device)
#vae.to(device)
#unet.to(device)
all_parameters = list(unet.parameters())
optimizer = torch.optim.AdamW(all_parameters,
                              lr=1e-4,
                              betas=(0.9, 0.999),
                              weight_decay=0.1,
                              eps=1e-8)

criterion = torch.nn.MSELoss()
optimizer, criterion

(AdamW (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     eps: 1e-08
     foreach: None
     lr: 0.0001
     maximize: False
     weight_decay: 0.1
 ),
 MSELoss())

In [5]:
def get_loss(data):
    input_ids = data['input_ids']
    expanded_input_ids = input_ids.unsqueeze(-1).repeat(1, 1, 768)
    
    
    with torch.no_grad():

        #抽取图像特征图
        #[1, 3, 512, 512] -> [1, 4, 64, 64]
        out_vae = vae.encoder(data['pixel_values'])
        h,mean,logvar = vae.sample(out_vae)
        # 假设out_vae的第一个元素是你需要的张量
        out_vae = h  # 选择第一个元素，这应该是一个张量


        #0.18215 = vae.config.scaling_factor
        out_vae = out_vae * 0.18215

    #随机数,unet的计算目标
    noise = torch.randn_like(out_vae)
    noise.to(device)

    #往特征图中添加噪声
    #1000 = scheduler.num_train_timesteps
    #1 = batch size
    noise_step = torch.randint(0, 1000, (1, )).long().to(device)
    out_vae_noise = scheduler.add_noise(out_vae, noise, noise_step)

    #根据文字信息,把特征图中的噪声计算出来
    out_encoder = expanded_input_ids
    out_unet = unet(out_vae=out_vae_noise,
                    out_encoder=out_encoder,
                    time=noise_step)

    #计算mse loss
    #[1, 4, 64, 64],[1, 4, 64, 64]
    return criterion(out_unet, noise)


## get_loss({
##     'input_ids': torch.ones(1, 77, device=device).long(),
##     'pixel_values': torch.randn(1, 3, 512, 512, device=device)
# })

In [6]:
def train():
    loss_sum = 0
    for epoch in range(100):
        for i, data in enumerate(loader):
            loss = get_loss(data)
            loss.backward()
            loss_sum += loss.item()
            
            torch.nn.utils.clip_grad_norm_(all_parameters, 1.0)
            optimizer.step()
            optimizer.zero_grad()

        if epoch % 1 == 0:
            print(epoch, loss_sum)
            loss_sum = 0

    #torch.save(unet.to('cpu'), 'saves/unet.model')
train()

  zeroconditions[:4] = torch.tensor(conditions, dtype=torch.float)


0 46.989921575877815
1 44.31479498371482
2 35.63570671796333
3 33.48140615748707
4 41.79175331525039
5 41.65768559387652
6 33.60054513416253
7 42.62671667133691
8 39.92209193389863
9 35.359812349663116
10 36.191959898453206
11 34.264407019363716
12 37.67601086740615
13 34.28923923627008
14 41.027425063424744
15 32.75533305306453
16 35.92075070901774
17 38.75645446800627
18 33.80359188944567
19 33.95740032414324
20 36.30861529381946
21 40.79492051945999
22 34.418898941599764
23 31.623108579864493
24 37.77568921307102
25 31.893371217505774
26 31.93035977173713
27 35.12924008499249
28 32.43927613503183
29 32.47857268858934
30 32.08791530402959
31 42.48215850981069
32 34.43062313081464
33 34.015066479565576
34 32.3360882535635
35 37.55802712656441
36 38.10507327149389
37 41.21301821930683
38 31.858381258993177
39 31.997658641601447
40 33.75054135595565
41 41.39536404219689
42 32.05412896105554
43 32.57255116890883
44 37.86015860995394
45 33.159710001869826
46 34.73225807829294
47 32.405409

In [7]:
# 保存整个模型
model_path = "/data/run01/scz0ruj/model/lcynew_unet_model1.pth"  # 替换为你的文件路径
torch.save(unet.to('cpu'), model_path)
print(f"Model saved to {model_path}")

# 仅保存模型参数
parameters_path = "/data/run01/scz0ruj/model/lcynew_unet_model_parameters1.pth"  # 替换为你的文件路径
torch.save(unet.to('cpu').state_dict(), parameters_path)
print(f"Model parameters saved to {parameters_path}")

Model saved to /data/run01/scz0ruj/model/lcynew_unet_model1.pth
Model parameters saved to /data/run01/scz0ruj/model/lcynew_unet_model_parameters1.pth


In [8]:
# 保存整个模型
#model_path = "/data/run01/scz0ruj/model/encoder_model2.pth"  # 替换为你的文件路径
#torch.save(encoder.to('cpu'), model_path)
#print(f"Model saved to {model_path}")

# 仅保存模型参数
#parameters_path = "/data/run01/scz0ruj/model/lcynew_encoder_model_parameters.pth"  # 替换为你的文件路径
#torch.save(encoder.to('cpu').state_dict(), parameters_path)
#print(f"Model parameters saved to {parameters_path}")