In [None]:
# Train data path | 设置训练用模型、图片
pretrained_model = "./sd-models/model.ckpt" # base model path | 底模路径
train_data_dir = "./train/aki" # train dataset path | 训练数据集路径

# Train related params | 训练相关参数
resolution = "512,512" # image resolution w,h. 图片分辨率，宽,高。支持非正方形，但必须是 64 倍数。
batch_size = 1 # batch size
max_train_epoches = 10 # max train epoches | 最大训练 epoch
save_every_n_epochs = 2 # save every n epochs | 每 N 个 epoch 保存一次
network_dim = 32 # network dim | 常用 4~128，不是越大越好
network_alpha= 32 # network alpha | 常用与 network_dim 相同的值或者采用较小的值，如 network_dim的一半 防止下溢。默认值为 1，使用较小的 alpha 需要提升学习率。
clip_skip = 2 # clip skip | 玄学 一般用 2
train_unet_only = 0 # train U-Net only | 仅训练 U-Net，开启这个会牺牲效果大幅减少显存使用。6G显存可以开启
train_text_encoder_only = 0 # train Text Encoder only | 仅训练 文本编码器

# Learning rate | 学习率
lr = "1e-4"
unet_lr = "1e-4"
text_encoder_lr = "1e-5"
lr_scheduler = "cosine_with_restarts" # "linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"

# Output settings | 输出设置
output_name = "aki" # output model name | 模型保存名称
save_model_as = "safetensors" # model save ext | 模型保存格式 ckpt, pt, safetensors

In [None]:
!accelerate launch --num_cpu_threads_per_process=8 "./scripts/train_network.py" \
  --enable_bucket \
  --pretrained_model_name_or_path=$pretrained_model \
  --train_data_dir=$train_data_dir \
  --output_dir="./output" \
  --logging_dir="./logs" \
  --resolution=$resolution \
  --network_module=networks.lora \
  --max_train_epochs=$max_train_epoches \
  --learning_rate=$lr \
  --unet_lr=$unet_lr \
  --text_encoder_lr=$text_encoder_lr \
  --network_dim=$network_dim \
  --network_alpha=$network_alpha \
  --output_name=$output_name \
  --lr_scheduler=$lr_scheduler \
  --train_batch_size=$batch_size \
  --save_every_n_epochs=$save_every_n_epochs \
  --mixed_precision="fp16" \
  --save_precision="fp16" \
  --seed="1337" \
  --cache_latents \
  --clip_skip=$clip_skip \
  --prior_loss_weight=1 \
  --max_token_length=225 \
  --caption_extension=".txt" \
  --save_model_as=$save_model_as \
  --xformers --shuffle_caption --use_8bit_adam

In [5]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("没有GPU！请在菜单中选择：代码执行程序 → 更改运行时类型 → T4 GPU")

CUDA available: True
GPU: Tesla T4
GPU Memory: 14.7 GB


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
%%bash
# 检查Google Drive中的训练数据
ls -lh /content/drive/MyDrive/lora_training/

ls: cannot access '/content/drive/MyDrive/lora_training/': No such file or directory


CalledProcessError: Command 'b'# \xe6\xa3\x80\xe6\x9f\xa5Google Drive\xe4\xb8\xad\xe7\x9a\x84\xe8\xae\xad\xe7\xbb\x83\xe6\x95\xb0\xe6\x8d\xae\nls -lh /content/drive/MyDrive/lora_training/\n'' returned non-zero exit status 2.

In [5]:
%%bash
ls -la /content/drive/
ls -la /content/drive/MyDrive/
find /content/drive/MyDrive/ -iname "*lora*" -type d

total 12
drwx------ 2 root root 4096 Jan 21 05:06 MyDrive
dr-x------ 2 root root 4096 Jan 21 05:06 .shortcut-targets-by-id
drwx------ 5 root root 4096 Jan 21 05:06 .Trash-0
total 5
drwx------ 2 root root 4096 Jan 20 14:51 lora_training
-rw------- 1 root root  179 Dec 14 06:42 中文翻译为英文，姓名转为拼音.gsheet
-rw------- 1 root root  179 Dec 14 06:21 将该表排版优化一下，原列内容不要更改.gsheet
/content/drive/MyDrive/lora_training


In [6]:
 %%bash
  # 查看lora_training文件夹内容
  ls -lh /content/drive/MyDrive/lora_training/

total 3.1M
-rw------- 1 root root 3.1M Jan 20 14:47 hanxiao_m1_dataset.zip


In [7]:
  %%bash
  # 解压训练数据
  cd /content
  unzip -q /content/drive/MyDrive/lora_training/hanxiao_m1_dataset.zip -d /content/

  # 创建正确的文件夹结构（关键！）
  mkdir -p /content/train_data_fixed/12_hanxiao

  # 移动图片到正确位置
  find /content/hanxiao_m1 -name "*.png" -exec mv {} /content/train_data_fixed/12_hanxiao/ \;

  # 验证结果
  echo "=== 数据结构验证 ==="
  ls -la /content/train_data_fixed/12_hanxiao/
  echo ""
  echo "=== 图片数量统计 ==="
  ls /content/train_data_fixed/12_hanxiao/*.png | wc -l

=== 数据结构验证 ===
total 8
drwxr-xr-x 2 root root 4096 Jan 21 05:10 .
drwxr-xr-x 3 root root 4096 Jan 21 05:10 ..

=== 图片数量统计 ===
0


find: ‘/content/hanxiao_m1’: No such file or directory
ls: cannot access '/content/train_data_fixed/12_hanxiao/*.png': No such file or directory


In [9]:
 %%bash
 ls -la /content/
 echo "---"
 find /content -name "*.png" -type f 2>/dev/null | head -20

total 3148
drwxr-xr-x 1 root root   4096 Jan 21 05:10 .
drwxr-xr-x 1 root root   4096 Jan 21 04:54 ..
-rw-r--r-- 1 root root 271093 Jan 20 22:33 001_hanxiao_front_view_standing_neutral_expression_f.png
-rw-r--r-- 1 root root 278021 Jan 20 22:33 002_hanxiao_front_view_standing_alert_expression_ful.png
-rw-r--r-- 1 root root 255929 Jan 20 22:33 003_hanxiao_front_view_standing_tired_expression_ful.png
-rw-r--r-- 1 root root 183208 Jan 20 22:34 004_hanxiao_front_view_standing_fearful_expression_f.png
-rw-r--r-- 1 root root 256650 Jan 20 22:34 005_hanxiao_side_view_standing_calm_expression_full_.png
-rw-r--r-- 1 root root 237358 Jan 20 22:34 006_hanxiao_side_view_standing_vigilant_expression_f.png
-rw-r--r-- 1 root root 166516 Jan 20 22:36 009_hanxiao_back_view_standing_full_body.png
-rw-r--r-- 1 root root 239679 Jan 20 22:36 010_hanxiao_front_view_sitting_weary_expression_uppe.png
-rw-r--r-- 1 root root 328137 Jan 20 22:36 011_hanxiao_side_view_sitting_tense_expression_upper.png
-rw-r--r--

In [None]:
  %%bash                                                                                                   mv /content/*hanxiao*.png /content/train_data_fixed/12_hanxiao/
  ls -la /content/train_data_fixed/12_hanxiao/
  echo "---"
  ls /content/train_data_fixed/12_hanxiao/*.png | wc -l