# SWIFT在【基础视觉领域】的应用

#### 使用样例见：https://github.com/modelscope/swift/tree/main/examples/pytorch/cv/notebook

## 1. 图像分类任务

### 1.1 安装与导入包
- 安装必要的依赖包
```bash
pip install 'ms-swift[aigc]' -U
pip install modelscope
```
- 导入必要的依赖包


In [None]:
# basic / third-party
import gc
import os
import tempfile

import torch
import torchvision
import transformers

# SWIFT
from swift import Swift, SwiftModel, snapshot_download, push_to_hub
from swift import AdapterConfig, LoRAConfig, PromptConfig, SideConfig, ResTuningConfig

# Modelscope
import modelscope
from modelscope.pipelines import pipeline
from modelscope.models import Model
from modelscope.utils.config import Config
from modelscope.metainfo import Trainers
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile


### 1.2 数据集
- [基础模型基准评测集 (FME Benchmark)](https://modelscope.cn/datasets/damo/foundation_model_evaluation_benchmark/dataPeview) 子集 - Oxford Flowers

| 序号 |                                   数据集                                  |   描述   | 类别数量 | 训练集数量 | 验证集数量 | 测试集数量 |                                                    样例                                                   |                                             备注                                             |
|:----:|:-------------------------------------------------------------------------:|:--------:|:--------:|:----------:|:----------:|:----------:|:---------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------:|
|  1  |   [Oxford   Flowers](https://www.robots.ox.ac.uk/~vgg/data/flowers/102/)  |   花卉   |    102   |    1020    |    1020    |    6149    |            <img decoding="async"   src="resources/images/OxfordFlowers102_image_00001.jpeg" width=50>           | [预览](https://modelscope.cn/datasets/damo/foundation_model_evaluation_benchmark/dataPeview) |

- 加载数据集

In [None]:
# Number of flower categories; equals the number of entries in CLASSES.
num_classes = 102
# Category names of the Oxford Flowers subset, stored on the model config as `CLASSES`.
CLASSES = [
    'pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold',
    'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle',
    'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris',
    'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily',
    'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth',
    'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william',
    'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly',
    'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose',
    'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue',
    'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion',
    'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium',
    'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?',
    'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy',
    'osteospermum', 'spring crocus', 'bearded iris', 'windflower', 'tree poppy',
    'gazania', 'azalea', 'water lily', 'rose', 'thorn apple',
    'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium',
    'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose',
    'tree mallow', 'magnolia', 'cyclamen', 'watercress', 'canna lily',
    'hippeastrum', 'bee balm', 'ball moss', 'foxglove', 'bougainvillea',
    'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower',
    'trumpet creeper', 'blackberry lily',
]
# Path to a local sample image of the dataset (not referenced by the visible cells).
img_test = "resources/images/OxfordFlowers102_image_00001.jpeg"
# Load the train and eval splits of the OxfordFlowers subset; the two calls
# differ only in `split`, and are issued in that order.
train_dataset, eval_dataset = (
    MsDataset.load(
        'foundation_model_evaluation_benchmark',
        namespace='damo',
        subset_name='OxfordFlowers',
        split=split_name,
    )
    for split_name in ('train', 'eval')
)

### 1.3 一站式训练 [Modelscope + Swift]

#### Vision Transformers (ViT)

<img src="resources/images/vit.jpg" width="800" align="middle" />

#### Swift - ViT - Adapter

<img src="resources/images/adapter.png" width="500" align="middle" />

#### 1.3.1 使用modelscope加载ViT模型

In [None]:
# Hub id, task name and pinned release of the base ViT classification model.
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-base'
task = 'vision-efficient-tuning'
revision = 'v1.0.2'

# Download the same revision that `Model.from_pretrained` loads below, so the
# configuration file and the weights come from one release.
model_dir = snapshot_download(model_id, revision=revision)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
# Point the config at the dataset: class count of the head and the label names.
cfg_dict.model.head.num_classes = num_classes
cfg_dict.CLASSES = CLASSES
model = Model.from_pretrained(model_id, task=task, cfg_dict=cfg_dict, revision=revision)

#### 1.3.2 查看模型信息

In [None]:
# Print the model's string representation to inspect its structure.
print(model)

In [None]:
# Gather the name of every submodule reported by `named_modules()`; handy when
# writing `target_modules` patterns for the tuner configs.
module_keys = [name for name, _submodule in model.named_modules()]
print(module_keys)

#### 1.3.3 配置SwiftConfig + 模型准备

In [None]:
# The pattern selects model.backbone.blocks.0.mlp through model.backbone.blocks.11.mlp.
adapter_config = AdapterConfig(
    target_modules=r'.*blocks\.\d+\.mlp$',
    dim=768,
    adapter_length=10,
    hidden_pos=0,
)
# Wrap the base model with the adapter tuner; `model` now refers to the wrapped model.
model = Swift.prepare_model(model, adapter_config)

#### 1.3.4 查看微调模型信息

In [None]:
# Show the structure of the model after the tuner has been attached.
print(model)
# Report the trainable-parameter summary returned by the wrapped model.
print(model.get_trainable_parameters())

#### 1.3.5 训练与评测

In [None]:
def cfg_modify_fn(cfg):
    """Adapt the trainer configuration to the Oxford Flowers fine-tuning run.

    Writes the module-level ``num_classes`` and ``CLASSES`` into the config,
    switches ``model.finetune`` on, and overrides the epoch count and the LR
    scheduler's ``T_max``.

    NOTE(review): ``T_max`` (10) is larger than ``max_epochs`` (5) -- confirm
    this is the intended schedule length.

    Args:
        cfg: Configuration object to adjust; it is modified in place.

    Returns:
        The same ``cfg`` object.
    """
    # Dataset-specific settings.
    cfg.CLASSES = CLASSES
    cfg.model.head.num_classes = num_classes
    cfg.model.finetune = True
    # Training-schedule overrides.
    cfg.train.lr_scheduler.T_max = 10
    cfg.train.max_epochs = 5
    return cfg

# Output directory and trainer arguments for the adapter run.
work_dir = "tmp/cv_swift_adapter"
kwargs = dict(
    model=model,
    cfg_file=os.path.join(model_dir, 'configuration.json'),
    work_dir=work_dir,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    cfg_modify_fn=cfg_modify_fn,
)

trainer = build_trainer(name=Trainers.vision_efficient_tuning, default_args=kwargs)
trainer.train()
result = trainer.evaluate()
print(f'Vision-efficient-tuning-adapter train output: {result}.')
# Report GPU usage while the trained model is still alive.
print(os.system("nvidia-smi"))

# Release GPU memory. The references must be dropped BEFORE emptying the cache:
# `empty_cache()` can only return memory that no live tensor occupies, and
# `kwargs` still points at the model as well.
kwargs.pop('model', None)
del trainer
del model
gc.collect()
torch.cuda.empty_cache()

### 1.4 Parameter-Efficient Tuners

#### Swift - ViT - Prompt

<img src="resources/images/prompt.png" width="500" align="middle" />

#### 1.4.1 模型准备

In [None]:
# Hub id, task name and pinned release of the base ViT classification model.
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-base'
task = 'vision-efficient-tuning'
revision = 'v1.0.2'

# Download the same revision that `Model.from_pretrained` loads below, so the
# configuration file and the weights come from one release.
model_dir = snapshot_download(model_id, revision=revision)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
# Point the config at the dataset: class count of the head and the label names.
cfg_dict.model.head.num_classes = num_classes
cfg_dict.CLASSES = CLASSES
model = Model.from_pretrained(model_id, task=task, cfg_dict=cfg_dict, revision=revision)

# Prompt tuner applied to modules whose name ends in `blocks.<index>`.
prompt_config = PromptConfig(
    dim=768,
    target_modules=r'.*blocks\.\d+$',
    embedding_pos=0,
    prompt_length=10,
    attach_front=False,
)

model = Swift.prepare_model(model, config=prompt_config)

print(model.get_trainable_parameters())

#### 1.4.2 模型训练

In [None]:
def cfg_modify_fn(cfg):
    """Adapt the trainer configuration to the Oxford Flowers fine-tuning run.

    Writes the module-level ``num_classes`` and ``CLASSES`` into the config,
    switches ``model.finetune`` on, and overrides the epoch count and the LR
    scheduler's ``T_max``.

    NOTE(review): ``T_max`` (10) is larger than ``max_epochs`` (5) -- confirm
    this is the intended schedule length.

    Args:
        cfg: Configuration object to adjust; it is modified in place.

    Returns:
        The same ``cfg`` object.
    """
    # Dataset-specific settings.
    cfg.CLASSES = CLASSES
    cfg.model.head.num_classes = num_classes
    cfg.model.finetune = True
    # Training-schedule overrides.
    cfg.train.lr_scheduler.T_max = 10
    cfg.train.max_epochs = 5
    return cfg

# Output directory and trainer arguments for the prompt run.
work_dir = "tmp/cv_swift_prompt"
kwargs = dict(
    model=model,
    cfg_file=os.path.join(model_dir, 'configuration.json'),
    work_dir=work_dir,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    cfg_modify_fn=cfg_modify_fn,
)

trainer = build_trainer(name=Trainers.vision_efficient_tuning, default_args=kwargs)
trainer.train()
result = trainer.evaluate()
print(f'Vision-efficient-tuning-prompt train output: {result}.')
# Report GPU usage while the trained model is still alive.
print(os.system("nvidia-smi"))

# Release GPU memory. The references must be dropped BEFORE emptying the cache:
# `empty_cache()` can only return memory that no live tensor occupies, and
# `kwargs` still points at the model as well.
kwargs.pop('model', None)
del trainer
del model
gc.collect()
torch.cuda.empty_cache()

### 1.5 Memory-Efficient Tuners

#### Swift - ViT - Res-Tuning

<img src="resources/images/restuningbypass.png" width="700" align="middle" />

*Res-Tuning: A Flexible and Efficient Tuning Paradigm via Unbinding Tuner from Backbone*

#### 1.5.1 模型准备

In [None]:
# Hub id, task name and pinned release of the base ViT classification model.
model_id = 'damo/cv_vitb16_classification_vision-efficient-tuning-base'
task = 'vision-efficient-tuning'
revision = 'v1.0.2'

# Download the same revision that `Model.from_pretrained` loads below, so the
# configuration file and the weights come from one release.
model_dir = snapshot_download(model_id, revision=revision)
cfg_dict = Config.from_file(os.path.join(model_dir, ModelFile.CONFIGURATION))
# Point the config at the dataset: class count of the head and the label names.
cfg_dict.model.head.num_classes = num_classes
cfg_dict.CLASSES = CLASSES
model = Model.from_pretrained(model_id, task=task, cfg_dict=cfg_dict, revision=revision)

# Res-Tuning setup. The three `*_modules` arguments are name patterns over the
# backbone's submodules; see the ResTuningConfig documentation for their roles.
restuning_config = ResTuningConfig(
    dims=768,
    root_modules=r'.*backbone.blocks.0$',
    stem_modules=r'.*backbone.blocks\.\d+$',
    target_modules=r'.*backbone.norm',
    target_modules_hook='input',
    tuner_cfg='res_adapter',
)

model = Swift.prepare_model(model, config=restuning_config)

print(model.get_trainable_parameters())

#### 1.5.2 模型训练

In [None]:
def cfg_modify_fn(cfg):
    """Adapt the trainer configuration to the Oxford Flowers fine-tuning run.

    Writes the module-level ``num_classes`` and ``CLASSES`` into the config,
    switches ``model.finetune`` on, and overrides the epoch count and the LR
    scheduler's ``T_max``.

    NOTE(review): ``T_max`` (10) is larger than ``max_epochs`` (5) -- confirm
    this is the intended schedule length.

    Args:
        cfg: Configuration object to adjust; it is modified in place.

    Returns:
        The same ``cfg`` object.
    """
    # Dataset-specific settings.
    cfg.CLASSES = CLASSES
    cfg.model.head.num_classes = num_classes
    cfg.model.finetune = True
    # Training-schedule overrides.
    cfg.train.lr_scheduler.T_max = 10
    cfg.train.max_epochs = 5
    return cfg

# Output directory and trainer arguments for the Res-Tuning run.
work_dir = "tmp/cv_swift_restuning"
kwargs = dict(
    model=model,
    cfg_file=os.path.join(model_dir, 'configuration.json'),
    work_dir=work_dir,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    cfg_modify_fn=cfg_modify_fn,
)

trainer = build_trainer(name=Trainers.vision_efficient_tuning, default_args=kwargs)
trainer.train()
result = trainer.evaluate()
print(f'Vision-efficient-tuning-restuning train output: {result}.')
# Report GPU usage while the trained model is still alive.
print(os.system("nvidia-smi"))

# Release GPU memory. The references must be dropped BEFORE emptying the cache:
# `empty_cache()` can only return memory that no live tensor occupies, and
# `kwargs` still points at the model as well.
kwargs.pop('model', None)
del trainer
del model
gc.collect()
torch.cuda.empty_cache()

### 1.6 更多基础模型及工具包使用样例

#### 1.6.1 Transformers

安装依赖包：`pip install transformers`

In [None]:
# Create the base model from a locally downloaded snapshot.
from transformers import AutoModelForImageClassification

model_dir = snapshot_download("AI-ModelScope/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained(model_dir)
# List every submodule name reported by `named_modules()`; handy when writing
# `target_modules` patterns for the tuner configs.
module_keys = [name for name, _submodule in model.named_modules()]
print(module_keys)

In [None]:
# Build the tuned model: a prompt tuner and an adapter tuner on the same backbone.
prompt_config = PromptConfig(
    dim=768,
    target_modules=r'.*layer\.\d+$',
    embedding_pos=0,
    prompt_length=10,
    attach_front=False,
)

adapter_config = AdapterConfig(
    dim=768,
    hidden_pos=0,
    target_modules=r'.*attention.output.dense$',
    adapter_length=10,
)

# Passing a dict attaches several tuners at once, keyed by tuner name.
model = Swift.prepare_model(model, {"adapter_tuner": adapter_config, "prompt_tuner": prompt_config})
# Print the summary explicitly: a bare expression that is not the last line of
# a notebook cell is evaluated but never displayed.
print(model.get_trainable_parameters())
# Run a dummy 1x3x224x224 input through the tuned model and print the logits shape.
print(model(torch.ones(1, 3, 224, 224)).logits.shape)

#### 1.6.2 TIMM

安装依赖包：`pip install timm`

In [None]:
# Create a randomly initialised (pretrained=False) ViT with a 100-class head.
import timm

model = timm.create_model("vit_base_patch16_224", pretrained=False, num_classes=100)
# List every submodule name reported by `named_modules()`; handy when writing
# `target_modules` patterns for the tuner configs.
module_keys = [name for name, _submodule in model.named_modules()]
print(module_keys)

In [None]:
# Build the tuned model: attach a Res-Tuning tuner to the timm ViT.
restuning_config = ResTuningConfig(
    dims=768,
    root_modules=r'.*blocks.0$',
    stem_modules=r'.*blocks\.\d+$',
    target_modules=r'norm',
    tuner_cfg='res_adapter',
)

model = Swift.prepare_model(model, restuning_config)
# Print the summary explicitly: a bare expression that is not the last line of
# a notebook cell is evaluated but never displayed.
print(model.get_trainable_parameters())
# Run a dummy 1x3x224x224 input through the tuned model and print the output shape.
print(model(torch.ones(1, 3, 224, 224)).shape)

### 1.7 更多任务

SWIFT提供的是模型层面的微调能力，因此只要不同任务采用相似的基础模型架构，同样的微调方式即可迁移到这些任务中，如检测、分割等。