# 微调Qwen2.5-VL-7B

In [1]:
import torch
import glob
import os
import json
os.environ["MODELSCOPE_CACHE"] = "/data2/dzr/.cache" 
from collections import OrderedDict, defaultdict
import math
import random
from tqdm import tqdm  # 引入 tqdm 库
import time  # 引入 time 模块
import argparse  # 引入 argparse 模块
import sys
import numpy as np
import torch.optim as optim
import torch.nn as nn
from io import BytesIO
from torch.utils.data import DataLoader, Subset, random_split
from typing import Dict, List
from modelscope import AutoTokenizer, AutoProcessor,Qwen2_5_VLForConditionalGeneration
from qwen_vl_utils import process_vision_info
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import pandas as pd
from peft import LoraConfig, get_peft_model
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
)
import random
from torch.utils.data import Subset
# Checkpoint identifier shared by the tokenizer, the processor and the model loader below.
model_ckpt = "Qwen/Qwen2.5-VL-7B-Instruct"
from functools import partial
# Tokenizer and multimodal processor are loaded from the same checkpoint;
# `processor` is what process_sample() uses to build model inputs.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_ckpt, trust_remote_code=True)



Downloading Model from https://www.modelscope.cn to directory: /data2/dzr/.cache/models/Qwen/Qwen2.5-VL-7B-Instruct


2025-06-03 23:13:48,030 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /data2/dzr/.cache/models/Qwen/Qwen2.5-VL-7B-Instruct


2025-06-03 23:13:49,683 - modelscope - INFO - Target directory already exists, skipping creation.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.


## 指定设备

In [2]:
# Optional dependency install, left disabled:
# !pip install qwen-vl-utils[decord]==0.0.8
# Device the model is moved to and the batches are copied to during timing.
device = torch.device('cuda:0')

In [3]:
# Number of cached samples per DataLoader batch.
batch_size = 64
# Passed as `pred_len` when the model is first constructed.
output_length = 3
# Directory that is searched for the best checkpoint file.
checkpoint_dir = "/data2/dzr/finetune/train_outputprojection_checkpoints"

## Processor
### 构建多模态提示词并提取视觉输入


In [4]:
def build_prompt_and_inputs(sample: Dict, hist_steps: int = 5) -> Dict:
    """Turn one dataset sample into a text prompt, an image list and labels.

    Args:
        sample: Mapping with 'video_paths' and 'heatmap_paths' (lists of path
            strings), 'gps' (tensor sliced along its first axis and unpacked
            as two values per step) and 'target_mmwave' (tensor reduced with
            argmax over its last axis).
        hist_steps: Number of leading time steps to describe (default 5).

    Returns:
        Dict with:
            "prompt": the per-step "time:...gps:..." fragments concatenated
                without any separator,
            "image_paths": normalized paths interleaved as RGB, heatmap,
                RGB, heatmap, ...
            "labels": argmax indices of 'target_mmwave' as a Python list.
    """
    rgb_paths = [os.path.normpath(p) for p in sample['video_paths'][:hist_steps]]
    heat_paths = [os.path.normpath(p) for p in sample['heatmap_paths'][:hist_steps]]
    gps_rows = sample['gps'][:hist_steps].tolist()

    # The final history step is labelled as the current time; earlier steps
    # are labelled by how many steps they lie before it.
    last_step = hist_steps - 1
    fragments = []
    for step in range(hist_steps):
        if step < last_step:
            time_label = f"t-{last_step - step}"
        else:
            time_label = "Current time (t)"

        # NOTE(review): QwenVisionDataset stacks each GPS row as
        # [lat_diff, lon_diff], while this unpacks it as (lon, lat) and labels
        # the second value "dimension". Left untouched because any checkpoint
        # trained with this prompt depends on the exact text — confirm before
        # changing.
        lon, lat = gps_rows[step]
        fragments.append(
            f"time:{time_label}"
            f"gps:longitude:{lon:.6f},dimension:{lat:.6f}"
        )

    # Interleave the two image streams step by step: RGB first, then heatmap.
    interleaved_paths = []
    for rgb, heat in zip(rgb_paths, heat_paths):
        interleaved_paths.extend((rgb, heat))

    return {
        "prompt": "".join(fragments),
        "image_paths": interleaved_paths,
        "labels": sample['target_mmwave'].argmax(dim=-1).tolist(),
    }

# 示例使用 ---------------------------------------------------
def process_sample(sample, processor, device="cuda"):
    """Convert one raw dataset sample into processor outputs plus labels.

    Args:
        sample: Sample mapping accepted by build_prompt_and_inputs().
        processor: Multimodal processor providing apply_chat_template() and a
            __call__ that accepts text / images / videos.
        device: Where the processor output is moved before returning.
            Defaults to "cuda", which matches the previous hard-coded
            behaviour. Pass None to leave the tensors where the processor
            created them (e.g. when the caller caches them on the CPU anyway).

    Returns:
        (inputs, labels): the processor output and the label list produced by
        build_prompt_and_inputs().
    """
    # Step 1: prompt text and the interleaved image paths.
    processed = build_prompt_and_inputs(sample)

    # Step 2: a single user turn holding every image followed by the text prompt.
    image_items = [{"type": "image", "image": path} for path in processed["image_paths"]]
    text_item = {"type": "text", "text": processed["prompt"]}
    messages = [{"role": "user", "content": image_items + [text_item]}]

    # Step 3: render the chat template and collect the vision inputs.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    if device is not None:
        inputs = inputs.to(device)

    return inputs, processed["labels"]





## Dataset

In [5]:
class CachedVisionDataset(Dataset):
    """Dataset that pre-processes every sample once and serves it from memory.

    Each sample of `original_dataset` is run through process_sample(); the
    resulting tensors are moved to the CPU and stored together with their
    labels. Samples whose processing raises are reported and skipped, so the
    cache can be shorter than the source dataset. In that case cache indices
    no longer line up with source indices; `skipped_indices` records which
    source indices were dropped.
    """

    def __init__(self, original_dataset, processor):
        self.cache = []
        self.skipped_indices = []  # source indices whose processing failed
        self.original_dataset = original_dataset  # keep a reference to the source dataset

        # Pre-load every sample up front.
        for i in tqdm(range(len(original_dataset)), desc="Caching dataset"):
            sample = original_dataset[i]
            try:
                inputs, labels = process_sample(sample, processor)
                # Keep cached tensors on the CPU so they do not occupy GPU memory.
                inputs = {k: v.cpu() for k, v in inputs.items()}
            except Exception as e:
                # Best effort: report the failure and continue with the next sample.
                print(f"Error processing sample {i}: {e}")
                self.skipped_indices.append(i)
                continue
            self.cache.append((inputs, labels))

    def __len__(self):
        """Number of samples that were cached successfully.

        This is the cache length rather than the source dataset length, so
        every index in range(len(self)) is valid for __getitem__ even when
        some samples were skipped.
        """
        return len(self.cache)

    def __getitem__(self, idx):
        """Return the cached (inputs, labels) pair at position `idx`."""
        return self.cache[idx]
    

In [6]:
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
from PIL import Image

class QwenVisionDataset(Dataset):
    """Sliding-window dataset over the sequences listed in one or more CSV files.

    Every CSV is read once at construction time. Rows are grouped by their
    'seq_index' value, and each group that has at least `input_length` rows
    contributes one window per valid start offset. The grouped frames are kept
    in memory so __getitem__ does not re-read and re-filter the CSV for every
    sample.

    Args:
        data_csv_paths: List of CSV file paths. Paths stored inside a CSV are
            resolved relative to that CSV's directory.
        modal: Stored on the instance; not otherwise used by this class.
        input_length: Number of consecutive rows per window.
        output_length: Number of trailing rows of the window whose mmWave
            vectors form 'target_mmwave'. Must satisfy
            1 <= output_length <= input_length.

    Raises:
        ValueError: If output_length is outside [1, input_length].
    """

    def __init__(self, data_csv_paths, modal='mmwave_gps', input_length=8, output_length=3):
        if not 1 <= output_length <= input_length:
            raise ValueError(
                f"output_length must be in [1, input_length]; got "
                f"output_length={output_length}, input_length={input_length}"
            )
        self.data_csv_paths = data_csv_paths
        self.modal = modal
        self.input_length = input_length
        self.output_length = output_length

        # Logical feature name -> CSV column name.
        self.features_column = {
            # 'rgbs': 'unit1_rgb',
            'rgbs': 'unit1_camera_resized',
            'u1_loc': 'unit1_loc',
            'u2_loc': 'unit2_loc',
            'mmwave': 'unit1_pwr_60ghz',
            'heatmap': 'unit1_mmwave_heatmap'  # heatmap image column
        }

        # (csv index, seq id, start offset) for every window, plus the rows of
        # each usable sequence keyed by (csv index, seq id).
        self.window_samples = []
        self._seq_frames = {}
        for seq_idx, data_csv_path in enumerate(self.data_csv_paths):
            data_csv = pd.read_csv(data_csv_path)
            for seq_id in data_csv['seq_index'].unique():
                seq_data = data_csv[data_csv['seq_index'] == seq_id]
                n_windows = len(seq_data) - self.input_length + 1
                if n_windows < 1:
                    continue  # sequence shorter than one window
                self._seq_frames[(seq_idx, seq_id)] = seq_data
                self.window_samples.extend(
                    (seq_idx, seq_id, start_idx) for start_idx in range(n_windows)
                )

    @staticmethod
    def _read_floats(path):
        """Read a whitespace-separated text file as a list of floats."""
        with open(path, 'r') as f:
            return [float(token) for token in f.read().strip().split()]

    def __len__(self):
        return len(self.window_samples)

    def __getitem__(self, idx):
        """Build the sample for window `idx`.

        Returns:
            Dict with 'video_paths' and 'heatmap_paths' (lists of joined
            paths), 'gps' (one [unit2 - unit1] difference pair per step, in
            the order the two values appear in the location files), 'mmwave'
            (one float vector per step) and 'target_mmwave' (the last
            `output_length` rows of 'mmwave').
        """
        seq_idx, seq_id, start_idx = self.window_samples[idx]
        base_path = os.path.dirname(self.data_csv_paths[seq_idx])
        seq_data = self._seq_frames[(seq_idx, seq_id)]
        window_rows = seq_data.iloc[start_idx:start_idx + self.input_length]
        columns = self.features_column

        def resolve(feature):
            # CSV entries are relative to the directory that holds the CSV.
            return [os.path.join(base_path, p) for p in window_rows[columns[feature]].tolist()]

        # GPS: each location file must hold exactly two values; the sample
        # stores unit2 minus unit1 for both.
        gps_steps = []
        for u1_loc, u2_loc in zip(resolve('u1_loc'), resolve('u2_loc')):
            lat1, lon1 = self._read_floats(u1_loc)
            lat2, lon2 = self._read_floats(u2_loc)
            gps_steps.append(torch.tensor([lat2 - lat1, lon2 - lon1], dtype=torch.float32))
        gps = torch.stack(gps_steps)

        # mmWave vector for every step of the window.
        mmwave = torch.stack([
            torch.tensor(self._read_floats(path), dtype=torch.float32)
            for path in resolve('mmwave')
        ])

        # Targets are the trailing steps of the same window, so reuse the
        # readings instead of opening the files a second time. clone() keeps
        # the target independent of the 'mmwave' tensor's storage.
        target = mmwave[self.input_length - self.output_length:].clone()

        return {
            'video_paths': resolve('rgbs'),
            'heatmap_paths': resolve('heatmap'),
            'gps': gps,
            'mmwave': mmwave,
            'target_mmwave': target
        }

def qwen_collate_fn(batch):
    """Collate raw QwenVisionDataset samples into one batch dict.

    Path lists are gathered per sample without modification; the tensor
    fields are combined with pad_sequence(batch_first=True).
    """
    def gather(key):
        return [item[key] for item in batch]

    def padded(key):
        return pad_sequence(gather(key), batch_first=True)

    collated = {}
    for key in ('video_paths', 'heatmap_paths'):
        collated[key] = gather(key)
    for key in ('gps', 'mmwave', 'target_mmwave'):
        collated[key] = padded(key)
    return collated

In [7]:
def collate_fn(batch, device, pad_token_id=0):
    """Collate cached (inputs, labels) pairs into one batch.

    Args:
        batch: List of (inputs, labels) pairs. `inputs` must provide
            "input_ids", "attention_mask", "pixel_values" and
            "image_grid_thw" tensors; the first two are padded along their
            last dimension and all four are concatenated along dim 0.
        device: Unused. Kept so existing partial(collate_fn, device=...)
            bindings keep working; the returned tensors stay wherever the
            cached tensors live.
        pad_token_id: Fill value for padded "input_ids" positions. Those
            positions get attention_mask 0. Pass the tokenizer's pad id if
            the model should see a real pad token there.

    Returns:
        (batch_inputs, batch_labels): a dict of concatenated tensors and a
        long tensor of labels.

    Raises:
        ValueError: If `batch` is empty.
    """
    if not batch:
        raise ValueError("collate_fn received an empty batch")

    ids, masks, pixels, grids, batch_labels = [], [], [], [], []
    for inputs, labels in batch:
        ids.append(inputs["input_ids"])
        masks.append(inputs["attention_mask"])
        pixels.append(inputs["pixel_values"])
        grids.append(inputs["image_grid_thw"])
        batch_labels.append(labels)

    # Samples are tokenized one at a time, so their sequence lengths can
    # differ (e.g. a minus sign in a GPS value). torch.cat along dim 0 needs
    # equal lengths, so right-pad to the longest sample first. When all
    # lengths already match this leaves the tensors untouched.
    # NOTE(review): right padding is assumed to be acceptable for the model
    # consuming these batches — confirm.
    max_len = max(t.size(-1) for t in ids)

    def right_pad(tensor, fill):
        gap = max_len - tensor.size(-1)
        if gap == 0:
            return tensor
        return torch.nn.functional.pad(tensor, (0, gap), value=fill)

    batch_inputs = {
        "input_ids": torch.cat([right_pad(t, pad_token_id) for t in ids], dim=0),
        "attention_mask": torch.cat([right_pad(t, 0) for t in masks], dim=0),
        "pixel_values": torch.cat(pixels, dim=0),
        "image_grid_thw": torch.cat(grids, dim=0),
    }
    batch_labels = torch.tensor(batch_labels, dtype=torch.long)

    return batch_inputs, batch_labels

## Model
### 用Qwen构造带有输出投影模块的模型

In [8]:
class QwenReprogPatchHeadLight(nn.Module):
    """Frozen backbone plus a small trainable head that predicts P steps.

    The head consists of P learnable patch vectors that cross-attend to the
    backbone's last hidden states, followed by a two-layer MLP that maps the
    flattened patches to P * C logits.

    P (`pred_len`) is baked into the shapes of `patch_init` and `classifier`
    at construction time. Assigning a new attribute on an existing instance
    afterwards does not change how many steps are predicted; build a new
    instance for a different P.

    Args:
        qwen_model: Backbone module. All of its parameters are frozen here.
            It is called with input_ids / attention_mask / pixel_values /
            image_grid_thw and must return an object exposing `hidden_states`.
        pred_len: Number of future steps P.
        num_beams: Number of classes C per step.
        hidden_dim: Backbone hidden size D.
        mha_heads: Number of heads of the cross-attention layer.
        proj_hidden: Width of the MLP's hidden layer.
        dropout: Dropout probability inside the MLP.
    """

    def __init__(self,
                 qwen_model: nn.Module,
                 pred_len: int = 3,       # number of future steps P
                 num_beams: int = 64,     # number of classes C
                 hidden_dim: int = 3584,  # backbone hidden size D
                 mha_heads: int = 8,      # heads of the multi-head attention
                 proj_hidden: int = 2048, # hidden width of the projection MLP
                 dropout: float = 0.1):
        super().__init__()
        self.qwen = qwen_model
        self.P = pred_len
        self.C = num_beams
        self.D = hidden_dim

        # Freeze the backbone.
        for p in self.qwen.parameters():
            p.requires_grad = False

        # Trainable patch queries, one per predicted step.
        self.patch_init = nn.Parameter(torch.randn(self.P, self.D))

        # Patch reprogramming: patches attend to the backbone hidden states.
        self.reprog_mha = nn.MultiheadAttention(
            embed_dim=self.D,
            num_heads=mha_heads,
            batch_first=True
        )

        # Light projection head: P*D -> proj_hidden -> P*C.
        self.classifier = nn.Sequential(
            nn.Linear(self.P * self.D, proj_hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(proj_hidden, self.P * self.C),
        )

    def forward(self, input_ids, attention_mask, pixel_values, image_grid_thw):
        """Return logits of shape [B, P, C].

        The backbone runs under torch.no_grad(). If `attention_mask` marks
        padded positions with 0, those positions are excluded from the
        cross-attention keys so padding cannot influence the logits.

        NOTE(review): the head's parameters keep their own dtype; when the
        backbone emits a different dtype (e.g. bfloat16) the caller is
        expected to wrap this call in autocast, as the notebook's timing
        helper does.
        """
        B = input_ids.size(0)

        # Frozen backbone forward pass.
        with torch.no_grad():
            outputs = self.qwen(
                input_ids=input_ids,
                attention_mask=attention_mask,
                pixel_values=pixel_values,
                image_grid_thw=image_grid_thw,
                output_hidden_states=True,
                return_dict=True,
            )
        history_hidden = outputs.hidden_states[-1]  # [B, L, D]

        # Same learned patches for every batch element.
        patch = self.patch_init.unsqueeze(0).expand(B, -1, -1)  # [B, P, D]

        # Only build a key padding mask when padding is actually present, so
        # un-padded batches take exactly the same attention path as before.
        key_padding_mask = None
        if attention_mask is not None:
            is_padding = attention_mask == 0  # True = ignore this key
            if bool(is_padding.any()):
                key_padding_mask = is_padding

        # Reprogram: patches query the history.
        reprog_patch, _ = self.reprog_mha(
            query=patch,
            key=history_hidden,
            value=history_hidden,
            key_padding_mask=key_padding_mask,
        )  # [B, P, D]

        # Flatten and project.
        flat = reprog_patch.contiguous().view(B, self.P * self.D)  # [B, P*D]
        logits = self.classifier(flat).view(B, self.P, self.C)     # [B, P, C]

        return logits

    

### 加载Qwen

In [9]:
# 配置 bfloat16 精度
qwenbf16_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_ckpt,
    torch_dtype=torch.bfloat16,    # 设置模型权重为 bfloat16
    trust_remote_code=True,         # 必须开启
    return_dict=True
).to(device)



Downloading Model from https://www.modelscope.cn to directory: /data2/dzr/.cache/models/Qwen/Qwen2.5-VL-7B-Instruct


2025-06-03 23:13:52,775 - modelscope - INFO - Target directory already exists, skipping creation.


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [10]:
# Wrap the frozen backbone with the trainable patch/projection head.
# pred_len fixes the head's parameter shapes at construction time.
model = QwenReprogPatchHeadLight(
    qwen_model=qwenbf16_model,
    pred_len=output_length,     # number of future steps to predict (output_length is 3 here)
    num_beams=64,       # 64 classes
    hidden_dim=3584,    # hidden size of Qwen2.5-VL
    mha_heads=8,
    proj_hidden=2048,
    dropout=0.1
).to(device)

## 计算预测步长和时间的关系

In [11]:
# Half-open scenario range: range(9, 10) selects scenario9 only.
dataset_start_idx_zero = 9
dataset_end_idx_zero = 10
# Dataset directories, one per selected scenario
dataset_path_zero = [f'/data2/wzj/Datasets/DeepSense/scenario{i}/' for i in range(dataset_start_idx_zero, dataset_end_idx_zero)]  # scenario9 with the current bounds

# Collect every CSV found directly inside the selected scenario directories.
data_csv_paths_zero = []
for path in dataset_path_zero:
    data_csv_paths_zero.extend(glob.glob(os.path.join(path, '*.csv')))

# NOTE(review): the message says "training", but these CSVs feed the
# zero-shot latency measurement below.
print(f"Found {len(data_csv_paths_zero)} CSV files for training.")

Found 1 CSV files for training.


In [12]:
# Sliding windows of 8 rows; the last 3 rows of each window provide the targets.
zeroshot_dataset = QwenVisionDataset(
    data_csv_paths_zero,
    input_length=8,
    output_length=3,
)

In [13]:
# Run every window through the processor once and keep the results in memory.
cached_zeroshot = CachedVisionDataset(zeroshot_dataset, processor)

Caching dataset: 100%|██████████| 5012/5012 [05:28<00:00, 15.26it/s]


In [14]:
# Draw a reproducible random subset of up to 640 cached samples
# (10 batches at batch_size=64). The request is clamped to the dataset size,
# because random.sample raises ValueError when asked for more items than exist.
total_size = len(cached_zeroshot)
num_eval_samples = min(640, total_size)
random.seed(42)  # fixed seed so the same subset is drawn on every run
indices = list(range(total_size))
selected_indices = random.sample(indices, num_eval_samples)

# Build the subset.
subset = Subset(cached_zeroshot, selected_indices)

zeroshot_dataloader = DataLoader(
    subset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=partial(collate_fn, device="cpu"),  # bind the device argument
    pin_memory=device.type == "cuda"
)

In [20]:
def measure_forward_full_dataset(fn_forward, model, data_loader, device,
                                num_warmup: int = 1, num_runs: int = 5):
    """Time full passes over `data_loader` and report per-batch latency.

    Runs `fn_forward(model, data_loader, device)` `num_warmup` times untimed,
    then `num_runs` times timed. Each timed pass is bracketed by
    torch.cuda.synchronize() when `device` is a CUDA device.

    Args:
        fn_forward: Callable taking (model, data_loader, device) that performs
            one full pass over the loader.
        model: Model handed to `fn_forward`. Its `pred_len` attribute (or `P`
            when `pred_len` is absent) is printed for logging only.
        data_loader: Sized iterable of batches.
        device: torch.device or device string.
        num_warmup: Number of untimed warm-up passes (default 1).
        num_runs: Number of timed passes (default 5); must be >= 1.

    Returns:
        (mean_per_batch_ms, std_per_batch_ms): mean and standard deviation of
        the full-pass wall time in milliseconds, each divided by the number
        of batches in `data_loader`.

    Raises:
        ValueError: If `data_loader` has no batches or `num_runs` < 1.
    """
    # Validate up front so a bad configuration fails before any slow pass.
    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader has no batches; nothing to time")
    if num_runs < 1:
        raise ValueError(f"num_runs must be >= 1, got {num_runs}")

    # Logging only: fall back to `P` (or a placeholder) when the model does
    # not carry a `pred_len` attribute.
    pred_len = getattr(model, "pred_len", getattr(model, "P", "unknown"))
    print(f"当前 qwen.pred_len = {pred_len}")

    needs_cuda_sync = torch.device(device).type == "cuda"

    # Warm-up passes (untimed).
    for _ in range(num_warmup):
        fn_forward(model, data_loader, device)

    # Timed passes. perf_counter is monotonic, so wall-clock adjustments
    # cannot distort the measured interval.
    latencies = []
    for _ in range(num_runs):
        if needs_cuda_sync:
            torch.cuda.synchronize(device)
        t0 = time.perf_counter()

        fn_forward(model, data_loader, device)

        if needs_cuda_sync:
            torch.cuda.synchronize(device)
        t1 = time.perf_counter()
        latencies.append((t1 - t0) * 1000)

    arr = np.array(latencies, dtype=np.float32)
    mean_full_dataset = float(arr.mean())    # mean time of one full pass
    std_full_dataset  = float(arr.std())

    # Convert to an average per-batch figure.
    mean_per_batch = mean_full_dataset / num_batches
    std_per_batch  = std_full_dataset  / num_batches

    return mean_per_batch, std_per_batch

In [16]:
# Number of batches per pass over the loader (shown as the cell output).
len(zeroshot_dataloader)

10

In [17]:
from torch.cuda.amp import autocast
def forward(model, data_loader,  device):
    """Run one inference-only pass of `model` over every batch in `data_loader`.

    The model is put in eval mode, gradients are disabled, each batch's
    tensors are moved to `device`, and the call runs under bfloat16 autocast
    for that device type. Outputs are discarded; the function exists so the
    pass can be timed.

    Args:
        model: Module called as model(**inputs).
        data_loader: Iterable yielding (inputs, labels), where `inputs` is a
            dict of tensors.
        device: torch.device or device string.
    """
    # torch.autocast with an explicit device_type replaces the deprecated
    # torch.cuda.amp.autocast and also works for non-CUDA devices.
    device_type = torch.device(device).type
    model.eval()
    with torch.no_grad():
        for inputs, _labels in tqdm(data_loader, desc="forward"):
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.autocast(device_type=device_type, dtype=torch.bfloat16):
                _ = model(**inputs)
    return
          
# Per-k latency statistics; the timing loop appends one mean and one std per k.
results = {
    "Qwen2.5-VL-7B": {"mean": [], "std": []},  
}


num_warmup_runs = 1   # number of untimed warm-up passes over the dataset
num_timed_runs   = 5  # number of timed passes over the dataset (more passes take proportionally longer)

# Make sure the checkpoint directory exists
os.makedirs(checkpoint_dir, exist_ok=True) 

In [18]:
# Load the best checkpoint, if one exists, into the model built above.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
best_model_path = os.path.join(checkpoint_dir, 'multimodal_encoder_decoder_best.pth')
if os.path.exists(best_model_path):
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    model.eval()
    print("Loaded best model for testing.")
else:
    # The notebook continues with whatever weights the model currently holds.
    print(f"Best model not found at {best_model_path}. Skipping test evaluation.")

Loaded best model for testing.


In [21]:
# Start from empty result lists so re-running this cell does not append a
# second sweep after the first one.
results["Qwen2.5-VL-7B"] = {"mean": [], "std": []}

for k in range(1, 11):
    # The number of predicted steps is baked into the head's parameter shapes
    # (patch_init is [P, D]; the classifier maps P*D -> P*C). Setting an
    # attribute on an existing model therefore changes nothing, so a fresh
    # head with pred_len=k is built around the shared frozen backbone.
    # Its weights are randomly initialised, which is sufficient for timing.
    timed_model = QwenReprogPatchHeadLight(
        qwen_model=qwenbf16_model,
        pred_len=k,
        num_beams=64,
        hidden_dim=3584,
        mha_heads=8,
        proj_hidden=2048,
        dropout=0.1,
    ).to(device)
    timed_model.eval()
    # Read only by the log line inside measure_forward_full_dataset.
    timed_model.pred_len = k

    # Time full passes over the dataset for this k.
    mean_qwen, std_qwen = measure_forward_full_dataset(
        fn_forward   = forward,
        model        = timed_model,
        data_loader  = zeroshot_dataloader,
        device       = device,
        num_warmup   = num_warmup_runs,
        num_runs     = num_timed_runs
    )
    results["Qwen2.5-VL-7B"]["mean"].append(mean_qwen)
    results["Qwen2.5-VL-7B"]["std"].append(std_qwen)
    print(
        f"k={k:2d} | "
        f"Qwen Model: {mean_qwen:.1f}±{std_qwen:.1f} ms "
    )

    # Drop this head before building the next one so its memory can be reused.
    del timed_model



当前 qwen.pred_len = 1


  with autocast(dtype=torch.bfloat16):
forward: 100%|██████████| 10/10 [08:24<00:00, 50.40s/it]
forward: 100%|██████████| 10/10 [10:48<00:00, 64.86s/it]
forward: 100%|██████████| 10/10 [10:45<00:00, 64.58s/it]
forward: 100%|██████████| 10/10 [10:45<00:00, 64.59s/it]
forward: 100%|██████████| 10/10 [10:44<00:00, 64.49s/it]
forward: 100%|██████████| 10/10 [10:43<00:00, 64.37s/it]


k= 1 | Qwen Model: 65406.0±167.2 ms 
当前 qwen.pred_len = 2


forward: 100%|██████████| 10/10 [10:42<00:00, 64.29s/it]
forward: 100%|██████████| 10/10 [10:43<00:00, 64.32s/it]
forward: 100%|██████████| 10/10 [10:43<00:00, 64.35s/it]
forward: 100%|██████████| 10/10 [10:41<00:00, 64.15s/it]
forward: 100%|██████████| 10/10 [10:40<00:00, 64.09s/it]
forward: 100%|██████████| 10/10 [10:38<00:00, 63.84s/it]


k= 2 | Qwen Model: 64987.2±198.7 ms 
当前 qwen.pred_len = 3


forward: 100%|██████████| 10/10 [10:27<00:00, 62.79s/it]
forward: 100%|██████████| 10/10 [10:25<00:00, 62.55s/it]
forward: 100%|██████████| 10/10 [10:21<00:00, 62.16s/it]
forward: 100%|██████████| 10/10 [10:21<00:00, 62.16s/it]
forward: 100%|██████████| 10/10 [10:21<00:00, 62.13s/it]
forward: 100%|██████████| 10/10 [10:20<00:00, 62.00s/it]


k= 3 | Qwen Model: 62999.8±202.4 ms 
当前 qwen.pred_len = 4


forward: 100%|██████████| 10/10 [10:18<00:00, 61.88s/it]
forward: 100%|██████████| 10/10 [10:18<00:00, 61.81s/it]
forward: 100%|██████████| 10/10 [10:16<00:00, 61.69s/it]
forward: 100%|██████████| 10/10 [10:18<00:00, 61.89s/it]
forward: 100%|██████████| 10/10 [10:16<00:00, 61.64s/it]
forward: 100%|██████████| 10/10 [10:16<00:00, 61.63s/it]


k= 4 | Qwen Model: 62503.1±111.1 ms 
当前 qwen.pred_len = 5


forward: 100%|██████████| 10/10 [10:21<00:00, 62.12s/it]
forward: 100%|██████████| 10/10 [10:19<00:00, 61.93s/it]
forward: 100%|██████████| 10/10 [10:17<00:00, 61.75s/it]
forward: 100%|██████████| 10/10 [10:16<00:00, 61.64s/it]
forward: 100%|██████████| 10/10 [10:18<00:00, 61.86s/it]
forward: 100%|██████████| 10/10 [10:15<00:00, 61.51s/it]


k= 5 | Qwen Model: 62546.9±161.5 ms 
当前 qwen.pred_len = 6


forward: 100%|██████████| 10/10 [10:13<00:00, 61.30s/it]
forward: 100%|██████████| 10/10 [10:12<00:00, 61.26s/it]
forward: 100%|██████████| 10/10 [10:11<00:00, 61.13s/it]
forward: 100%|██████████| 10/10 [10:12<00:00, 61.26s/it]
forward: 100%|██████████| 10/10 [10:13<00:00, 61.30s/it]
forward: 100%|██████████| 10/10 [10:15<00:00, 61.52s/it]


k= 6 | Qwen Model: 62106.2±144.3 ms 
当前 qwen.pred_len = 7


forward: 100%|██████████| 10/10 [10:19<00:00, 61.93s/it]
forward: 100%|██████████| 10/10 [10:21<00:00, 62.19s/it]
forward: 100%|██████████| 10/10 [10:20<00:00, 62.05s/it]
forward: 100%|██████████| 10/10 [10:24<00:00, 62.44s/it]
forward: 100%|██████████| 10/10 [10:20<00:00, 62.03s/it]
forward: 100%|██████████| 10/10 [10:25<00:00, 62.57s/it]


k= 7 | Qwen Model: 63054.2±209.8 ms 
当前 qwen.pred_len = 8


forward: 100%|██████████| 10/10 [10:25<00:00, 62.57s/it]
forward: 100%|██████████| 10/10 [10:26<00:00, 62.61s/it]
forward: 100%|██████████| 10/10 [10:24<00:00, 62.46s/it]
forward: 100%|██████████| 10/10 [10:25<00:00, 62.51s/it]
forward: 100%|██████████| 10/10 [10:22<00:00, 62.23s/it]
forward: 100%|██████████| 10/10 [10:26<00:00, 62.62s/it]


k= 8 | Qwen Model: 63259.0±144.7 ms 
当前 qwen.pred_len = 9


forward: 100%|██████████| 10/10 [10:22<00:00, 62.20s/it]
forward: 100%|██████████| 10/10 [10:19<00:00, 61.92s/it]
forward: 100%|██████████| 10/10 [10:19<00:00, 62.00s/it]
forward: 100%|██████████| 10/10 [10:16<00:00, 61.64s/it]
forward: 100%|██████████| 10/10 [10:17<00:00, 61.75s/it]
forward: 100%|██████████| 10/10 [10:39<00:00, 63.98s/it]


k= 9 | Qwen Model: 63095.3±866.3 ms 
当前 qwen.pred_len = 10


forward: 100%|██████████| 10/10 [10:18<00:00, 61.88s/it]
forward: 100%|██████████| 10/10 [10:12<00:00, 61.26s/it]
forward: 100%|██████████| 10/10 [10:19<00:00, 61.99s/it]
forward: 100%|██████████| 10/10 [10:18<00:00, 61.85s/it]
forward: 100%|██████████| 10/10 [10:18<00:00, 61.80s/it]
forward: 100%|██████████| 10/10 [10:23<00:00, 62.30s/it]


k=10 | Qwen Model: 62607.1±323.9 ms 


In [24]:
import csv

# Write one row per measured k. zip() stops at the shortest input, so a sweep
# that was interrupted early still exports the rows it has instead of raising
# IndexError, and at most k=1..10 is written.
# The stored values are the per-batch figures returned by
# measure_forward_full_dataset.
qwen_results = results["Qwen2.5-VL-7B"]
with open("inference_latency_full_dataset.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["k", "qwen_mean_ms", "qwen_std_ms"])
    for k, mean_ms, std_ms in zip(range(1, 11), qwen_results["mean"], qwen_results["std"]):
        writer.writerow([k, mean_ms, std_ms])

print("所有 k=1..10 的遍历数据集推理时延已保存到 inference_latency_full_dataset.csv")


所有 k=1..10 的遍历数据集推理时延已保存到 inference_latency_full_dataset.csv
