# VisDoMRAG Qwen Notebook

`visdomrag/` 패키지를 사용해 Qwen 기반 VisDoMRAG 파이프라인을 단계별로 실행하는 템플릿입니다.

In [None]:
import os

def set_gpu(gpu_ids=None):
    """Select the GPUs this process exposes through ``CUDA_VISIBLE_DEVICES``.

    Args:
        gpu_ids: A single device index (``0``, ``'1'``), a comma-separated
            string (``'2,3'``), or a list/tuple of indices. ``None``,
            ``False``, or any input that contains no device entry removes the
            variable from the environment instead.

    Returns:
        None. The only effect is on ``os.environ``.
    """
    if gpu_ids is None or gpu_ids is False:
        entries = []
    elif isinstance(gpu_ids, (list, tuple)):
        entries = [str(item) for item in gpu_ids]
    else:
        # Covers ints (including 0, which a plain truthiness test would
        # mistake for "no GPU") and comma-separated strings.
        entries = str(gpu_ids).split(',')

    # Strip whitespace and drop empty entries so '2, 3' is written as '2,3'.
    devices = [entry.strip() for entry in entries]
    devices = [device for device in devices if device]

    if devices:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(devices)
    else:
        os.environ.pop('CUDA_VISIBLE_DEVICES', None)

# 1) ColPali indexing stage: expose GPU 1 to this process, then echo the
#    resulting CUDA_VISIBLE_DEVICES value for confirmation.
set_gpu(1)
print('ColPali 단계 GPU:', os.environ.get('CUDA_VISIBLE_DEVICES'))


%load_ext autoreload
%autoreload 2

import os
import sys
from pathlib import Path

# Device index to expose to CUDA, given as a string (e.g. '0' or '1').
# An empty string leaves CUDA_VISIBLE_DEVICES exactly as it already is.
# NOTE(review): with GPU_ID = '0' this assignment overwrites the value written
# by the set_gpu(1) call earlier in this cell -- confirm which GPU is intended.
GPU_ID = '0'
if GPU_ID:
    os.environ.update(CUDA_VISIBLE_DEVICES=GPU_ID)

# Register the current working directory on sys.path (once), presumably so the
# local `visdomrag` package can be imported from the notebook.
ROOT = Path.cwd().resolve()
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.append(str(ROOT))

ROOT

## 1. 환경 설정
autoreload 확장을 활성화하고, 프로젝트 루트를 파이썬 경로에 추가합니다. (필요한 패키지는 노트북을 실행하기 전에 미리 설치해 두세요.)

In [None]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

ROOT = Path.cwd().resolve()


if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

ROOT

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


PosixPath('/Users/gimchaeyeon/Documents/2025/GL/VisDoM-main')

## 2. 모듈 임포트
`visdomrag` 패키지에서 제공하는 헬퍼들을 불러옵니다.

In [9]:
import json
from pprint import pprint

import pandas as pd

from visdomrag import (
    VisDoMRAGConfig,
    load_dataset,
    RetrievalManager,
    init_qwen,
    process_query,
    run_pipeline,
)

## 3. 실험 설정값 정의
데이터/출력 경로와 리트리버 종류 등을 필요에 맞게 수정하세요.

In [10]:
# Dataset input directory and the per-experiment output directory, both
# resolved relative to the project root.
DATA_DIR = ROOT.joinpath('feta_tab')
OUTPUT_DIR = ROOT.joinpath('outputs', 'feta_tab_qwen')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Pipeline settings: ColPali for the vision retriever, BM25 for the text
# retriever, top_k of 5, and no forced re-indexing.
config = VisDoMRAGConfig(
    output_dir=OUTPUT_DIR,
    data_dir=DATA_DIR,
    text_retriever='bm25',
    vision_retriever='colpali',
    force_reindex=False,
    top_k=5,
)
config.ensure_directories()
config

VisDoMRAGConfig(data_dir=PosixPath('/Users/gimchaeyeon/Documents/2025/GL/VisDoM-main/feta_tab'), output_dir=PosixPath('/Users/gimchaeyeon/Documents/2025/GL/VisDoM-main/outputs/feta_tab_qwen'), csv_path=None, llm_model='qwen', vision_retriever='colpali', text_retriever='bm25', top_k=5, chunk_size=3000, chunk_overlap=300, qa_prompt='Answer the question objectively based on the provided context.', force_reindex=False, api_keys={}, vision_device='auto', vision_torch_dtype=None)

## 4. 데이터 로드 & 리트리버 초기화
CSV를 DataFrame으로 읽고 `RetrievalManager`가 모델을 준비하도록 합니다.

In [11]:
# Load the dataset described by `config`, then build the retrieval manager
# from the same config and the loaded DataFrame.
df = load_dataset(config)
retrieval = RetrievalManager(config=config, df=df)
# Last expression of the cell: the notebook displays the number of rows in `df`.
len(df)

Fetching 2 files: 100%|██████████| 2/2 [00:00<00:00, 16008.79it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:22<00:00, 11.03s/it]


350

## 5. Qwen 리소스 로드
GPU 메모리를 사용하므로 한 번만 초기화하고 계속 재사용하세요.

In [None]:
# Release cached CUDA memory once indexing has finished.

import torch
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# 2) Qwen inference stage: expose GPUs 2 and 3.
# A list is passed instead of the string '2, 3' so that no stray space ends up
# inside CUDA_VISIBLE_DEVICES.
# NOTE(review): CUDA reads CUDA_VISIBLE_DEVICES when it is initialised. If an
# earlier cell already initialised CUDA in this kernel, changing the variable
# here likely has no effect until the kernel is restarted -- confirm.
set_gpu([2, 3])
print('Qwen 단계 GPU:', os.environ.get('CUDA_VISIBLE_DEVICES'))


In [None]:
# Load the Qwen2-VL-7B-Instruct resources once; later cells pass `qwen` into
# process_query / run_pipeline.
# NOTE(review): the recorded output of this cell is
# "UnboundLocalError: local variable 'resolved_device_map' referenced before
# assignment". The failure is presumably inside init_qwen for
# device_map='auto' -- verify and fix in the visdomrag package.
qwen = init_qwen(model_name='Qwen/Qwen2-VL-7B-Instruct', device_map='auto')

qwen.device



UnboundLocalError: local variable 'resolved_device_map' referenced before assignment

## 6. 단일 쿼리 테스트
원하는 `q_id`를 입력해 전체 파이프라인을 검증합니다.

In [None]:
# Use the first q_id in the dataset as the single test query.
sample_id = df['q_id'].iloc[0]
sample_id

np.int64(12844)

In [None]:
import os
# Set TOKENIZERS_PARALLELISM explicitly, as the huggingface/tokenizers fork
# warning recommends.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Tokenizer parallelism is turned off from here on.

In [None]:
from tqdm import tqdm  # if not already imported

# Wrap the one-element list in tqdm so the run is shown with a progress bar.
sample_ids = tqdm([sample_id], desc='Processing sample_id')
for qid in sample_ids:
    # Run the pipeline for this query id and report the value it returns.
    success = process_query(config, retrieval, qwen, query_id=qid)
    print('Success:', success)


Processing PDFs for visual index:   0%|          | 1/600 [00:16<2:47:18, 16.76s/it]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Processing PDFs for visual index:   0%|          | 2/600 [01:03<5:43

KeyboardInterrupt: 

## 7. 다수 쿼리 일괄 실행
샘플 q_id 목록을 구성해 `run_pipeline`을 호출합니다.

In [None]:
# Draw 5 q_ids with a fixed random_state (same sample on every run) and pass
# them to run_pipeline with delay_seconds=0.
subset = df['q_id'].sample(n=5, random_state=42).tolist()
results = run_pipeline(config, retrieval, qwen, query_ids=subset, delay_seconds=0)
results

## 8. 결과 확인
생성된 JSON 파일을 열어 Qwen 응답을 점검합니다.

In [None]:
# `sample_id` comes straight from the DataFrame and is a numpy integer here
# (the notebook shows np.int64), which has no .replace(); convert it to str
# before substituting '/' so the file name can be built for any q_id type.
result_path = config.combined_output_dir / f"response_{str(sample_id).replace('/', '$')}.json"
# Parse the stored response and pretty-print it for inspection.
payload = json.loads(result_path.read_text())
pprint(payload)

## 9. 메모리 정리 (선택)
장시간 실험 시 CUDA 메모리를 비워줍니다.

In [None]:
import gc
import torch

# Drop the notebook-level references to the retrieval manager and the Qwen
# resources if they exist; names that were never defined are skipped.
for _stale in ('retrieval', 'qwen'):
    globals().pop(_stale, None)
del _stale

# Collect the now-unreferenced objects, then release cached CUDA memory.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()