In [None]:
import os
import sys
# Add src to path to import modules
# NOTE: 'src' is a relative entry, so it is resolved against the kernel's working directory.
if 'src' not in sys.path:
    sys.path.append('src')

import numpy as np
# Download-related switches, set before `unsloth` is imported further down.
# NOTE(review): presumably these turn off the hf_transfer backend in favour of the
# default downloader — confirm against the installed huggingface_hub / unsloth versions.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
os.environ["UNSLOTH_STABLE_DOWNLOADS"] = "1"

import json
from typing import Callable, Dict, Tuple
from datetime import datetime

import torch
# NOTE(review): `unsloth` is not referenced by name in the visible cells; presumably it is
# imported here for its import-time effects — confirm before removing.
import unsloth

# Project-local helpers; these resolve through the 'src' path entry added above.
from utils.utils import (
    load_config,
    normalize_quantization,
    save_submission_csv,
    load_json_data
)
from utils.postprocessing import post_process_qwen3vl_output
from dotenv import load_dotenv
from tqdm import tqdm

from models.utils import load_model
from data import build_user_content
from infer import predict_answer

def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch
    (CPU plus all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    import random  # local import keeps this definition self-contained

    # Only inherited by child processes: the hash seed of the running
    # interpreter is fixed at startup and is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning speed for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
seed_everything(42)

# Load environment variables from .env file
load_dotenv()


# --- Main inference logic ---
config_path = "configs/config_unsloth_infer.yaml"

# Load configuration; every setting falls back to a default when the key is absent.
config = load_config(config_path)
model_name = config.get("model_name", "DAMO-NLP-SG/VideoLLaMA3-7B")
# Fallback test set: data/public_test/public_test.json, two directory levels above
# the current working directory (os.path.abspath('') resolves to the cwd).
infer_data_path = config.get(
    "infer_data_path",
    os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(''))), "data/public_test/public_test.json"),
)
output_path = config.get("output_path", None)
attn_implementation = config.get("attn_implementation", "flash_attention_2")
quantization = normalize_quantization(config)
use_unsloth = config.get("use_unsloth", False)
signs_dir = config.get("signs_dir", "data/traffic_signs")

# Load traffic signs
from glob import glob
# glob() yields matches in arbitrary, filesystem-dependent order; sorting keeps the
# list handed to the prompt builder identical across runs and machines.
# `signs` stays None when the directory is unset or does not exist.
signs = sorted(glob(os.path.join(signs_dir, "*"))) if signs_dir and os.path.exists(signs_dir) else None


# Get tokens from environment
hf_token = os.getenv("HF_TOKEN")



In [None]:
# Build the model together with its processor and tokenizer, using the settings
# resolved in the configuration cell. inference_mode is always requested here.
print("Loading model...")
model, processor, tokenizer = load_model(
    model_name=model_name, attn_implementation=attn_implementation, quantization=quantization,
    use_unsloth=use_unsloth, hf_token=hf_token, inference_mode=True,
)


# # Run inference
# print("Running inference...")
# results = run_inference(
#     model, 
#     processor, 
#     tokenizer, 
#     test_data, 
#     model_name, 
#     use_unsloth,
#     signs=signs
# )

# # Save results
# print("Saving results...")
# save_submission_csv(results, infer_data_path, output_path)

# print("Inference complete. Results saved.")

In [None]:
# Load test data
print("Loading test data...")
test_data = load_json_data(infer_data_path)

# Fail fast with a readable message: the cells below index test_data["data"] directly
# and would otherwise stop later with a bare KeyError / IndexError.
if "data" not in test_data or len(test_data["data"]) == 0:
    raise ValueError(f"No samples found under the 'data' key of {infer_data_path!r}")

In [None]:
item = test_data['data'][0]  # Example for single item inference

# Build the prompt for this one sample from its video, question and answer choices.
messages = build_user_content(
    item["video_path"],
    item["question"],
    item["choices"],
    use_unsloth=use_unsloth,
    signs=signs,
)
response = predict_answer(
    model=model,
    processor=processor,
    tokenizer=tokenizer,
    messages=messages,
    model_name=model_name,
    use_unsloth=use_unsloth,
)

# Show the raw (not post-processed) model output so this smoke test can be inspected.
response

In [None]:
from time import perf_counter, time  # `time` stays imported in case other cells use it

# DEBUG = True
# if DEBUG:
#     test_data['data'] = test_data['data'][:5]  # Limit to first 5 items for debugging

# One record per test item: its id, the post-processed answer and the elapsed time.
results = []

for item in tqdm(test_data["data"]):
    # perf_counter() is monotonic and high-resolution, so the measured interval cannot
    # be skewed (or go negative) if the system clock is adjusted mid-run.
    t1 = perf_counter()
    messages = build_user_content(
        item["video_path"],
        item["question"],
        item["choices"],
        use_unsloth=use_unsloth,
        signs=signs,
    )
    response = predict_answer(
        model=model,
        processor=processor,
        tokenizer=tokenizer,
        messages=messages,
        model_name=model_name,
        use_unsloth=use_unsloth,
    )

    response = post_process_qwen3vl_output(response)
    t2 = perf_counter()
    # The timed span covers prompt building, generation and post-processing,
    # truncated to whole milliseconds.
    predicted_time = int((t2 - t1) * 1000)
    results.append({"id": item.get("id", ""), "answer": response, 'time (millisecond)': predicted_time})

In [None]:
import pandas as pd

# Pinning the columns keeps the schema stable even when `results` is empty; without it
# an empty frame has no columns and the ['id', 'answer'] selection below raises KeyError.
result_df = pd.DataFrame(results, columns=["id", "answer", "time (millisecond)"])
# Full table including per-item latency.
result_df.to_csv("time_submission.csv", index=False)
# Submission file: id and answer columns only.
result_df[['id', 'answer']].to_csv("jupyter_submission.csv", index=False)