In [None]:
# MiniCPM-V-2.6 Zero-Shot Prompting for Entity Extraction
# This notebook demonstrates the MiniCPM ZSP strategy using the project's modular pipeline.

import sys
import os

# Put the directory three levels above the current working directory at the
# front of sys.path so the `src` package imports below can be resolved.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir)
)
sys.path.insert(0, project_root)

from src.config import (
    MINICPM_MODEL_NAME, MINICPM_DOWNLOAD_DIR, BATCH_SIZE,
    MINICPM_PROMPT_TEMPLATE, TEST_IMAGE_DIR, TRAIN_CSV, TEST_CSV,
    PROCESSED_DATA_DIR, MINICPM_OUTPUT_CSV,
)
from src.constants import ENTITY_UNIT_MAP
from src.utils import parse_model_response, post_process_prediction

# Echo the key run settings so the notebook output records the configuration
# that was in effect (one setting per output line).
print(
    f"Model: {MINICPM_MODEL_NAME}",
    f"Batch size: {BATCH_SIZE}",
    f"Entity types: {list(ENTITY_UNIT_MAP.keys())}",
    sep="\n",
)

In [None]:
# Validate output
from src.sanity import check_output_format

check_output_format(str(MINICPM_OUTPUT_CSV), str(TEST_CSV))

In [None]:
# Run MiniCPM Zero-Shot Prediction
from src.models.minicpm_zsp import predict_minicpm

# Run inference (requires GPU with sufficient VRAM)
result_df = predict_minicpm(test_df)
print(f"\nPredictions generated: {len(result_df)}")
print(f"Non-empty predictions: {(result_df['prediction'].str.strip() != '').sum()}")
print(f"\nSample predictions:")
result_df[result_df["prediction"] != ""].head(10)

In [None]:
# Load test data
import pandas as pd

test_df = pd.read_csv(TEST_CSV)
print(f"Test samples: {len(test_df)}")
print(f"\nEntity distribution:")
print(test_df["entity_name"].value_counts())