## Preparing Inputs for Model Generation

In [35]:
from datasets import load_dataset 
from transformers import AutoProcessor 

# Load the civil plan dataset; only its "train" split is used by the cells below.
civil_plans_dict = load_dataset("genesis1SubHub/NHA-Civil-set")
civil_plans = civil_plans_dict["train"]

# Build the processor that matches the checkpoint named in `model_id`.
model_id = "Qwen/Qwen2.5-VL-72B-Instruct"
processor = AutoProcessor.from_pretrained(
    model_id,
    use_fast=True,  # default for qwen2.5 processor
    # Upper bound on image size handed to the processor; 16_384 * 28 * 28 == 12_845_056.
    max_pixels=16_384 * 28 * 28,
)

In [2]:
from src.eval_utils import create_samples
from src.qwen_2_5_utils import generate_inputs_qwen2_5

# System prompt: sets the assistant persona, lists three example scopes of work,
# and shows the list-shaped answer format the model is asked to imitate.
# NOTE(review): the literal below is runtime text and is intentionally left untouched here.
#   - "roooftop" is misspelled in both the numbered example and the sample response.
#   - Example 3 has no trailing period in the numbered list but has one in the sample response.
#   - The sample response ends with a trailing comma before "]", which is not valid JSON;
#     relevant if the extracted reply is ever parsed with json.loads. Confirm before changing,
#     since any edit alters what the model sees.
system_prompt = '''
    You're a helpful assistant project manager in the construction industry.
    Here's some example scopes of work:
        1. "Remove existing interior walls as specified."
        2. "Provide and install 7 roooftop mechanical units." 
        3. "Design wood frame roof truss system"
    Place all scope items into a list, here's an example response with the example scopes above:
        "[
            "Remove existing interior walls as specified.",
            "Provide and install 7 roooftop mechanical units.",
            "Design wood frame roof truss system.",
        ]"
'''
# Instruction passed to create_samples as `prompt` (presumably paired with each page image — verify in create_samples).
prompt="Using the given image, identify all scopes of work on the page. Some pages may have no scopes."
# Build the model inputs for the civil plans using the prompts and processor defined above.
# The recorded run reported 27 samples, each holding input_ids, attention_mask,
# pixel_values and image_grid_thw tensors.
input_lists = create_samples(
    plans=civil_plans,
    image_header="image",
    system_prompt=system_prompt,
    prompt=prompt,
    processor=processor,
    input_generator=generate_inputs_qwen2_5,
)

Number of samples generated: 27
  sample_structure
    input_ids : torch.Size([1, 11040])
    attention_mask : torch.Size([1, 11040])
    pixel_values : torch.Size([43520, 1176])
    image_grid_thw : torch.Size([1, 3])


## Model Setup and Inference

In [3]:
from src.qwen_2_5_utils import setup_model

# Instantiate the model via the project helper (the recorded run loaded 38 checkpoint shards).
model = setup_model()

Loading checkpoint shards: 100%|██████████| 38/38 [00:28<00:00,  1.31it/s]


In [4]:
from src.eval_utils import generate

# Run generation over every prepared sample.
# NOTE(review): several samples in the recorded output stop at exactly 500 new tokens,
# which looks like a generation cap — those replies may be cut off before the closing
# "]" of the scope list. TODO confirm against `generate`.
trimmed_generated_ids = generate(model=model, samples=input_lists)

Generation time elapsed: 65.0239 | New tokens generated: 242
Generation time elapsed: 43.0581 | New tokens generated: 174
Generation time elapsed: 66.7896 | New tokens generated: 294
Generation time elapsed: 44.3964 | New tokens generated: 177
Generation time elapsed: 105.9330 | New tokens generated: 500
Generation time elapsed: 58.9016 | New tokens generated: 254
Generation time elapsed: 58.2615 | New tokens generated: 256
Generation time elapsed: 61.3150 | New tokens generated: 266
Generation time elapsed: 106.4629 | New tokens generated: 500
Generation time elapsed: 52.2392 | New tokens generated: 218
Generation time elapsed: 106.5281 | New tokens generated: 500
Generation time elapsed: 78.3927 | New tokens generated: 355
Generation time elapsed: 64.0789 | New tokens generated: 284
Generation time elapsed: 67.0620 | New tokens generated: 299
Generation time elapsed: 51.2636 | New tokens generated: 211
Generation time elapsed: 85.4708 | New tokens generated: 395
Generation time elaps

In [20]:
import torch

def decode_outputs(
    processor,
    output_ids: list[list[torch.Tensor]],
    feature_extraction: bool = False,
) -> list[str]:
    '''
    Decode generated token ids into text with ``processor.batch_decode``.

    Args:
        processor: Object exposing ``batch_decode(sequences, skip_special_tokens=...,
            clean_up_tokenization_spaces=...)`` that returns a list of strings.
        output_ids: One entry per generated sample; each entry is passed to
            ``batch_decode`` as a batch of token-id sequences.
        feature_extraction: If set to 'True', each token is a separate element in the
            output list. Used to match tokens corresponding to a scope, for heatmap
            visualization. In this mode only the first sequence of each entry
            (``output[0]``) is decoded.

    Returns:
        A single flat list with the decoded strings of all entries, in input order.
        Sample boundaries are not preserved in the returned list.
    '''
    text_outputs: list[str] = []
    for output in output_ids:
        # Handing batch_decode one sequence instead of a batch makes it treat every
        # token id as its own "sequence", which yields one string per token.
        sequences = output[0] if feature_extraction else output
        text_outputs.extend(
            processor.batch_decode(
                sequences,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
        )
    return text_outputs

In [22]:
# Turn the generated token ids into plain text, one decoded string per sequence.
text_outputs = decode_outputs(
    output_ids=trimmed_generated_ids,
    processor=processor,
)

In [None]:
import re 
import json 
import types

def extract_resp(raw_input):
    """Return the bracketed list portion of a model reply as text.

    The search is greedy and spans newlines, so the result runs from the first
    "[" to the last "]" found in ``raw_input``.

    Args:
        raw_input: Decoded model reply, or ``None``.

    Returns:
        The matched substring (including both brackets), or ``None`` when
        ``raw_input`` is ``None`` or contains no "[...]" span.
    """
    # Tolerate None so that re-running the extraction over already-processed
    # results (which may contain None for pages without a list) does not raise.
    if raw_input is None:
        return None
    match = re.search(r"\[.*\]", raw_input, re.DOTALL)
    # Return the matched text itself: a Match object would be stored and written
    # to CSV as its repr rather than as the extracted list.
    return match.group(0) if match else None

In [None]:
# Reduce each decoded reply to whatever extract_resp returns for it.
# NOTE(review): this rebinds `text_outputs`, so the raw decoded replies are no longer
# reachable under that name once this cell has run.
text_outputs = list(map(extract_resp, text_outputs))

In [27]:
import pandas as pd 

# Pair each model output with the page it came from and persist the table as CSV.
# NOTE(review): assumes `text_outputs` and civil_plans["page_id"] have the same length
# and order — confirm against create_samples / generate.
output_columns = {
    "Civil outputs": text_outputs,
    "page_id": civil_plans["page_id"],
}
df = pd.DataFrame(output_columns)
df.to_csv("NHA_civil_model_outputs.csv", index=False)