In [1]:
# test.py
import torch, os
from tqdm import tqdm
from PIL import Image
from transformers import AutoModel, AutoTokenizer

import time

# Time the checkpoint load with a monotonic, high-resolution clock so the
# reported duration cannot be skewed by system clock adjustments.
start = time.perf_counter()
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-Llama3-V-2_5-int4',
    # NOTE(review): trust_remote_code runs Python code shipped with the
    # checkpoint repository; consider pinning a revision.
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
print('Time to load model:', time.perf_counter() - start)
@torch.no_grad()
def get_logits(
	self,
	msgs: list[dict],
	images: list[list[torch.Tensor]],
	tokenizer: AutoTokenizer,
	tgt_sizes: list[torch.Tensor],
) -> torch.Tensor:
	"""Run one batched forward pass of the language model on chat messages plus images.

	The function is attached to the model object as a plain attribute, so it is
	not bound as a method: ``self`` is never read and all model access goes
	through the notebook-level ``model`` variable.

	Args:
		self: Unused placeholder for the first positional argument.
		msgs: One message dict per sample. Each message is wrapped on its own
			as a single-turn conversation and tokenized with
			``tokenizer.apply_chat_template``.
		images: Per-sample lists of image tensors. ``None`` is accepted and is
			treated as "no images for any sample".
		tokenizer: Tokenizer providing ``apply_chat_template``; also forwarded
			to ``model._process_list``.
		tgt_sizes: Tensors that are stacked with ``torch.vstack`` and passed to
			the model as a single-element ``tgt_sizes`` list.
			NOTE(review): presumably one size row per image slice — confirm.

	Returns:
		Whatever ``model.llm(inputs_embeds=...)`` returns.
		NOTE(review): annotated as ``torch.Tensor``, but the LLM wrapper may
		return an output object carrying the logits — confirm against callers.

	Raises:
		AssertionError: If the number of image lists differs from the number of
			messages.
	"""
	# One token-id sequence per message.
	input_id_list = [
		tokenizer.apply_chat_template([msg], tokenize=True, add_generation_prompt=False)
		for msg in msgs
	]

	bs = len(input_id_list)
	img_list = [[] for _ in range(bs)] if images is None else images
	assert bs == len(img_list)

	# NOTE(review): 2024 may be a typo for 2048 — confirm the intended limit.
	model_inputs = model._process_list(tokenizer, input_id_list, max_inp_length = 2024)

	# Move every image tensor to the model's device; a sample without images
	# simply contributes an empty list.
	model_inputs["pixel_values"] = [
		[img.to(model.device) for img in sample_imgs]
		for sample_imgs in img_list
	]
	model_inputs['tgt_sizes'] = [torch.vstack(tgt_sizes)]

	with torch.inference_mode():
		# The second element (vision hidden states) is not needed here.
		model_inputs["inputs_embeds"], _ = model.get_vllm_embedding(model_inputs)

		return model.llm(inputs_embeds = model_inputs["inputs_embeds"])

# Attach the helper as a plain instance attribute. It is NOT bound as a method,
# so callers must supply the first (`self`) argument themselves.
model.get_logits = get_logits
# NOTE(review): presumably vocabulary ids of the tokenizer's "yes" / "no" token
# variants, read by the zero-shot QA helper — confirm how these were derived.
model.yes_tokens = [9642, 9891, 14331, 20137, 41898, 58841, 60844, 77830, 85502]
model.no_tokens = [2201, 2822, 6673, 9173, 9278, 17184, 18847, 34200, 38089, 39522]
# Put the model in evaluation mode.
model.eval()

# NOTE(review): the tokenizer is loaded from the non-quantized repository while
# the weights come from the `-int4` one; presumably both share a vocabulary — confirm.
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True)




Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Time to load model: 5.531465530395508


In [2]:
import pickle

# NOTE: unpickling can execute arbitrary code, so only load files that were
# produced by a trusted step of this project.
with open("../Concept_Mining/generated_concepts.pkl", "rb") as f:
	data = pickle.load(f)

# The file handle is no longer needed once the payload is in memory.
generated_concepts = data['concepts']
topic_groups = data['topic_groups']

In [None]:
from utils import zero_shot_MINICPM_QA
from glob import glob

# glob() yields paths in arbitrary, filesystem-dependent order; sort them so
# the image order fed to the model is reproducible across runs and machines.
images = sorted(glob("../dataset/Images/*.jpg"))

# The second return value is not used in this notebook.
prediction, _ = zero_shot_MINICPM_QA(
	model, images, generated_concepts,
	tokenizer, batch_size = 70,
)

In [3]:
import numpy as np

# NOTE(review): presumably model-load times in seconds collected from five
# separate runs — confirm the origin of these hand-copied numbers.
z = [
    6.557735919952393,
    6.600693225860596,
    6.412484169006348,
    6.292077541351318,
    6.484212398529053,
]
print(np.mean(z), np.std(z))

6.469440650939942 0.10949265816237837


In [2]:
from utils import zero_shot_MINICPM_QA
import numpy as np
import json
from glob import glob

# Concept prompts scored by the zero-shot model.
concepts = [
    "picture of roads made out of soil",
    "picture of people",
    "picture of roads made out of asphalt",
    "picture of people wearing dark clothes",
    "picture of people wearing colorful clothes",
]

# glob() yields paths in arbitrary, filesystem-dependent order; sort them so
# images and their labels line up identically on every run.
images = sorted(glob("s1/*.png"))

# Each image has a sidecar file "<image path>.json"; read metadata.labels from
# it, closing every file handle deterministically.
zero_labels = []
for image_path in images:
    with open(f"{image_path}.json") as label_file:
        zero_labels.append(json.load(label_file)['metadata']['labels'])


Model loaded


In [5]:
import time

In [6]:

# Benchmark: elapsed seconds for each of 10 full zero-shot passes over `images`.
times = []

for _run in range(10):
	# perf_counter() is monotonic and high-resolution, unlike time.time().
	start = time.perf_counter()
	# `prediction` is overwritten on every pass; the last pass's result is
	# what remains in the namespace afterwards.
	prediction, _ = zero_shot_MINICPM_QA(
		model, images, concepts,
		tokenizer, batch_size = 16,
	)
	times.append(time.perf_counter() - start)

100%|██████████| 100/100 [01:50<00:00,  1.11s/it]
100%|██████████| 100/100 [01:47<00:00,  1.07s/it]
100%|██████████| 100/100 [01:48<00:00,  1.08s/it]
100%|██████████| 100/100 [01:58<00:00,  1.18s/it]
100%|██████████| 100/100 [01:55<00:00,  1.15s/it]
100%|██████████| 100/100 [02:00<00:00,  1.20s/it]
100%|██████████| 100/100 [01:56<00:00,  1.16s/it]
100%|██████████| 100/100 [02:16<00:00,  1.36s/it]
100%|██████████| 100/100 [02:01<00:00,  1.21s/it]
100%|██████████| 100/100 [01:55<00:00,  1.15s/it]


In [7]:
# Mean and standard deviation of the per-run timings collected in `times`.
mean_time, std_time = np.mean(times), np.std(times)
print(mean_time, std_time)

116.89601318836212 7.869818195886419


In [8]:

from sklearn.metrics import f1_score

# Ground-truth indicator matrix: one row per image, one column per concept.
# NOTE(review): each entry of `zero_labels` is presumably a concept index or a
# list of indices — verify against the JSON metadata.
one_hot = np.zeros((len(images), len(concepts)))
for row, label_ids in enumerate(zero_labels):
    one_hot[row, label_ids] = 1

# average=None yields one F1 score per concept column.
f1 = f1_score(one_hot, prediction.cpu().numpy(), average=None)
print("Metric per class:", f1)
print("Mean metric:", np.mean(f1))

Metric per class: [0.76470588 0.99453552 0.24489796 0.89655172 0.74074074]
Mean metric: 0.7282863651081939


In [None]:
import torch

# Sanity check: display whether PyTorch reports CUDA as available.
torch.cuda.is_available()

True