In [None]:
!pip install qwen-vl-utils

In [None]:
!pip install git+https://github.com/huggingface/transformers

In [None]:
!pip install accelerate transformers pillow

In [2]:
from accelerate import Accelerator
from accelerate.utils import gather_object

# Smoke test for the Accelerate setup: every process contributes one greeting
# and the combined list is printed a single time.
accelerator = Accelerator()

# Per-process payload: a one-element list naming this process's index.
greeting = f"Hello this is GPU {accelerator.process_index}"
message = [greeting]

# Merge the per-process lists into one list of greetings.
messages = gather_object(message)

# accelerator.print() writes from the main process only.
accelerator.print(messages)

['Hello this is GPU 0']


In [None]:
import os
import torch
import pandas as pd
from PIL import Image
import gc
import time
from accelerate import Accelerator
from accelerate.utils import gather_object
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info


In [None]:
# from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
# from qwen_vl_utils import process_vision_info
# import torch
# import os
# # os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments'

# # model = Qwen2VLForConditionalGeneration.from_pretrained(
# #     "/mnt/Main Drive/Codes/Deep Learning/LLM/Qwen2-VL-2B-Instruct",
# #     torch_dtype=torch.bfloat16,
# #     device_map='auto'
# # )

# model = Qwen2VLForConditionalGeneration.from_pretrained(
#     "Qwen/Qwen2-VL-2B-Instruct",
#     torch_dtype=torch.bfloat16,
#     attn_implementation="sdpa",
#     device_map="auto",
# )
# min_pixels = 256 * 28 * 28
# max_pixels = 1280 * 28 * 28
# processor = AutoProcessor.from_pretrained(
#     "Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels
# )

In [None]:
from PIL import Image

In [None]:
# Load the voltage subset *before* inspecting it, so this cell works on a
# fresh kernel (Restart & Run All).
data = pd.read_csv("/kaggle/input/null-set/null/voltage_null.csv")

# Keep the rows at positions 1676..2675. `.copy()` turns the slice into an
# independent frame, so adding the `image_path` column in a later cell does
# not raise SettingWithCopyWarning.
data = data.iloc[1676:2676].copy()

data.shape

In [None]:
def extract_filename(url, image_dir='/kaggle/input/null-set/test_null/test_null'):
    """Map an image URL to the path of its local copy.

    Only the last path component of ``url`` is kept and joined onto
    ``image_dir``.

    Args:
        url: Image link (or any path-like string) whose final component is
            the image file name.
        image_dir: Directory that holds the downloaded images. Defaults to
            the Kaggle input folder used by this notebook.

    Returns:
        ``image_dir`` joined with the base name of ``url``.
    """
    return os.path.join(image_dir, os.path.basename(url))

In [None]:
# Resolve every image link to the path of its local copy.
data['image_path'] = [extract_filename(link) for link in data['image_link']]

In [None]:
# Preview the first rows instead of rendering the whole frame.
data.head()

In [None]:
# Set up the module-level Accelerator used for multi-GPU runs.
# process_batch() reads `accelerator.device` and main() reads
# `accelerator.is_main_process` / `accelerator.split_between_processes` from
# this global, so this cell must run before either function is called.
accelerator = Accelerator()

In [None]:
def process_batch(batch_data, model, processor):
    """Ask the model for the voltage shown in each image of a batch.

    Records are handled one at a time (one ``generate`` call per image);
    ``batch_data`` only groups them for the caller's bookkeeping.

    Args:
        batch_data: Iterable of dicts, each providing the keys ``'index'``,
            ``'image_path'`` and ``'entity_name'``.
        model: Object exposing ``generate(**inputs, max_new_tokens=...)``.
        processor: Object exposing ``apply_chat_template``, ``batch_decode``
            and a ``__call__`` that builds the model inputs.

    Returns:
        A list with one dict per input record, holding ``'index'``,
        ``'image_path'``, ``'entity_name'`` and ``'textual_data'`` (the
        decoded completion with the prompt tokens stripped).

    Note:
        Inputs are moved to ``accelerator.device``, where ``accelerator`` is
        the module-level ``Accelerator`` instance.
    """
    prompt = "what is the voltage given in the image and give the answer in 3 words and in Volt as in the image"

    results = []
    for item in batch_data:
        index, image_path, entity_name = item['index'], item['image_path'], item['entity_name']

        # The context manager closes the underlying file once the model
        # inputs have been built; otherwise one handle per image stays open
        # for the whole run.
        with Image.open(image_path) as image:
            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "image": image,
                        },
                        # Resize hints (`resized_height` / `resized_width`)
                        # are only honoured on image entries, so none is set
                        # on the text entry; the processor's min/max pixel
                        # limits govern the image size.
                        {
                            "type": "text",
                            "text": prompt,
                        },
                    ],
                }
            ]

            text = processor.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            image_inputs, video_inputs = process_vision_info(messages)
            inputs = processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            )

        inputs = inputs.to(accelerator.device)

        with torch.no_grad():
            generated_ids = model.generate(**inputs, max_new_tokens=512)

        # generate() returns prompt + completion; drop the prompt tokens so
        # only the newly generated answer is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        results.append({
            "index": index,
            "image_path": image_path,
            "entity_name": entity_name,
            "textual_data": output_text[0]
        })

    return results


In [None]:
def main(batch_size=10, output_file='Voltage_null_output.csv'):
    """Run voltage extraction over the module-level ``data`` frame.

    Loads Qwen2-VL-2B-Instruct and its processor, splits the records of the
    global ``data`` DataFrame between the Accelerate processes, runs
    ``process_batch`` on each chunk, and writes the combined results to
    ``output_file`` on the main process.

    Args:
        batch_size: Number of records handed to ``process_batch`` per call
            and appended to the CSV as one intermediate checkpoint.
        output_file: Path of the CSV that receives the intermediate rows and
            is overwritten with the complete result at the end.

    Note:
        Relies on the globals ``data`` (DataFrame with the columns read by
        ``process_batch``) and ``accelerator``.
    """
    columns = ['index', 'image_path', 'entity_name', 'textual_data']

    # Create the CSV with a header on the main process if it does not exist
    # yet; intermediate batches are appended to it below.
    if accelerator.is_main_process:
        if not os.path.exists(output_file):
            pd.DataFrame(columns=columns).to_csv(output_file, index=False)

    # Load model and processor.
    model_path = "Qwen/Qwen2-VL-2B-Instruct"  # Adjust path as needed
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        # attn_implementation="flash_attention_2",
        # "auto" spreads this one model copy over the visible GPUs.
        # NOTE(review): under a multi-process launch every process would do
        # the same; confirm the intended launch mode.
        device_map="auto",
    )

    min_pixels = 256 * 28 * 28
    max_pixels = 1280 * 28 * 28
    processor = AutoProcessor.from_pretrained(
        model_path, min_pixels=min_pixels, max_pixels=max_pixels
    )

    # Only the model goes through prepare(); the processor is plain
    # preprocessing code and needs no distributed wrapping.
    model = accelerator.prepare(model)

    # Sync processes and start the timer.
    accelerator.wait_for_everyone()
    start = time.time()

    # Each process receives its own share of the records.
    with accelerator.split_between_processes(data.to_dict('records')) as local_data:
        all_results = []

        for i in range(0, len(local_data), batch_size):
            batch = local_data[i:i + batch_size]
            batch_results = process_batch(batch, model, processor)
            all_results.extend(batch_results)

            # Checkpoint: append this batch so partial progress survives a
            # crash (main process only).
            if accelerator.is_main_process:
                pd.DataFrame(batch_results, columns=columns).to_csv(
                    output_file, mode='a', header=False, index=False
                )

        all_results = [all_results]  # Wrap in list for gather_object

    # Collect the per-process result lists.
    results_gathered = gather_object(all_results)

    if accelerator.is_main_process:
        # Flatten and overwrite the checkpoint file with the full result.
        # Passing `columns` keeps the header even when nothing was processed.
        final_results = [item for sublist in results_gathered for item in sublist]
        pd.DataFrame(final_results, columns=columns).to_csv(output_file, index=False)

        time_diff = time.time() - start
        print(f"Total time elapsed: {time_diff:.2f} seconds")
        print(f"Processed {len(final_results)} items")

    # Release GPU memory: drop the references first, collect garbage, and
    # only then empty the CUDA cache. Emptying the cache while the model is
    # still referenced cannot free its memory.
    del model, processor
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Entry point: run the full inference job when this code executes as the
# main module.
if __name__ == "__main__":
    main()