In [1]:
#image annotations with images
import os
import numpy as np
import pandas as pd
import imgaug.augmenters as iaa
from skimage import io
from skimage.io import imsave
import zipfile

# Path to your images: the folder is expected to hold image1.jpg … image10.jpg
image_path = '/Users/lebakuprathyushkumarreddy/Desktop/testing_images'
image_files = [os.path.join(image_path, f'image{i+1}.jpg') for i in range(10)]

# One label per source image, all 'yes'. The list is derived from the number
# of image files so the two can never drift apart (zip() would silently drop
# the surplus), and the label is spelled the same way for every image so the
# written annotation files do not mix 'Yes' and 'yes'.
annotations = ['yes'] * len(image_files)

# Image Augmentation
# Seed imgaug's global random number generator before building the pipeline so
# that the random flips, crops, contrast and brightness changes are the same on
# every fresh run of the notebook (the augmenters below are created without a
# seed of their own).
import imgaug as ia

AUGMENTATION_SEED = 42
ia.seed(AUGMENTATION_SEED)

seq = iaa.Sequential([
    iaa.Fliplr(0.5),                  # horizontal flip, probability 0.5
    iaa.Crop(percent=(0, 0.1)),       # crop amount drawn from 0–10%
    iaa.LinearContrast((0.75, 1.5)),  # contrast factor drawn from 0.75–1.5
    iaa.Multiply((0.8, 1.2)),         # brightness factor drawn from 0.8–1.2
])

# Read every source image into memory up front
images = []
for source_file in image_files:
    images.append(io.imread(source_file))

# Start out empty; nothing in the visible code appends to these lists
augmented_images = []
augmented_annotations = []

# Destination folders for the generated images and their label files
augmented_image_path = '/Users/lebakuprathyushkumarreddy/Desktop/augmented_images/images'
augmented_annotation_path = '/Users/lebakuprathyushkumarreddy/Desktop/augmented_images/annotations'
for output_dir in (augmented_image_path, augmented_annotation_path):
    # exist_ok=True makes re-running the cell safe when the folders already exist
    os.makedirs(output_dir, exist_ok=True)

# Write 50 augmented copies of every source image, each with a label file
for img_index, pair in enumerate(zip(images, annotations)):
    img, annotation = pair
    copies = [img] * 50  # the same source image repeated 50 times
    for aug_index, aug_img in enumerate(seq(images=copies)):
        # Image and label share the same stem (aug_image_<source>_<copy>)
        imsave(
            os.path.join(augmented_image_path, f'aug_image_{img_index}_{aug_index}.jpg'),
            aug_img,
        )
        label_target = os.path.join(
            augmented_annotation_path, f'aug_image_{img_index}_{aug_index}.txt'
        )
        with open(label_target, 'w') as label_file:
            label_file.write(annotation)

# Bundle both output folders into a single archive
zip_path = '/Users/lebakuprathyushkumarreddy/Desktop/augmented_images.zip'

# (folder on disk, folder name inside the archive), processed in this order
archive_layout = (
    (augmented_image_path, 'images'),
    (augmented_annotation_path, 'annotations'),
)

with zipfile.ZipFile(zip_path, 'w') as zipf:
    for source_dir, archive_dir in archive_layout:
        for root, _, files in os.walk(source_dir):
            for file in files:
                # Entries are stored under <archive_dir>/<file name> only;
                # any sub-folder structure below source_dir is not kept.
                zipf.write(os.path.join(root, file), arcname=os.path.join(archive_dir, file))

print(f'Augmented images and annotations are organized and zipped in: {zip_path}')


Augmented images and annotations are organized and zipped in: /Users/lebakuprathyushkumarreddy/Desktop/augmented_images.zip


In [None]:
#colab code

pip install -q -U transformers==4.37.2
pip install -q bitsandbytes==0.41.3 accelerate==0.25.0

import torch
from transformers import BitsAndBytesConfig
from transformers import pipeline
from PIL import Image, ImageDraw
import os
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

# Checkpoint identifier passed to the pipeline
model_id = "llava-hf/llava-1.5-7b-hf"

# Load the weights in 4-bit, with float16 as the compute dtype
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

# Image-to-text pipeline; the quantization settings are forwarded via model_kwargs
pipe = pipeline(
    task="image-to-text",
    model=model_id,
    model_kwargs=dict(quantization_config=quantization_config),
)

# Drive folders holding the augmented images and their label files
folder_path_images = '/content/drive/MyDrive/augmented_images/images'
folder_path_annotations = '/content/drive/MyDrive/augmented_images/annotations'

# Collect the full path of every file in the image folder whose name ends in .jpg
image_paths = []
for entry in os.listdir(folder_path_images):
    if entry.endswith('.jpg'):
        image_paths.append(os.path.join(folder_path_images, entry))

# Prompt(s) sent to the model together with each image.
# NOTE(review): the llava-1.5 model card appears to use a
# "USER: <image>\n<prompt> ASSISTANT:" template — confirm this wording is intended.
questions = ["Question 1: <image> Is he wearing hard hat? \nASSISTANT:"]

# Text that separates the prompt from the model's reply in the generated output
ASSISTANT_MARKER = "\nASSISTANT:"
# Punctuation trimmed from the edges of the first reply word, so that
# "Yes," / "Yes." / "Yes!" all become "yes"
ANSWER_PUNCTUATION = '.,;:!?"\''


def _first_answer_word(generated_text):
    """Return the lower-cased first word of the reply in ``generated_text``.

    The reply is the text following ``ASSISTANT_MARKER``. Commas are removed
    and surrounding punctuation is trimmed from the word. Returns
    ``"no response"`` when the marker is missing, the reply is empty, or the
    first word consists only of punctuation.
    """
    parts = generated_text.split(ASSISTANT_MARKER)
    reply = parts[1].strip() if len(parts) > 1 else ""
    words = reply.split()
    if not words:
        return "no response"
    word = words[0].replace(',', '').strip(ANSWER_PUNCTUATION).lower()
    return word or "no response"


predictions = []
ground_truths = []

for image_path in image_paths:
    # Corresponding annotation file: same base name as the image, .txt extension
    base_filename = os.path.splitext(os.path.basename(image_path))[0]
    annotation_file = os.path.join(folder_path_annotations, f"{base_filename}.txt")

    # Read the ground truth annotation
    with open(annotation_file, 'r') as file:
        ground_truth = file.read().strip().lower()

    # The context manager closes the image file once all questions are answered
    with Image.open(image_path) as image:
        for question in questions:
            outputs = pipe(image, prompt=question, generate_kwargs={"max_new_tokens": 200})
            first_word = _first_answer_word(outputs[0]["generated_text"])
            print(first_word)
            predictions.append(first_word)
            # One ground-truth entry per prediction keeps both lists the same
            # length even when `questions` holds more than one prompt
            ground_truths.append(ground_truth)

# Calculating the metrics
# Accuracy compares the raw answer strings one to one.
accuracy = accuracy_score(ground_truths, predictions)

# Precision, recall and F1 are reported for the 'yes' class. The labels are
# first reduced to 1 ('yes') / 0 (anything else): the binary scorers raise a
# ValueError when more than two distinct labels are present, which happens as
# soon as the model's first word is something other than 'yes' or 'no'
# (e.g. "the" or "no response").
truth_is_yes = [int(label == 'yes') for label in ground_truths]
predicted_yes = [int(label == 'yes') for label in predictions]

precision = precision_score(truth_is_yes, predicted_yes, pos_label=1)
recall = recall_score(truth_is_yes, predicted_yes, pos_label=1)
f1 = f1_score(truth_is_yes, predicted_yes, pos_label=1)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
