In [2]:
import numpy as np
import pandas as pd
import json
from google import genai

from google.oauth2.service_account import Credentials
from google.cloud import storage
import os


scopes = ["https://www.googleapis.com/auth/cloud-platform"]
SERVICE_ACCOUNT_FILE = "/Data2/Arun-UAV/NLP/self-halu-detection/vertix_ai.json"
credentials = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=scopes)

client = storage.Client(credentials=credentials)

gen_client = genai.Client(
    vertexai=True,
    project='hazel-math-472314-h9',   # or set directly
    location='us-central1',    # or set directly, e.g. "us-central1"
    credentials=credentials
)

In [5]:
poc_df = pd.read_csv("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/coco_5000_gcp_upload_urs.csv")
batch_df = pd.read_csv("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/coco_batch_1_15000_gcp_upload_urs.csv")

In [10]:
poc_df.head(2)

Unnamed: 0,local_path,gcs_uri,error
0,/Data2/Arun-UAV/NLP/vision_halu/train_datasets...,gs://train_data_vision/poc_coco_5000/COCO_trai...,
1,/Data2/Arun-UAV/NLP/vision_halu/train_datasets...,gs://train_data_vision/poc_coco_5000/COCO_trai...,


In [12]:
final_df = batch_df[~batch_df["local_path"].isin(poc_df["local_path"].to_list())]

In [17]:
df = final_df.copy()

# batch creation

In [18]:

PROMPT = """
You are a specialist in rich and precise scene understanding.
Given an input image, generate a comprehensive, contextually aware, and fluent description that captures all key visual elements, their relationships, emotions, and possible context or story.

Your description should go beyond short captions — it must resemble a paragraph of visual storytelling that includes:

Scene type: indoor/outdoor, environment, lighting, time of day
Objects and entities: names, counts, shapes, colors, materials
Actions and interactions: what the people or objects are doing
Spatial layout: foreground, background, relative positions
Emotions or atmosphere: tone, mood, aesthetics
Possible context: what might be happening or implied by the scene

Avoid generic or repetitive statements. Be vivid, factual, and coherent. Use natural language instead of bullet points.

Output JSON format:
{image_description: <full attached image description>}
"""

all_res = []
for uri in df["gcs_uri"].tolist():
    res = {"request":{"contents": [{"role": "user", "parts": [{"text": PROMPT}, {"fileData": {"fileUri": uri, "mimeType": "image/jpeg"}}]}], 
                      "generationConfig": {"temperature": 0.6, "topP": 1, "maxOutputTokens": 1000,"thinking_config":{"thinking_budget":0}}}}
    all_res.append(res)

In [19]:
len(all_res)

9688

In [20]:
df = pd.DataFrame(all_res)
df.to_json("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/gemini_batch.jsonl", orient="records", lines=True)

# Uploading files to gcp

In [21]:
def upload_to_gcs(local_path: str, gcs_uri: str):
    """
    Upload a local file to a target GCS URI.

    Args:
        local_path (str): Local file path to upload.
        gcs_uri (str): Target GCS URI like 'gs://my-bucket/path/to/upload.txt'
        service_account_path (str): Path to GCP service account JSON.
    """
    if not gcs_uri.startswith("gs://"):
        raise ValueError("Invalid GCS URI. Must start with gs://")

    parts = gcs_uri[5:].split("/", 1)
    bucket_name = parts[0]
    blob_name = parts[1]

    bucket = client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    blob.upload_from_filename(local_path)

    print(f"✅ Uploaded {local_path} → {gcs_uri}")


In [22]:
upload_to_gcs(local_path="/Data2/Arun-UAV/NLP/vision_halu/train_datasets/gemini_batch.jsonl", gcs_uri = "gs://train_data_vision/gemini_batch_info/gemini_batch.jsonl")

✅ Uploaded /Data2/Arun-UAV/NLP/vision_halu/train_datasets/gemini_batch.jsonl → gs://train_data_vision/gemini_batch_info/gemini_batch.jsonl


In [None]:
# start batch processing

# start batch processing

In [23]:
import time

from google import genai
from google.genai.types import CreateBatchJobConfig, JobState, HttpOptions
output_uri = "gs://train_data_vision/gemini_batch_info/"

# See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.batches.Batches.create
job = gen_client.batches.create(
    model="gemini-2.5-flash",
    src="gs://train_data_vision/gemini_batch_info/gemini_batch.jsonl",
    config=CreateBatchJobConfig(dest=output_uri),
)
print(f"Job name: {job.name}")
print(f"Job state: {job.state}")

Job name: projects/564344106944/locations/us-central1/batchPredictionJobs/6521008249485918208
Job state: JOB_STATE_PENDING


In [32]:
job_info = gen_client.batches.get(name=job.name)
job_info.state

<JobState.JOB_STATE_SUCCEEDED: 'JOB_STATE_SUCCEEDED'>

# download batch results

In [33]:
def download_from_gcs(gcs_uri: str, local_path: str):
    """
    Download a file from GCS based on its gs:// URI.

    Args:
        gcs_uri (str): GCS URI like 'gs://my-bucket/path/to/file.txt'
        local_path (str): Path to store the downloaded file locally.
    """
    # Parse bucket and blob name
    if not gcs_uri.startswith("gs://"):
        raise ValueError("Invalid GCS URI. Must start with gs://")

    parts = gcs_uri[5:].split("/", 1)
    bucket_name = parts[0]
    blob_name = parts[1]
    
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    blob.download_to_filename(local_path)

    print(f"✅ Downloaded {gcs_uri} → {local_path}")


In [34]:
download_from_gcs(gcs_uri="gs://train_data_vision/gemini_batch_info/prediction-model-2025-10-16T04:27:09.140433Z/predictions.jsonl", local_path ="/Data2/Arun-UAV/NLP/vision_halu/train_datasets/gemini_btach_res.jsonl")

✅ Downloaded gs://train_data_vision/gemini_batch_info/prediction-model-2025-10-16T04:27:09.140433Z/predictions.jsonl → /Data2/Arun-UAV/NLP/vision_halu/train_datasets/gemini_btach_res.jsonl


In [35]:
pred_data = pd.read_json("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/gemini_btach_res.jsonl", lines=True)

In [36]:
pred_data.head(2)

Unnamed: 0,status,processed_time,request,response
0,,2025-10-16 04:30:28.306000+00:00,"{'contents': [{'parts': [{'fileData': None, 't...",{'candidates': [{'avgLogprobs': -0.44019927978...
1,,2025-10-16 04:31:05.061000+00:00,"{'contents': [{'parts': [{'fileData': None, 't...",{'candidates': [{'avgLogprobs': -0.43630051424...


In [37]:
pred_data.shape

(9688, 4)

In [38]:
all_res = []
for inx, row in pred_data.iterrows():
    try:
        img_name = row["request"]["contents"][0]["parts"][1]["fileData"]["fileUri"].split("/")[-1]
        img_dec = row["response"]["candidates"][0]["content"]["parts"][0]["text"]
        c_img_dec = eval(img_dec.replace("json", "").strip("```"))["image_description"]
        all_res.append({"image": img_name, "description": c_img_dec})
    except Exception as e:
        print(f"Error processing row {inx}: {e}")

Error processing row 132: invalid syntax. Perhaps you forgot a comma? (<string>, line 3)
Error processing row 349: 'candidates'
Error processing row 781: 'candidates'
Error processing row 2603: unterminated string literal (detected at line 3) (<string>, line 3)
Error processing row 2823: 'candidates'
Error processing row 3359: 'candidates'
Error processing row 3769: 'candidates'
Error processing row 6282: unterminated string literal (detected at line 3) (<string>, line 3)
Error processing row 6663: unterminated string literal (detected at line 2) (<string>, line 2)
Error processing row 7939: 'candidates'
Error processing row 8313: unterminated string literal (detected at line 4) (<string>, line 4)
Error processing row 9281: unterminated string literal (detected at line 3) (<string>, line 3)
Error processing row 9643: 'candidates'


In [39]:
df = pd.DataFrame(all_res)

In [41]:
df.head(2)

Unnamed: 0,image,description
0,COCO_train2014_000000467172.jpg,The indoor scene captures two domestic cats ex...
1,COCO_train2014_000000328023.jpg,This outdoor scene captures a lively moment in...


In [42]:
old_poc_df  = pd.read_csv("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/coco_5000_train_with_gemini_des.csv")

In [92]:
df.to_csv("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/coco_5000_train_with_gemini_des.csv", index=False)

In [43]:
old_poc_df.head(2)

Unnamed: 0,image,description
0,COCO_train2014_000000078572.jpg,This outdoor scene captures a bustling street ...
1,COCO_train2014_000000401963.jpg,The image captures an outdoor scene under brig...


In [44]:
final_df_15k = pd.concat([old_poc_df, df])

In [46]:
final_df_15k["image"].nunique()

14579

In [47]:
final_df_15k.to_csv("/Data2/Arun-UAV/NLP/vision_halu/train_datasets/coco_batch_1_15000_train_with_gemini_des.csv", index=False)