In [None]:
! pip3 install --upgrade --user google-cloud-aiplatform

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.72.0-py2.py3-none-any.whl.metadata (31 kB)
Downloading google_cloud_aiplatform-1.72.0-py2.py3-none-any.whl (6.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-cloud-aiplatform
[0mSuccessfully installed google-cloud-aiplatform-1.72.0


In [3]:
import csv
from io import StringIO

import vertexai
from google.api_core.exceptions import GoogleAPICallError
from google.cloud import storage
from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Part,
    HarmCategory,
    HarmBlockThreshold,
)
# Define project information
PROJECT_ID = "sul-ai-sandbox"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
BUCKET_NAME = "cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4"
INPUTFILE = 'image_search/out-7123.csv'
OUTPUTFILE = 'image_search/output/b-v2.csv'

# Initialize Vertex AI *before* building the model, so the model created below
# is set up against PROJECT_ID / LOCATION rather than whatever project and
# region the environment happens to default to on a fresh kernel.
vertexai.init(project=PROJECT_ID, location=LOCATION)

model_id = 'gemini-1.5-pro'

# Every harm category listed here uses the same threshold (BLOCK_ONLY_HIGH).
safety_settings = {
    category: HarmBlockThreshold.BLOCK_ONLY_HIGH
    for category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
    )
}

# Shared model instance used by process_document().
model = GenerativeModel(
    model_id,
    safety_settings=safety_settings,
)


# Send Google Cloud Storage Document to Vertex AI
def process_document(
    prompt: str,
    file_uri: str,
    generation_config: GenerationConfig | None = None,
    mime_type: str = "image/jpeg",
) -> str | None:
    """Send one Cloud Storage file plus a text prompt to the shared Gemini ``model``.

    Args:
        prompt: Text instruction sent alongside the file.
        file_uri: URI of the file to attach (callers in this notebook pass
            ``gs://`` URIs).
        generation_config: Optional generation settings forwarded to
            ``model.generate_content``; ``None`` leaves the model defaults.
        mime_type: MIME type declared for the attached file. Defaults to
            ``"image/jpeg"``, matching the previous hard-coded value.

    Returns:
        The response text, or ``None`` if the request failed or the response
        text could not be read. Failures are printed, not raised, so a batch
        run can continue with the next file.
    """
    # Reference the file in place in Google Cloud Storage.
    file_part = Part.from_uri(
        uri=file_uri,
        mime_type=mime_type,
    )

    # The file part comes first, followed by the text instruction.
    contents = [file_part, prompt]

    try:
        # Send to Gemini
        response = model.generate_content(
            contents,
            generation_config=generation_config,
        )
        return response.text
    except ValueError as e:
        # Raised while sending the request or while reading ``response.text``.
        print(f"A ValueError occurred: {e}")
    except GoogleAPICallError as api_error:
        # Handle generic Google API call errors
        print(f"A Google API call error occurred: {api_error}")
    except Exception as e:
        # Deliberate best-effort: report any other failure and keep going.
        print(f"An unexpected error occurred: {e}")

    # Every handled error path ends here with no description available.
    return None

def get_blob(blob_name):
  """Return a blob handle for ``blob_name`` inside the ``BUCKET_NAME`` bucket.

  A new ``storage.Client`` is constructed on every call.
  """
  return storage.Client().bucket(BUCKET_NAME).blob(blob_name)

def description(file):
  """Ask the model for a short description of one stored image.

  Args:
    file: Object path of the image inside the ``BUCKET_NAME`` bucket.

  Returns:
    Whatever ``process_document`` returns for the image and the fixed prompt
    defined below.
  """
  # Build the URI from BUCKET_NAME so the images are always read from the same
  # bucket that get_blob() uses for the input and output CSV files.
  file_uri = f'gs://{BUCKET_NAME}/{file}'
  # Printed once per image; doubles as progress output for the batch run.
  print(file_uri)
  prompt = """This is a photograph in a collection of photographs about the Stanford University Marching Band.
  The Leland Stanford Junior University Marching Band (LSJUMB) is the student marching band representing Stanford University and its athletic teams. Billing itself as "The World's Largest Rock and Roll Band," the Stanford Band performs at sporting events, student activities, and other functions. The Stanford Tree is the band's mascot.
  While avoiding descriptions of the subjects race or identity, concisely describe this image in 150 characters or fewer. Please avoid starting the description with "This is a photo of..." or "This is an image of...", just say what it is in the image."""
  return process_document(prompt, file_uri)

# Only rows whose druid starts with this prefix are processed in this run.
DRUID_PREFIX = 'b'
# Upper bound on the number of images sent to the model in one run.
MAX_IMAGES = 1000

# Results are collected in memory and uploaded in one piece at the end.
csv_buffer = StringIO()

# Create a CSV writer
writer = csv.writer(csv_buffer)

# No header row is written. Output columns, in order:
# druid, original file name (as found in the input), description.

with get_blob(INPUTFILE).open() as csvfile:
  reader = csv.reader(csvfile)

  next(reader, None)  # skip headers; an empty input file is not an error
  count = 0
  for row in reader:
    if count >= MAX_IMAGES:
      # Limit reached: stop instead of scanning the rest of the input.
      break
    if len(row) < 2:
      # Blank or malformed line: there is no druid/file pair to process.
      continue
    druid = row[0]
    if not druid.startswith(DRUID_PREFIX):
      continue
    count += 1
    # The input lists .jp2 names; the stored copies use the same stem with .jpg.
    file = row[1].removesuffix('.jp2')
    stored_file = f'image_search/stacks/{druid}/{file}.jpg'
    # A failed description comes back as None, which csv writes as an empty field.
    writer.writerow([druid, row[1], description(stored_file)])


# Get the CSV content as a string
csv_content = csv_buffer.getvalue()
get_blob(OUTPUTFILE).upload_from_string(csv_content, content_type='text/csv')



gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb001nx1648/PC0170_s3_Fiesta_Bowl_2012-01-02_210609_2026.jpg
gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb002hc2341/PC0170_s3_Harry_Potter_Rally_2007-07-20_222151_0073.jpg
gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb002wb0348/PC0170_s3_Army_2014-09-13_122754_0529.jpg
gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb006fq3601/PC0170_s3_Davis_Day_2011-04-16_161634_0653.jpg
gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb008gf1243/PC0170_s3_Oregon_2009-11-07_125555_0184.jpg
gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb010bw7235/PC0170_s3_Oregon_State_2008-08-28_162410_0125.jpg
gs://cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4/image_search/stacks/bb014kh3056/PC0170_s3_Oregon_2011-11-12_210238_1491.jpg
gs://cloud-ai-platform-e