In [41]:
import pathlib
import textwrap

import numpy as np
import pickle
from PIL import Image

from IPython.display import display
from IPython.display import Markdown

from matplotlib.pyplot import imshow

from sklearn.metrics import roc_auc_score
import re
import PIL.Image
import json
import os
import pandas as pd


def to_markdown(text):
  """Return `text` wrapped in a Markdown object as a block quote.

  Every '•' character is replaced with '  *', and every line (blank lines
  included) is prefixed with '> '.
  """
  bulleted = text.replace('•', '  *')
  # The predicate always returns True so whitespace-only lines get the prefix too.
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
  return Markdown(quoted)

import google.generativeai as genai

In [3]:
# Take the key from the GOOGLE_API_KEY environment variable so a real key never
# has to be pasted into (and saved with) the notebook. The original placeholder
# string is kept as the fallback when the variable is not set.
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY', 'YOUR_API_KEY'))

In [4]:
# Print the name of each listed model whose supported generation methods
# include 'generateContent'.
content_capable = (
    listed for listed in genai.list_models()
    if 'generateContent' in listed.supported_generation_methods
)
for listed in content_capable:
  print(listed.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [5]:
# Request the BLOCK_NONE threshold for each of the four harm categories below.
_harm_categories = (
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in _harm_categories
]

# Sampling / length options intended for the generation config.
generation_settings = dict(top_p=0.7, max_output_tokens=1024)

In [6]:

# Alternative construction that also passes the custom generation and safety settings:
# model = genai.GenerativeModel('gemini-pro-vision', generation_config = generation_settings, safety_settings = safety_settings)
# NOTE(review): the active line below passes neither `generation_settings` nor
# `safety_settings`, so those two variables are not applied to this model.
vision_model_name = 'gemini-pro-vision'
model = genai.GenerativeModel(vision_model_name)

In [7]:
img_dir = '/home/username/microscopy/NFFA/'

# Recursively gather the full path of every file found below img_dir,
# in the order os.walk yields them.
all_files = []
for path, subdirs, files in os.walk(img_dir):
    all_files.extend(os.path.join(path, name) for name in files)

In [None]:
# Sanity check: display the last path in all_files.
all_files[-1]

In [9]:
# Bookkeeping for the processing loop: the collected answers, the entries
# recorded for responses whose text could not be read, and the index of the
# last image that was fully processed (-1 means nothing has been processed yet).
responses, unprocessed_ids = [], []
last_processed_idx = -1

In [None]:
# The prompt is the same for every image, so build it once outside the loop.
prompt = """INSERT YOUR PROMPT HERE. Refer to the appendix in the paper for example prompts."""

# Query the model once per image. Indices up to `last_processed_idx` are skipped,
# so if a request raises part-way through, re-running this cell resumes where it
# stopped instead of starting over.
for idx, file in enumerate(all_files):
    if idx <= last_processed_idx:
        continue
    # The context manager closes the image file once the request has returned.
    with PIL.Image.open(file) as img:
        print(f"Processing image: {file}")
        response = model.generate_content([img, prompt])
    try:
        answer = response.text
    except Exception as e:
        # Reading `response.text` failed, so there is no usable answer. Record the
        # image that failed (previously the stale `path` variable left over from
        # the os.walk loop was recorded instead) and store a placeholder so that
        # `responses` stays index-aligned with `all_files`.
        print(f"Could not read response text: {e!r}")
        unprocessed_ids.append(file)
        answer = "MODEL ERROR"
    print(answer)
    print()
    responses.append(answer)
    last_processed_idx = idx
    

In [None]:
# Inspect the candidates of the current `response` object
# (the one most recently assigned by the processing loop).
response.candidates

In [None]:
# Inspect the prompt feedback of the current `response` object.
# NOTE(review): presumably used to see why a response carried no text — confirm.
response.prompt_feedback

In [16]:
# Number of entries in unprocessed_ids (one is appended each time reading
# `response.text` raised in the processing loop).
len(unprocessed_ids)

38

In [17]:
# Persist the collected answers together with the failure list in one pickle file.
results_payload = {"responses": responses, "unprocessed_ids": unprocessed_ids}
with open('results_nffa_full_10classes_prediction_trial_1.pkl', 'wb') as out_file:
    pickle.dump(results_payload, out_file)

In [18]:
# Reload the results saved by this notebook.
# NOTE: pickle.load can execute arbitrary code; only use it on files you wrote yourself.
with open('results_nffa_full_10classes_prediction_trial_1.pkl', 'rb') as f:
    data = pickle.load(f)

# The file handle is no longer needed once the payload is in memory.
responses = data['responses']
# Restore the failure list too, so it matches the reloaded responses after a
# kernel restart; left untouched if the file has no such key.
if 'unprocessed_ids' in data:
    unprocessed_ids = data['unprocessed_ids']

In [19]:
# (response, occurrences) pairs for every distinct response string, in the sorted
# order returned by np.unique. return_counts tallies everything in a single call
# instead of re-scanning the whole array once per distinct value.
unique_responses, response_counts = np.unique(np.array(responses), return_counts=True)
counts = list(zip(unique_responses, response_counts))

In [21]:
# Number of entries currently held in unprocessed_ids.
len(unprocessed_ids)

38

In [None]:
# Ground-truth class id for each dataset sub-folder name.
folder_id_map = {"Biological": 1,
                "Fibres": 2,
                "Films_Coated_Surface": 3,
                "MEMS_devices_and_electrodes": 4,
                "Nanowires": 5,
                "Particles": 6,
                "Patterned_surface": 7,
                "Porous_Sponge": 8,
                "Powder": 9,
                "Tips": 10}

# Keyword searched for inside the lower-cased model answer -> class id.
class_id_map = {"biological": 1,
               "fibers": 2,
               "coated film": 3,
               "mems": 4,
               "nanowire": 5,
               "particles": 6,
               "patterned surface": 7,
               "porous sponge": 8,
               "powder": 9,
               "tips": 10}

# Answers are paired with files purely by position, so a length mismatch would
# attribute answers to the wrong images; fail loudly instead.
if len(responses) != len(all_files):
    raise ValueError(
        f"Expected one response per file, got {len(responses)} responses "
        f"for {len(all_files)} files"
    )

actuals = []
predictions = []

for idx, file in enumerate(all_files):
    # The true label is the name of the image's parent folder. os.path handles
    # the platform's path separator, unlike splitting on "/".
    extracted_folder = os.path.basename(os.path.dirname(file))
    actuals.append(folder_id_map[extracted_folder])

    # The first keyword (in class_id_map order) contained in the answer wins;
    # -1 marks an answer in which no keyword was found.
    answer_text = responses[idx].lower()
    pred_class_id = next(
        (class_id for class_name, class_id in class_id_map.items()
         if class_name in answer_text),
        -1,
    )
    if pred_class_id == -1:
        # Print unmatched answers so they can be inspected manually.
        print(responses[idx])
    predictions.append(pred_class_id)

In [34]:
# Number of ground-truth labels collected.
len(actuals)

21169

In [35]:
# Number of predicted labels collected.
len(predictions)

21169

In [36]:
# Fraction of positions where the predicted class id equals the actual one.
# Unmatched predictions (-1) never equal an actual id, so they count as wrong.
matches = np.array(actuals) == np.array(predictions)
accuracy = matches.sum() / len(actuals)

In [37]:
# Display the computed accuracy.
accuracy

0.5663942557513345

In [38]:
# Indices of the predictions equal to -1.
np.where(np.array(predictions) == -1)

(array([ 1226,  1313,  3579,  4409,  4589,  4909,  4993,  5890,  6122,
         6711,  6731,  6855,  7700,  7996,  8732,  8770,  9336,  9802,
         9974, 10140, 10354, 11367, 11693, 11792, 13538, 13606, 16464,
        16852, 16973, 17140, 17300, 17317, 17421, 17796, 17872, 19368,
        19582, 19703, 20149, 20620, 20865, 21015, 21151]),)

In [None]:
# Spot-check a single raw response (the index 41 is hard-coded).
responses[41]

In [42]:
# Put ground-truth and predicted class ids side by side, one column each.
data = dict(actual_class=actuals, predicted_class=predictions)
df = pd.DataFrame(data)

In [43]:
# Write the actual/predicted table to CSV in the working directory.
# No `index` argument is passed, so pandas' default index handling applies.
df.to_csv("gemini_full_NFFA.csv")