In [1]:
import wr_score_util as wu 
import datetime
import numpy as np
import cv2
from tqdm import tqdm
from google.genai import types
from google import genai
import os
import pandas as pd
import re
from time import sleep
import imageio.v3 as iio
import typing 
from pydantic import BaseModel, Field
from typing import List, Literal
import json
import time

# For full documentation please refer to https://ai.google.dev/gemini-api/docs

In [3]:
# Schema for structured output
class TimelineEvent(BaseModel):
    """Represents a single, classified event in the trial timeline."""
    # Interval covered by the event, as free text; the 'start - end' format is only
    # requested through the description and is not validated here.
    time_range: str = Field(description="The start and end time of the event in 'start_time - end_time' format, e.g., '0.5s - 1.2s'.")
    
    # Short label for the event; the 4-word limit is stated in the description only.
    event_classification: str = Field(
        description="A brief classification of the event (maximum 4 words), e.g., 'Paw reaches for spout' or 'Tongue licks paw'."
    )
    
    # Longer free-text account of what was observed during `time_range`.
    event_description: str = Field(
        description="A concise, objective, and detailed description of what was visually observed during the time range."
    )

# Define the main schema for the entire trial analysis.
# This class is passed as `response_schema` in the generation config, so the field
# names, types and description strings below are part of the request definition.
class TrialAnalysis(BaseModel):
    """
    Provides a structured analysis of a mouse's behavior in a water-reaching trial.
    All fields must be based on clear, indisputable visual evidence only.
    """
    # NOTE(review): this description asks about tongue-to-paw contact, while prompt `a1`
    # asks about tongue-to-water-drop contact — confirm which one is intended.
    tongue_contact: bool = Field(description="Did the mouse's tongue make physical contact with its paw after the paw touched the water drop?")
    # True when the drop stayed on the spout until the first interaction.
    water_drop_stable: bool = Field(description="Did the water drop remain fully attached to the spout before any interaction?")
    # True when any water was dropped or splashed while being retrieved.
    water_spilled: bool = Field(description="Did the mouse drop or splash any part of the water drop during retrieval?")
    # Integer percentage; `ge=0, le=100` restricts it to the inclusive range 0..100.
    percentage_consumed: int = Field(description="What percentage of the water drop did the mouse drink? Must be 0 if no contact was made.", ge=0, le=100)
    # Restricted to exactly these three labels by the Literal type.
    outcome_classification: Literal["Successful", "Partial Success", "Failed"] = Field(description="Classification of the trial's outcome.")
    # Free-text reasoning for `outcome_classification`.
    justification: str = Field(description="Brief, evidence-based justification for the outcome classification.")
    # Ordered list of TimelineEvent entries describing the trial.
    timeline: List[TimelineEvent] = Field(description="A chronological list of key, observable events.")

In [None]:
# Sampling settings handed to the generation config.
temperature = 0
top_p = 0.9

# System instruction sent with every request.
#
# The sentences are kept as separate items and joined with a single space. The previous
# version relied on implicit concatenation of adjacent string literals, which glued the
# sentences together with no separator ("...trial.Your analysis...", "...elements:1. The
# Water Drop..."). Also fixed: the typo "nos stick", the unbalanced quote around
# 'No change was observed.', and missing terminal periods.
#
# NOTE(review): the parenthetical at the end of the "head fixed" sentence appears to
# describe the right paw rather than the spout/tongue; its wording is left untouched —
# confirm where it belongs.
sys_instruc = " ".join([
    "You are a research assistant tasked with objectively scoring a mouse's behavior in a water-reaching trial.",
    "Your analysis must be based only on clear, indisputable visual evidence. Pay extremely close attention to the following elements:",
    "1. The Water Drop: Note its formation, its position on the spout, and any change to its shape or location.",
    "2. The Mouse's Tongue (very obvious and similar to that of a dog's): Look for the moment the tongue is extended or if the tongue physically touches the water.",
    "3. The Mouse's Paws: Observe if the paws lift off the surface and move towards the spout. Mainly the mouse's right paw (the mouse's perspective).",
    "Most importantly, the mouse is head fixed meaning it cannot move its head and its tongue cannot lick the spout directly. (the mouse's perspective and the one closest to the metal spout when the paws are at rest)",
    "The water drop can only be consumed by its right paw reaching the water drop on the spout and bringing it close to its mouth for it to lick.",
    "If the mouse's right paw did not come near to the waterdrop or moved at all, it couldn't have drank any of the water drop.",
    "The water drop is always delivered within the first 30-40 seconds of the video. In some cases, the water drop might not stick to the spout for the mouse to touch.",
    "Do not infer success or failure; only report the physical interactions you see. In each video, only one drop is delivered.",
    "If no change or movement (right paw) is detected for a specific feature (e.g., paw, tongue), you must state 'No change was observed.'",
    "Do not repeat the question in your responses.",
    "Do not describe an event unless there is undeniable visual evidence.",
])


# User-turn prompts. `a1`..`a5` accompany each video in the request; the wording is
# intentionally reproduced verbatim because it is part of the model input.
a1 = (
    "Did the mouse's tongue make physical contact with the water drop? "
    "(Answer: Yes or No)"
)
a2 = (
    "Did the water drop remain fully attached to the spout before the mouse touched it? "
    "(Answer: Yes or No)"
)
a3 = "Did the mouse drop or splashed some of the water when retrieving the water to drink ? (Yes or No)"
a4 = (
    "What percentage of the water drop did the mouse drink? "
    "(If no physical contact was made between the tongue and the drop, the answer must be 0%)."
)

# Outcome rubric, written one rule per line for readability and then flattened onto a
# single line (every newline becomes one space, including the leading and trailing one).
# NOTE(review): the rubric leaves 40%-50% unassigned (Partial starts at 50%, Failed ends
# below 40%) — confirm the intended boundaries.
a5 = """
Based on the provided video of a head-fixed mouse performing a water-reaching trial, classify the outcome as one of the following:
Successful: The mouse retrieved most of the water (more than 70%).
Partial Success: The mouse retrieved some of the water (between 50% and 70%).
Failed: The mouse retrieved little or none of the water (less than 40%).
Clearly state your classification and briefly justify your choice using only observable evidence from the video.
"""
a5 = " ".join(a5.split("\n"))

# NOTE(review): `a6` is defined here but is not among the prompts sent by the
# processing cell — confirm whether it should be.
a6 = (
    "Provide a timeline of key events. "
    "Use the following specific format and describe only what you see ( Time start - Time end: Event)."
)


# API key for the Gemini API (create one at https://aistudio.google.com/prompts/new_chat).
# It is read from the GEMINI_API_KEY environment variable so the secret never has to be
# pasted into (and saved with) the notebook; it stays '' when the variable is unset.
api_key = os.environ.get("GEMINI_API_KEY", "")
MODEL_ID = "gemini-2.5-pro"

# Single generation config. A preliminary `config` without the structured-output
# settings used to be built just before this one and was overwritten immediately, so
# only this definition ever took effect.
config = types.GenerateContentConfig(
    temperature=temperature,
    top_p=top_p,
    system_instruction=sys_instruc,
    # Structured output: request JSON that conforms to the TrialAnalysis schema.
    response_mime_type="application/json",
    response_schema=TrialAnalysis,
)


# Directory that receives the results CSV.
save_path = '/mnt/team/TM_Lab/Tony/wr_new/data_used/tta_gcamp8s/gemini_predictions_full_sys'


In [6]:
def paths_to_dict_unique(file_path):
    """
    Build a basename -> full-path lookup from a text file listing one path per line.

    Surrounding whitespace is stripped from every line and blank lines are ignored.
    When several paths share a basename, the one appearing last in the file wins.
    """
    with open(file_path, 'r') as listing:
        cleaned = (raw.strip() for raw in listing)
        # Later duplicates overwrite earlier ones, exactly like repeated dict assignment.
        return {os.path.basename(entry): entry for entry in cleaned if entry}
# Listing file (one video path per line), given relative to the working directory.
file_paths = 'vid_paths.txt'
# Lookup from video basename to full path, built from the listing file.
path_dict = paths_to_dict_unique(file_paths)

In [None]:
# NOTE(review): this cell repeats the `paths_to_dict_unique` definition from the earlier
# cell; when both run, this later definition is the one in effect. Keep only one.
def paths_to_dict_unique(file_path):
    """
    Read a text file of paths (one per line) into a dict keyed by basename.

    Each line is stripped of surrounding whitespace; empty lines are skipped. The value
    is the full (stripped) path. For repeated basenames the last occurrence is kept.
    """
    lookup = {}
    with open(file_path, 'r') as source:
        for candidate in map(str.strip, source):
            if not candidate:
                continue
            lookup[os.path.basename(candidate)] = candidate
    return lookup
# NOTE(review): these two statements repeat the load already performed in the earlier
# cell (same file name, same target variables); one of the two cells can be removed.
file_paths = 'vid_paths.txt'
path_dict = paths_to_dict_unique(file_paths)

In [7]:
# Gemini API client shared by the upload, generation and delete calls in the processing cell.
client = genai.Client(api_key=api_key)

In [9]:
# Videos to score. `[...]` is only a placeholder (a one-element list holding Ellipsis):
# replace it with real file paths before running, e.g. `list(path_dict.values())` to
# score every video listed in vid_paths.txt.
video_paths = [...]  # Your list of video paths here

# One row per video. Result columns start empty and are filled in as videos are scored.
event_log = pd.DataFrame({
    'video_path': video_paths,
    'tongue_contact': pd.NA,
    'water_drop_stable': pd.NA,
    'water_spilled': pd.NA,
    'percentage_consumed': pd.NA,
    'outcome_classification': pd.NA,
    'justification': pd.NA,
    'raw_answers': pd.NA,
    'upload_time': pd.NA,
    'api_time': pd.NA,
    'full_response': pd.NA
})

# Parsed schema fields that are copied into the same-named event_log columns.
# `percentage_consumed` is part of the schema and is now recorded as well.
PARSED_COLUMNS = (
    'tongue_contact',
    'water_drop_stable',
    'water_spilled',
    'percentage_consumed',
    'outcome_classification',
    'justification',
)


def _query_model(video_path):
    """Upload one video, ask the model to score it, and clean up the upload.

    Returns:
        (response, upload_seconds, api_seconds) where `upload_seconds` covers the upload
        plus the wait for server-side processing and `api_seconds` covers the
        generate_content call.

    Raises:
        RuntimeError: if the uploaded file ends up in the FAILED state.
        Any exception raised by the upload or generation calls.
    """
    start_time = time.time()
    video_file = client.files.upload(file=video_path)
    try:
        # Poll until the service has finished processing the upload.
        while video_file.state.name == "PROCESSING":
            sleep(1)
            video_file = client.files.get(name=video_file.name)
        if video_file.state.name == "FAILED":
            raise RuntimeError(f'Uploaded file could not be processed: {video_path}')
        upload_time = time.time()

        response = client.models.generate_content(
            model=f"models/{MODEL_ID}",
            contents=[
                video_file,
                a1,
                a2,
                a3,
                a4,
                a5],
            config=config,
        )
        api_call_time = time.time()
    finally:
        # Always remove the remote copy, even when processing or generation failed.
        # Cleanup is best-effort: a failed delete must not discard a valid answer.
        try:
            client.files.delete(name=video_file.name)
        except Exception as cleanup_error:
            print(f'Could not delete uploaded file {video_file.name}: {cleanup_error}')

    return response, upload_time - start_time, api_call_time - upload_time


# Find unprocessed videos
pending = event_log.index[event_log['outcome_classification'].isna()]
total = len(pending)

# Create the output directory up front so a missing folder cannot make the final save
# fail after all videos have already been processed.
os.makedirs(save_path, exist_ok=True)
save_name = os.path.join(save_path, "video_analysis_results.csv")

pbar = tqdm(total=total, desc="Processing videos", leave=True, position=0)

try:
    for idx in pending:
        video_path = event_log.at[idx, 'video_path']
        try:
            response, upload_seconds, api_seconds = _query_model(video_path)

            # Store the raw reply first so it survives even if parsing fails below.
            event_log.loc[idx, 'raw_answers'] = str(response.text)
            event_log.loc[idx, 'upload_time'] = upload_seconds
            event_log.loc[idx, 'api_time'] = api_seconds
            event_log.loc[idx, 'full_response'] = json.dumps(response.to_json_dict())

            analysis = response.parsed
            if analysis is None:
                raise ValueError('Response could not be parsed into TrialAnalysis')
            answers = analysis.model_dump()

            for column in PARSED_COLUMNS:
                event_log.loc[idx, column] = answers[column]

        except Exception as e:
            # A single bad video must not stop the batch; its result columns stay empty.
            print(f'Error processing video at index {idx}')
            print(f'Video path: {video_path}')
            print(e)

        pbar.update(1)

except Exception as e:
    print("Error occurred during processing")
    print(e)

finally:
    # Runs on normal completion, on errors and on a manual interrupt, so whatever has
    # been collected so far is always written to disk.
    pbar.close()
    event_log.to_csv(save_name, index=False)

Processing trials in FJ_R3_2024-07-15_1:   0%|          | 0/6 [00:00<?, ?it/s]

Processing trials in FJ_R3_2024-07-15_1: 100%|██████████| 6/6 [03:53<00:00, 38.96s/it]
Processing trials in FJ_L3_2024-07-15_1:  55%|█████▌    | 27/49 [18:04<13:59, 38.14s/it]