In [1]:
# import sys
# !{sys.executable} -m pip uninstall pandas numpy -y

In [2]:
# import sys
# !{sys.executable} -m pip install pandas numpy

In [3]:
# import sys
# !{sys.executable} -m pip install google-generativeai

In [4]:
import os
import json
from pathlib import Path
from typing import List, Dict, Any
import pandas as pd
import google.generativeai as genai
import time
import getpass



In [None]:
# --- 1. Gemini API Configuration ---
GEMINI_API_KEY = "AIzaSyBO1deeym5AKDTGLOUGMiKDzAyRsKK75r8"

GENAI_MODEL_NAME = "gemini-1.5-pro-latest"

# --- 2. Input Query & Output Files ---
QUERY_ID = "airball_1"
QUERY_VIDEO_PATH = rf"media\{QUERY_ID}.mp4" # Path to the main video to annotate

JSON_OUTPUT_PATH = f"{QUERY_ID}_annotations.json"
CSV_OUTPUT_PATH = f"{QUERY_ID}_annotations.csv"

# --- 3. (NEW) Similarity Results & "Top-K" ---
# -----------------------------------------------------------------
# This is where you paste the output from your other script.
# (I'm just using dummy data here as an example)
# -----------------------------------------------------------------
TOP_K = 3 # How many context videos to use? (e.g., Top 3)

# Paste your list of (video_name, score) tuples for each condition
most_similar_visual = [('airball_8.mp4', 0.85), ('airball_3.mp4', 0.79), ('where_1.mp4', 0.75), ('airball_11.mp4', 0.72)]
most_similar_audio = [('airball_8.mp4', 0.92), ('airball_9.mp4', 0.88), ('airball_10.mp4', 0.81)]
most_similar_text = [('airball_5.mp4', 0.78), ('airball_9.mp4', 0.77), ('airball_10.mp4', 0.71)]
most_similar_vis_aud = [('airball_3.mp4', 0.91), ('airball_8.mp4', 0.89), ('airball_11.mp4', 0.85)]
most_similar_aud_txt = [('airball_9.mp4', 0.95), ('airball_5.mp4', 0.92), ('airball_10.mp4', 0.90)]
most_similar_vis_txt = [('airball_3.mp4', 0.82), ('airball_11.mp4', 0.80), ('airball_5.mp4', 0.79)]
most_similar_vis_aud_txt = [('airball_8.mp4', 0.98), ('airball_3.mp4', 0.96), ('airball_9.mp4', 0.95)]

# --- 4. (NEW) Helper to Build Context Sets ---
# This part automatically builds the full file paths
# -----------------------------------------------------------------
def build_paths(similar_list: List, base_path: str, k: int) -> List[str]:
    # 1. Get just the video names (using your logic)
    video_names = [video_name for video_name, _ in similar_list]
    
    # 2. Get just the Top-K
    top_k_names = video_names[:k]
    
    # 3. Build the full file paths
    # Using rf"" (raw string literal) to handle Windows paths
    return [rf"{base_path}\{name}" for name in top_k_names]

# Define the base folders for your context videos
# (You might need to adjust these paths)
PATH_VIS = r"media"
PATH_AUD = r"media"
PATH_TXT = r"media"
PATH_VA = r"media"

SyntaxError: unterminated string literal (detected at line 51) (2318235494.py, line 51)

In [None]:
# --- System-level instructions ---
BASELINE_SYSTEM = (
    "You are an assistant tasked with generating a brief summary of a short video. "
    "Use only the information available in the video. Do not rely on any external "
    "knowledge or assumptions. Focus on describing what is happening in the video "
    "concisely."
)

CONTEXT_AWARE_SYSTEM = (
    "You are an assistant tasked with generating a summary of a short video. "
    "You are provided with the main video and a few additional videos that are "
    "semantically related. Use all available information to generate a summary "
    "that best describes what is happening in the main video. Focus on enhancing "
    "your understanding using the related videos, but ensure the summary reflects "
    "the main video."
)

# --- User prompts ---
BASELINE_USER = (
    "Please generate a 2–3 sentence summary of the following video based solely "
    "on its content."
)

CONTEXT_AWARE_USER = (
    "Please summarize the main video using all the information provided. The first "
    "video is the main one, and the others are related videos that may provide "
    "helpful context. Your summary should describe what is happening in the main "
    "video in 2–3 sentences."
)

# --- Names for the 7 context sets ---
CONDITION_NAMES = [
    "visual_only",       # set 1
    "audio_only",        # set 2
    "text_only",         # set 3
    "visual_plus_audio", # set 4
    "audio_plus_text",   # set 5
    "visual_plus_text",  # set 6
    "visual_audio_text", # set 7
]

In [None]:
try:
    genai.configure(api_key=GEMINI_API_KEY)
    print("Gemini API Key configured successfully.")
except Exception as e:
    print(f"Error configuring Gemini API: {e}")

In [None]:
# Cache to store uploaded file objects
_uploaded_cache: Dict[str, Any] = {}

def _upload_video(path: str):
    """
    Upload a video to Gemini Files API once and wait until it's ACTIVE.
    """
    global _uploaded_cache
    full = str(Path(path).resolve())

    if full not in _uploaded_cache:
        print(f"Uploading: {full}")
        try:
            file_obj = genai.upload_file(path=full)
            print("Uploaded, waiting for processing...")

            # Wait until ACTIVE
            while True:
                file_obj = genai.get_file(file_obj.name)
                if file_obj.state.name == "ACTIVE":
                    print(f"✅ File is ACTIVE: {file_obj.name}")
                    break
                elif file_obj.state.name == "FAILED":
                    raise RuntimeError(f"❌ File {file_obj.name} failed to process.")
                time.sleep(2)
            
            _uploaded_cache[full] = file_obj
        
        except Exception as e:
            print(f"❌ Error uploading {path}: {e}")
            return None # Return None on failure

    return _uploaded_cache.get(full)


def _make_model(system_instruction: str):
    return genai.GenerativeModel(
        model_name=GENAI_MODEL_NAME,
        system_instruction=system_instruction,
    )

print("Helper functions defined.")

In [None]:
def annotate_single_query(
    query_video_path: str,
    context_sets: List[List[str]],
    query_id: str = None,
) -> List[Dict[str, Any]]:
    """
    Run 8 Gemini calls for one query video:
      - baseline (no extra context)
      - 7 context-aware conditions in the order of CONDITION_NAMES
    """
    if len(context_sets) != 7:
        raise ValueError("context_sets must be a list of 7 context video sets.")

    if query_id is None:
        query_id = Path(query_video_path).stem

    baseline_model = _make_model(BASELINE_SYSTEM)
    context_model = _make_model(CONTEXT_AWARE_SYSTEM)

    query_file = _upload_video(query_video_path)
    if query_file is None:
        print(f"❌ Aborting: Failed to upload main query video {query_video_path}")
        return []

    results: List[Dict[str, Any]] = []

    # --------- 1) Baseline (query only) ---------
    print(f"\n[Query {query_id}] Baseline annotation …")
    try:
        baseline_response = baseline_model.generate_content(
            [query_file, BASELINE_USER]
        )
        baseline_text = baseline_response.text.strip()
    except Exception as e:
        print(f"  ❌ Error in baseline generation: {e}")
        baseline_text = f"ERROR: {e}"

    results.append({
        "query_id": query_id,
        "annotation_type": "baseline",
        "condition_index": 0,
        "condition_name": "baseline",
        "query_video_path": query_video_path,
        "context_video_paths": [],
        "annotation_text": baseline_text,
    })

    # --------- 2) 7 context-aware annotations ---------
    for idx, (cond_name, ctx_paths) in enumerate(zip(CONDITION_NAMES, context_sets), start=1):
        print(f"[Query {query_id}] Context condition {idx}: {cond_name} "
              f"with {len(ctx_paths)} context videos …")
        
        # Upload context videos, skipping any that fail
        ctx_files = []
        for p in ctx_paths:
            f = _upload_video(p)
            if f:
                ctx_files.append(f)

        # Main video first, then context videos, then the user prompt
        contents = [query_file] + ctx_files + [CONTEXT_AWARE_USER]

        ctx_text = ""
        try:
            ctx_response = context_model.generate_content(contents)
            ctx_text = ctx_response.text.strip()
        except Exception as e:
            print(f"  ❌ Error in context generation for {cond_name}: {e}")
            ctx_text = f"ERROR: {e}"

        results.append({
            "query_id": query_id,
            "annotation_type": "context_aware",
            "condition_index": idx,      # 1..7
            "condition_name": cond_name, # matches CONDITION_NAMES
            "query_video_path": query_video_path,
            "context_video_paths": ctx_paths,
            "annotation_text": ctx_text,
        })

    return results

print("Core annotation function defined.")

In [None]:
def save_annotations(
    annotations: List[Dict[str, Any]],
    json_path: str,
    csv_path: str,
):
    """Saves the list of annotation results to JSON and CSV."""
    if not annotations:
        print("No annotations to save.")
        return
        
    # JSON (all records in one list)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(annotations, f, indent=2, ensure_ascii=False)

    # CSV
    df = pd.DataFrame(annotations)
    df.to_csv(csv_path, index=False)

    print(f"\nSaved annotations to:\n  JSON: {json_path}\n  CSV:  {csv_path}")

print("Save utility defined.")

In [None]:
print(f"Starting annotation for query: {QUERY_ID}")
print(f"Query video: {QUERY_VIDEO_PATH}")
print(f"Using model: {GENAI_MODEL_NAME}")

# Clear the cache for a fresh run (optional, remove if you want to reuse uploads)
_uploaded_cache = {} 

# --- Run the main function ---
all_annotations = annotate_single_query(
    query_video_path=QUERY_VIDEO_PATH, # <--- Uses variable from Cell 2
    context_sets=CONTEXT_SETS,     # <--- Uses variable from Cell 2
    query_id=QUERY_ID,             # <--- Uses variable from Cell 2
)

# --- Save the results ---
if all_annotations:
    save_annotations(
        annotations=all_annotations,
        json_path=JSON_OUTPUT_PATH, # <--- Uses variable from Cell 2
        csv_path=CSV_OUTPUT_PATH,   # <--- Uses variable from Cell 2
    )
else:
    print("Annotation process failed, no results to save.")

print("\n--- Annotation process complete. ---")

In [None]:
try:
    df_results = pd.read_csv(CSV_OUTPUT_PATH)
    display(df_results)
except FileNotFoundError:
    print(f"Could not find CSV file at {CSV_OUTPUT_PATH}")
except Exception as e:
    print(f"An error occurred while reading the CSV: {e}")