In [157]:
import pandas as pd
import litellm
import os
import re
from google import genai
from google.genai import types
from google.genai.errors import APIError
from pathlib import Path
import json
import boto3
from botocore.exceptions import NoCredentialsError
from dotenv import load_dotenv

In [158]:
# API_KEY = insert your own
# client = genai.Client(api_key=API_KEY)
load_dotenv()
api_key = os.getenv("API_KEY")
AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
BUCKET_NAME = "linqia-video-analyser-bucket"
client = genai.Client(api_key=api_key)


In [159]:
NEW_PROMPT_TEXT = """
You are an AI video analysis system. You MUST follow these rules:

STRICT RULES (NO HALLUCINATION):
- Do NOT guess, infer, assume, or add information not explicitly shown or spoken.
- If something is unclear or not confirmed visually or through transcript, return “No category”, “Unknown”, or “None”.
- Do NOT infer creator intent, emotions, demographics, motivations.
- Do NOT identify brands unless a logo is clearly readable OR the transcript explicitly names the brand.
- If multiple interpretations are possible, choose the LEAST specific.
- Prefer “No category” instead of guessing.
- Transcript overrides for spoken facts. Visuals override for visual facts.
- If visuals & transcript conflict → choose “Unknown”.

INPUTS:
- Video (creator video)
- Transcript (spoken content)

TASK A — VIDEO SUMMARY  
Provide a strictly summary of this video describing ONLY what is visually and audibly confirmed.

TASK B — CLASSIFICATION  
Classify the video using ONLY the allowed labels below.  
If it does not match ANY category → return “No category”.

────────────────────────────────────────────────────────
1) CONTENT THEME (choose MULTIPLE)
[Recipes, Cooking, Baking, Food Reviews, Restaurant Tours, Pets, Dog Training, Cat Care, Exotic Animals, Travel, Budget Travel, Luxury Travel, Adventure Travel, Family, Parenting, Single Parenting, Large Family Life, Cars, Car Reviews, Auto Repair, Classic Cars, Fashion, Streetwear, High Fashion, Thrifting, Beauty, Skincare, Makeup Tutorials, Hair Styling, Fitness, Weightlifting, Yoga, CrossFit, Technology, Gadget Reviews, Coding Tutorials, Tech News, Gaming, Game Reviews, Let's Plays, Esports, Music, Instrument Tutorials, Music Production, Song Covers, DIY/Crafts, Home Improvement, Upcycling, Arts and Crafts, Education, Language Learning, Science Experiments, History Lessons, Humor, Stand-up Comedy, Pranks, Sketch Comedy, Lifestyle, Minimalism, Luxury Living, Van Life, Personal Finance, Budgeting Tips, Investing Basics, Cryptocurrency, Mental Health, Meditation, Therapy Insights, Self-Care Tips, Gardening, Urban Gardening, Permaculture, Plant Care, Sports, Sports Analysis, Athlete Interviews, Training Tips, Books, Book Reviews, Author Interviews, Reading Challenges, Art, Painting Tutorials, Digital Art, Art History, Photography, Camera Reviews, Photo Editing Tutorials, Photography Tips, Environmental, Sustainability Tips, Climate Change Awareness, Zero Waste Living, Politics, Political Analysis, Activism, Civic Education, Spirituality, Meditation Guides, Religious Teachings, New Age Practices, Business, Entrepreneurship, Marketing Tips, Small Business Advice, Science, Space Exploration, Biology Facts, Chemistry Experiments, Film/TV, Movie Reviews, TV Show Recaps, Behind-the-Scenes, Fashion, Outfit Ideas, Fashion History, Sustainable Fashion]

2) CONTENT STYLE (choose MULTIPLE)
[ASMR, Skits, Transitions, Graphics-heavy, Vlogs, Day-in-the-Life, Tutorials, Reviews, Unboxing, Challenges, Q&A, Interviews, Reaction Videos, Compilations, Time-lapse, Slow Motion, Stop Motion, Live Streaming, Storytelling, Podcasts, Listicles, How-To Guides, Before and After, Pranks, Social Experiments, Parodies, Montages, Behind-the-Scenes, Hauls, Lookbooks, Product Demos, Taste Tests, Mukbang, Room Tours, Workout Routines, Outfit of the Day, Get Ready With Me, What I Eat in a Day, Morning Routines, Night Routines, Transformation Videos, Expectation vs. Reality, Duets/Collaborations, Rants, Storytime, Ambient/Background Videos, Shorts/Reels/TikToks, Educational Animations, Whiteboard Explainers, Green Screen Effects, Point-of-View (POV), Lip Sync, Dance Choreography, Talent Showcases, Study With Me]

3) CREATOR PRESENCE (choose MULTIPLE)
[Hands-only, Creator-centric, Silhouettes, No visible people, Occasional appearances, Group content, Face only, Full body, Voice-only, Animated avatar, Family-focused, Pet-focused, Product-focused, Guest-starring, Cameo appearances, Behind the camera, Partial visibility]

4) ON-SCREEN TEXT / GRAPHICS (choose MULTIPLE)
["POV", Closed Captioning, Brand Logos, FTC Disclosures (#ad/#brandpartner), Subtitles, Emojis, Infographics, Statistics, Quotes, Timestamps, Pricing Information, Product Names, Usernames/Handles, Clickable Links, Countdown Timers, Call-to-Action Buttons, Animated Text, Lyric Displays, Screen Overlays, Lower Thirds, Pop-up Bubbles, Tutorial Steps, Recipe Ingredients, Titles/Headlines, End Screens, Watermarks, Location Tags, Age Restrictions, Content Warnings, Fact Boxes, Scoreboard/Leaderboard]

5) KEY VIDEO ELEMENTS (choose MULTIPLE)
[Creator, Product, Pet, Child, Text/Graphics, Music/Sound, Special Effects, Props, Food, Vehicles, Technology, Nature, Urban Environment, Artwork, Sports Equipment, Clothing/Fashion Items, Books, Musical Instruments, Fitness Equipment, Beauty Products, Home Decor, DIY Projects, Collectibles, Celebrity Guests, Expert Interviews, Audience Participation, Screen Recordings, Archival Footage]

6) BRAND SAFETY CATEGORIES (choose MULTIPLE)
[Family-friendly, Mild language, Strong language, Sexual content, Violence, Drug use, Alcohol consumption, Revealing clothing, Controversial topics, Political content, Religious content, Graphic medical content, Dangerous stunts, Potential copyright issues, Unverified health claims, Financial advice, Sensitive social issues, Depictions of gambling, Weapons, Extreme sports, Conspiracy theories, Potential misinformation, Satire/parody]

7) BRAND TYPE (choose MULTIPLE)
[No brands featured, Multiple brands featured, Exclusive brand partnership, Competitor brands shown, Small/local brands, Luxury brands, Tech brands, Fashion brands, Beauty brands, Food and beverage brands, Automotive brands, Travel/hospitality brands, Fitness/health brands, Home goods brands, Entertainment brands, Financial services brands, Educational brands, Pet care brands, Sustainability-focused brands, Sports equipment brands, Gaming brands, Subscription services, Direct-to-consumer brands, Retail chains]

8) BRAND NAMES (free text; “None” if no brands visible or spoken)

9) CAPTION for the video (2–3 sentences, strictly factual, no assumptions)

10) Video Transcript:
    **Role:** You are a professional video transcriber and editor.
    **Task:** Create a verbatim transcript of the SPOKEN audio in this video file.
    **Strict Constraints:**
        1. **Audio Only:** Transcribe only what is spoken. Do NOT transcribe text that appears on the screen (titles, lower thirds, subtitles, or background signs).
        2. **No Visual Descriptions:** Do not describe the scene, actions, or physical appearance of the speakers.
        3. **Clean Up:** Remove filler words (like "um," "uh," "ah") and false starts to make the text human-readable, but do not change the meaning or vocabulary used by the speakers.
        4. **Formatting:** Use proper punctuation and paragraph breaks to ensure readability.
    **Output:** Provide the transcript in plain text format.

────────────────────────────────────────────────────────
OUTPUT FORMAT — STRICT JSON ONLY

{
  "summary": "",
  "content_theme": "",
  "content_style": "",
  "creator_presence": "",
  "on_screen_text": [],
  "key_video_elements": [],
  "brand_safety": [],
  "brand_type": "",
  "brand_names": [],
  "transcript": ""
}

Return ONLY this JSON. No explanations.
"""

In [None]:
NEW_PROMPT_TEXT_3 = """
You are an AI video analysis system. You MUST follow these modular rules for accuracy, consistency, and confidence-based classification.

────────────────────────────────────────────────────────
MODULE 1 — NO HALLUCINATION PRINCIPLES
────────────────────────────────────────────────────────
- NEVER guess, assume, or infer beyond what is visually or audibly confirmed.
- If something is unclear, ambiguous, or partially visible/spoken → return “Unknown”, “None”, or “No category”.
- If multiple interpretations are possible, choose the LEAST specific.
- If visuals and transcript conflict → return “Unknown”.
- If unsure whether a label applies → it does NOT.
- Do NOT overlabel. Assume nothing. This video stands alone.
- Omission is better than false inclusion. Inaction > Wrong Action.
- **Confidence Thresholding:** Only assign a label if it is unambiguously and confidently supported by the transcript or visible content. If there is doubt, leave the label out.
- **Minimal Valid Labels Policy:** You are encouraged to return the fewest possible accurate labels. Excessive or unnecessary labeling is considered a failure.
- **Negative Instruction:** Do NOT include unrelated themes or styles even if they are common in similar videos. Never assume categories from genre familiarity.
- **Hard Limit on Label Count (Optional):** You may return no more than 5 content_theme labels unless the video clearly contains more, with direct confirmation.

────────────────────────────────────────────────────────
MODULE 2 — BODY VISIBILITY RULES
────────────────────────────────────────────────────────
- “FACE ONLY” → select ONLY if no part of the body beyond face/neck is visible at ANY time.
- “FULL BODY” → select if head, torso, and legs are visible at the same time in at least one frame.
- “UPPER BODY” → select if torso and arms are visible but legs are not.
- NEVER select logically conflicting categories.
- Apply based ONLY on confirmed visual evidence.

────────────────────────────────────────────────────────
MODULE 3 — CATEGORY SELECTION RULES
────────────────────────────────────────────────────────
- A label must represent the **PRIMARY FOCUS** of the video.
- Do NOT assign categories based on incidental objects (e.g., phones, pets, clothing).
- A label should only be selected if the object or concept is central, discussed, or demonstrated.
- Broad categories like “Lifestyle” or “Education” require strong confirmation from visuals or transcript.
- Reinforce: Labels must be clearly supported by the actual video. If unsure, exclude it.
- Reinforce: Minimal, highly accurate labeling is the goal. Excessive or unnecessary labels undesirable.

────────────────────────────────────────────────────────
MODULE 4 — TUTORIAL IDENTIFICATION RULES
────────────────────────────────────────────────────────
- Label as “Tutorial” ONLY IF:
  a) The transcript includes explicit instructional language:
     ("how to", "step one", "let me show you", etc.), OR  
  b) The video clearly demonstrates an ordered teaching sequence.
- A demonstration without teaching intent is NOT a tutorial.

────────────────────────────────────────────────────────
MODULE 5 — TRANSITION DETECTION RULES
────────────────────────────────────────────────────────
- Mark “Transitions” ONLY if you observe clear editing changes:
  (cuts, fades, jump cuts).
- Do NOT label transitions based on camera movement alone.
- If editing is unclear → return “None”.

────────────────────────────────────────────────────────
MODULE 5A — TRANSITION DETECTION RULES
────────────────────────────────────────────────────────
MONTAGE / COMPILATION IDENTIFICATION RULES
- Label as “Montages” or “Compilations” if:
  a) The video presents multiple similar scenes or items,
  b) Each shown briefly,
  c) With repeated cuts between items,
  d) And no single continuous narrative dominates.
- Common examples include:
  • Multiple dishes shown sequentially
  • Multiple locations shown briefly
  • Repeated short clips of similar actions


────────────────────────────────────────────────────────
MODULE 6 — BRAND SAFETY EVALUATION
────────────────────────────────────────────────────────
Evaluate risk across ALL modalities:
a) AUDIO (spoken words & lyrics)  
b) VISUALS (graphic/explicit imagery)  
c) ON-SCREEN TEXT (if provided)

▶ LANGUAGE & LYRICS
- Treat background music lyrics as equal to spoken dialogue.
- If profane, explicit, or offensive lyrics are clearly audible → flag appropriately.
- If lyrics are unintelligible → return “Unknown”.

▶ VISUAL RISK
- Flag risk ONLY if visual content is clearly graphic or explicit (e.g., blood, wounds, nudity).
- Do NOT reduce severity based on medical or educational context.
- Suggestive visuals = “Unknown” unless clearly graphic.

▶ TEXT RISK
- If visible on-screen text contains risky language (e.g. profanity, slurs), flag accordingly.

────────────────────────────────────────────────────────
MODULE 7 — BRAND LOGO DETECTION RULES
────────────────────────────────────────────────────────
- A brand may be identified ONLY if:
  a) A logo is clearly visible and readable on-screen (including clothing, accessories, packaging, signage, or objects), OR
  b) The brand name is explicitly spoken in the transcript.
- Logos on clothing, hats, shoes, or worn items count as valid brand evidence if readable.
- Stylized designs that resemble a brand but are not clearly readable MUST be ignored.
- If a logo is visible but not legible → do NOT assign a brand.

────────────────────────────────────────────────────────
MODULE 8 — ON-SCREEN TEXT / GRAPHICS RULES (STRICT MODE)
────────────────────────────────────────────────────────
- The “on_screen_text” field MUST contain ONLY category labels from the approved list.
- You MUST NOT output actual words, phrases, sentences, captions, subtitles, or quotes.
- You MUST NOT include spoken words, transcript text, or paraphrases.
- Even if text is visible AND spoken, it still MUST NOT appear here.
- This field represents WHAT TYPE of text is shown, not WHAT THE TEXT SAYS.
- Any violation of this rule is considered a failure.
- If no categories apply → return [].

────────────────────────────────────────────────────────
MODULE 9 — TRANSCRIPTION GENERATION RULES
────────────────────────────────────────────────────────
- Transcribe ONLY what is audibly spoken.
- Transcribe ONLY foreground spoken dialogue intended as speech.
- Do NOT transcribe background music lyrics unless:
  a) The lyrics are clearly foregrounded and intended to be heard as speech, OR
  b) The speaker is performing/singing as the primary content.
- Background music lyrics may still be evaluated for BRAND SAFETY,
  but must NOT appear in the transcript unless clearly foregrounded.
- Do NOT transcribe visible text unless it is clearly spoken aloud.
- DO NOT describe visuals.
- Remove filler words and false starts. Format properly with punctuation and paragraphing.
- Output transcript in plain, readable text format.

────────────────────────────────────────────────────────
MODULE 10 — TRANSCRIPTION GENERATION RULES
────────────────────────────────────────────────────────
CONTENT THEME SELECTION CONSTRAINT
- You MUST identify:
  • 1 Primary content theme (required if any apply)
  • Up to 4 Secondary themes (optional)
- Do NOT exceed 5 total content_theme labels unless the video clearly contains multiple unrelated segments.

────────────────────────────────────────────────────────
INPUTS:
────────────────────────────────────────────────────────
- Video (creator video)
- Transcript (spoken content only)
- Parsed On-Screen Text/Graphics

────────────────────────────────────────────────────────
TASK A — VIDEO SUMMARY  
Provide a strictly factual, concise summary describing ONLY what is visually and audibly confirmed. Do NOT include inferred motivations, emotions, or assumptions.

TASK B — VIDEO CLASSIFICATION  
Classify the video using ONLY the provided labels. Choose only those that are confidently justified by visual or spoken content.  
If no confident match → return “No category”.

────────────────────────────────────────────────────────
1) CONTENT THEME (choose MULTIPLE)
[Recipes, Cooking, Baking, Food Reviews, Restaurant Tours, Pets, Dog Training, Cat Care, Exotic Animals, Travel, Budget Travel, Luxury Travel, Adventure Travel, Family, Parenting, Single Parenting, Large Family Life, Cars, Car Reviews, Auto Repair, Classic Cars, Fashion, Streetwear, High Fashion, Thrifting, Beauty, Skincare, Makeup Tutorials, Hair Styling, Fitness, Weightlifting, Yoga, CrossFit, Technology, Gadget Reviews, Coding Tutorials, Tech News, Gaming, Game Reviews, Let's Plays, Esports, Music, Instrument Tutorials, Music Production, Song Covers, DIY/Crafts, Home Improvement, Upcycling, Arts and Crafts, Education, Language Learning, Science Experiments, History Lessons, Humor, Stand-up Comedy, Pranks, Sketch Comedy, Lifestyle, Minimalism, Luxury Living, Van Life, Personal Finance, Budgeting Tips, Investing Basics, Cryptocurrency, Mental Health, Meditation, Therapy Insights, Self-Care Tips, Gardening, Urban Gardening, Permaculture, Plant Care, Sports, Sports Analysis, Athlete Interviews, Training Tips, Books, Book Reviews, Author Interviews, Reading Challenges, Art, Painting Tutorials, Digital Art, Art History, Photography, Camera Reviews, Photo Editing Tutorials, Photography Tips, Environmental, Sustainability Tips, Climate Change Awareness, Zero Waste Living, Politics, Political Analysis, Activism, Civic Education, Spirituality, Meditation Guides, Religious Teachings, New Age Practices, Business, Entrepreneurship, Marketing Tips, Small Business Advice, Science, Space Exploration, Biology Facts, Chemistry Experiments, Film/TV, Movie Reviews, TV Show Recaps, Behind-the-Scenes, Fashion, Outfit Ideas, Fashion History, Sustainable Fashion, Oddly Satisfying, Sensory Video, Debating, Social Commentary, Philosophy, Study, Academics, Exam Preparation, Research, Student Life, Campus Tours, Interview Tips, Racing, Religion, Extreme Sports, Wildlife, Motivation, Famous Quotes, Walking Tour, Health & Wellbeing, Medical, Career, Aviation, Geography, Investments, Property, Celebrities, Entertainment News, Back to School, Couples]

2) CONTENT STYLE (choose MULTIPLE)
[ASMR, Skits, Transitions, Graphics-heavy, Vlogs, Day-in-the-Life, Tutorials, Reviews, Unboxing, Challenges, Q&A, Interviews, Reaction Videos, Compilations, Time-lapse, Slow Motion, Stop Motion, Live Streaming, Storytelling, Podcasts, Listicles, How-To Guides, Before and After, Pranks, Social Experiments, Parodies, Montages, Behind-the-Scenes, Hauls, Lookbooks, Product Demos, Taste Tests, Mukbang, Room Tours, Workout Routines, Outfit of the Day, Get Ready With Me, What I Eat in a Day, Morning Routines, Night Routines, Transformation Videos, Expectation vs. Reality, Duets/Collaborations, Rants, Storytime, Ambient/Background Videos, Shorts/Reels/TikToks, Educational Animations, Whiteboard Explainers, Green Screen Effects, Point-of-View (POV), Lip Sync, Dance Choreography, Talent Showcases, Response Videos, Text-based, Talking-Head, Demonstration, Ragebait, Philosophical, Motivational Content, Study With Me, Study Tips, Tips and Tricks, Medical Advice, Quiz, Travel Itineraries, Case Studies, Street Interview, What's In My Bag, Giveaways, Product Reviews, Service Reviews, Success Stories]

3) CREATOR PRESENCE (choose MULTIPLE)
[Hands-only, Creator-centric, Silhouettes, No visible people, Occasional appearances, Group content, Face only, Full body, Upper Body, Voice-only, Animated avatar, Family-focused, Pet-focused, Product-focused, Guest-starring, Cameo appearances, Behind the camera, Partial visibility]

4) ON-SCREEN TEXT / GRAPHICS (choose MULTIPLE - SELECT ONLY from provided list; do NOT output transcript)
["POV", Closed Captioning, Brand Logos, FTC Disclosures (#ad/#brandpartner), Subtitles, Emojis, Infographics, Statistics, Quotes, Timestamps, Pricing Information, Product Names, Usernames/Handles, Clickable Links, Countdown Timers, Call-to-Action Buttons, Animated Text, Lyric Displays, Screen Overlays, Lower Thirds, Pop-up Bubbles, Tutorial Steps, Recipe Ingredients, Titles/Headlines, End Screens, Watermarks, Location Tags, Age Restrictions, Content Warnings, Fact Boxes, Scoreboard/Leaderboard]

5) KEY VIDEO ELEMENTS (choose MULTIPLE)
[Creator, Product, Pet, Child, Text/Graphics, Music/Sound, Special Effects, Props, Food, Vehicles, Technology, Nature, Urban Environment, Artwork, Sports Equipment, Clothing/Fashion Items, Books, Musical Instruments, Fitness Equipment, Beauty Products, Home Decor, DIY Projects, Collectibles, Celebrity Guests, Expert Interviews, Audience Participation, Screen Recordings, Archival Footage, Medicine, Electronics, Drinks, Accessories, Travel Accessories, Transportation, Map, Routes, News/Headlines, Quotes, Hardware, Military, Nutrition, Healthcare, Property, Stocks, Cryptocurrency, Hair Appliances, Tools, Gardening Equipment, Cleaning Supplies, Office Supplies, Toys, Baby Products, Party Supplies, Seasonal Items, Holiday Decorations, Stationery, Art Supplies]

6) BRAND SAFETY CATEGORIES (choose MULTIPLE)
[Family-friendly, Not family-friendly, Mild language, Strong language, Sexual content, Violence, Drug use, Alcohol consumption, Revealing clothing, Controversial topics, Political content, Religious content, Graphic medical content, Dangerous stunts, Potential copyright issues, Unverified health claims, Financial advice, Sensitive social issues, Depictions of gambling, Weapons, Extreme sports, Conspiracy theories, Potential misinformation, Satire/parody]

7) BRAND TYPE (choose MULTIPLE)
[No brands featured, Multiple brands featured, Exclusive brand partnership, Competitor brands shown, Small/local brands, Luxury brands, Tech brands, Fashion brands, Beauty brands, Food and beverage brands, Automotive brands, Travel/hospitality brands, Fitness/health brands, Home goods brands, Entertainment brands, Financial services brands, Educational brands, Pet care brands, Sustainability-focused brands, Sports equipment brands, Gaming brands, Subscription services, Direct-to-consumer brands, Retail chains]

8) BRAND NAMES (free text; “None” if no brands visible or spoken)

9) CAPTION for the video (2–3 sentences, strictly factual, no assumptions)

10) Video Transcript:
    **Role:** You are a professional video transcriber and editor.
    **Task:** Create a verbatim transcript of the SPOKEN audio in this video file.
    **Strict Constraints:**
        1. **Audio Only:** Transcribe only what is spoken. If words are visible on screen (e.g. titles, lower thirds, subtitles, background signs, images on a laptop) but not audibly spoken, they must NOT appear in the transcript. Do NOT merge or reconcile audio with on-screen text. Visual text is NOT evidence of speech.
        2. **No Visual Descriptions:** Do not describe the scene, actions, or physical appearance of the speakers.
        3. **Clean Up:** Remove filler words (like "um," "uh," "ah") and false starts to make the text human-readable, but do not change the meaning or vocabulary used by the speakers.
        4. **Formatting:** Use proper punctuation and paragraph breaks to ensure readability.
    **Output:** Provide the transcript in plain text format.

────────────────────────────────────────────────────────
OUTPUT FORMAT — STRICT JSON ONLY
────────────────────────────────────────────────────────

{
  "summary": "",
  "content_theme": [],
  "content_style": [],
  "creator_presence": [],
  "on_screen_text": [],
  "key_video_elements": [],
  "brand_safety": [],
  "brand_type": [],
  "brand_names": [],
  "caption": "",
  "transcript": ""
}

Return ONLY this JSON. No explanations.
"""

In [161]:
def analyze_video_url(video_url: str, prompt_text: str = NEW_PROMPT_TEXT_3) -> str:
    """
    Analyzes a single video URL using litellm and the Gemini model.

    """
    if not video_url or not isinstance(video_url, str) or not video_url.startswith('http'):
        print(f"Skipping invalid URL: {video_url}")
        return "Error: Invalid or empty URL"

    print(f"Analyzing video: {video_url}...")
    
    # This is the message structure for multimodal input (text + video)
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt_text
                },
                {
                    "type": "image_url",  # This key is used for video as well
                    "image_url": {
                        "url": video_url
                    }
                }
            ]
        }
    ]
    
    try:
        response = litellm.completion(
            model="gemini/gemini-2.5-flash-lite", # Using the latest flash  
            #model = "gemini/gemini-2.5-pro",
            messages=messages,
            api_key=api_key,
            timeout=300 # 5-minute timeout for potentially long videos
        )
        # Extract the text content from the response
        content = response.choices[0].message.content
        print("Analysis complete.")
        return content
    except Exception as e:
        print(f"Error analyzing video {video_url}: {e}")
        return f"Error: {e}"

In [162]:
def extract_json(response_text: str) -> dict:
    """
    Attempts to extract JSON from response text that might have markdown code blocks or extra text.
    Returns a Python dictionary.
    """
    
    # Try to parse & read as-is (pure JSON) first. Great if it works, else try other method.
    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        pass
    
    # Try to extract JSON from markdown code blocks
    # Look for the pattern ```json {...}``` & extract parts b/w the code block markers
    json_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
    matches = re.findall(json_pattern, response_text, re.DOTALL) 
    
    if matches:
        try:
            return json.loads(matches[0]) # 
        except json.JSONDecodeError:
            pass
    
    # Try to find JSON object in the text (look for { ... })
    try:
        start = response_text.find('{')
        end = response_text.rfind('}') + 1
        if start != -1 and end > start:
            json_str = response_text[start:end]
            return json.loads(json_str)
    except json.JSONDecodeError:
        pass
    
    # If all else fails, return None
    return None


In [163]:
def get_public_s3_urls(bucket_name: str, AWS_ACCESS_KEY: str, AWS_SECRET_KEY: str, region: str = "us-east-2"):
    """
    Connects to S3 buckets, finds and creates public URLs for all video files.
    """
    s3_client = boto3.client('s3',
        aws_access_key_id=AWS_ACCESS_KEY, # UPDATE THIS VALUE
        aws_secret_access_key=AWS_SECRET_KEY, # UPDATE THIS VALUE
        region_name=region # UPDATE THIS VALUE if different region
    )
    
    video_urls = {}
    
    try:
        response = s3_client.list_objects_v2(Bucket=bucket_name)
        
        if 'Contents' not in response: # Checks if there are any files in the bucket
            print("No files found in bucket")
            return {}
        
        for obj in response['Contents']:
            file_name = obj['Key']
            
            if any(file_name.endswith(ext) for ext in ['.mp4', '.mov', '.avi', '.mkv']):
                url = f"https://{bucket_name}.s3.{region}.amazonaws.com/{file_name}"
                # Replace spaces and special characters with URL-safe codes so web browsers can understand them
                url_encoded = url.replace(' ', '%20').replace('&', '%26')
                
                # "Videos for Analysis/Auto Repair.mp4" -> ["Videos for Analysis", "Auto Repair.mp4"]
                # Take last part "Auto Repair.mp4"
                clean_name = file_name.split('/')[-1] 
                video_urls[clean_name] = url_encoded
                print(f"{clean_name}") # Shows progress as each video is found
        
        return video_urls
        
    except Exception as e:
        print(f"Error: {e}")
        return None


In [164]:
def convert_analyses_to_csv(results: list, output_csv: str = "video_analyses_summary.csv"):
    """
    Helper function to convert JSON analysis results to CSV.
    """
    csv_rows = []
    
    for result in results:
        # Attempt to extract JSON from the response text
        analysis_json = extract_json(result['analysis'])
        
        if analysis_json:
            row = {
                'video_name': result['video_name'],
                'video_url': result.get('url', 'N/A'),
                'summary': analysis_json.get('summary', ''),
                'content_theme': analysis_json.get('content_theme', ''),
                'content_style': analysis_json.get('content_style', ''),
                'creator_presence': analysis_json.get('creator_presence', ''),
                'on_screen_text': ', '.join(analysis_json.get('on_screen_text', [])) if isinstance(analysis_json.get('on_screen_text'), list) else analysis_json.get('on_screen_text', ''),
                'key_video_elements': ', '.join(analysis_json.get('key_video_elements', [])) if isinstance(analysis_json.get('key_video_elements'), list) else analysis_json.get('key_video_elements', ''),
                'brand_safety': ', '.join(analysis_json.get('brand_safety', [])) if isinstance(analysis_json.get('brand_safety'), list) else analysis_json.get('brand_safety', ''),
                'brand_type': analysis_json.get('brand_type', ''),
                'brand_names': ', '.join(analysis_json.get('brand_names', [])) if isinstance(analysis_json.get('brand_names'), list) else analysis_json.get('brand_names', ''),
                'transcript': analysis_json.get('transcript', '')
            }
            csv_rows.append(row)
        else:
            # If JSON extraction failed, save error info
            csv_rows.append({
                'video_name': result['video_name'],
                'video_url': result.get('url', 'N/A'),
                'summary': 'ERROR: Could not parse response',
                'content_theme': '',
                'content_style': '',
                'creator_presence': '',
                'on_screen_text': '',
                'key_video_elements': '',
                'brand_safety': '',
                'brand_type': '',
                'brand_names': '',
                'transcript': result['analysis'][:500]  # First 500 chars of raw response
            })
    
    df = pd.DataFrame(csv_rows)
    df.to_csv(output_csv, index=False, encoding='utf-8')
    
    print(f"\n✓ Saved CSV with {len(csv_rows)} videos to: {output_csv}")
    
    return df

In [165]:
def analyze_s3_videos_with_csv(video_urls: dict, output_folder: str = "testing_6"):
    """
    Analyze videos from S3 URLs and save it as JSON + CSV outputs.
    """
    os.makedirs(output_folder, exist_ok=True)
    results = []
    
    for i, (video_name, url) in enumerate(video_urls.items(), 1):
        
        analysis = analyze_video_url(url, NEW_PROMPT_TEXT_3)
        
        # Save individual JSON
        output_filename = f"{Path(video_name).stem}_analysis.json"
        output_path = os.path.join(output_folder, output_filename)
        
        # Try to extract and save clean JSON
        analysis_json = extract_json(analysis)
        
        # If JSON extraction succeeds, save as JSON, else save raw text
        if analysis_json:
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(analysis_json, f, indent=2, ensure_ascii=False)
        else:
            output_path = output_path.replace('.json', '.txt')
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(analysis)
    
        results.append({
            'video_name': video_name,
            'url': url,
            'output_file': output_path,
            'analysis': analysis
        })
        
        print(results)

    
    # Convert to CSV
    csv_output = os.path.join(output_folder, "video_analysis_summary.csv")
    df = convert_analyses_to_csv(results, csv_output)
    
    print(f"\n📊 Output: {csv_output}")
    
    return results, df

In [166]:
# Get video URLs from S3
video_urls = get_public_s3_urls(BUCKET_NAME, AWS_ACCESS_KEY, AWS_SECRET_KEY, region="us-east-2")

if video_urls:
    results, analysis_df = analyze_s3_videos_with_csv(video_urls)
    
    # # Display first 3 summaries to verify
    # print(analysis_df[['video_name', 'summary']].head(3))

Beauty 2.mp4
Beauty.mp4
Car Racing.mp4
Catching Food.mp4
Coding Tutorials.mp4
DIY Craft.mp4
Education.mp4
Emirates.mp4
Esport.mp4
Fitness.mp4
Food.mp4
Hair Tutorial.mp4
High Fashion.mp4
Instrument Tutorials.mp4
Jelly Bed.mp4
Language Learning.mp4
Luxury Travel.mp4
Med Student Day In Life.mp4
Music Production.mp4
Parenting Advice.mp4
Plant.mp4
Restaurant Visit 2.mp4
Restaurant Visit.mp4
Space.mp4
Streetwear.mp4
Study Motivation.mp4
Study Prep.mp4
Study Tips.mp4
Yoga:Pilates.mp4
Analyzing video: https://linqia-video-analyser-bucket.s3.us-east-2.amazonaws.com/Videos%20for%20Analysis/Beauty%202.mp4...
Analysis complete.
[{'video_name': 'Beauty 2.mp4', 'url': 'https://linqia-video-analyser-bucket.s3.us-east-2.amazonaws.com/Videos%20for%20Analysis/Beauty%202.mp4', 'output_file': 'testing_6/Beauty 2_analysis.json', 'analysis': '```json\n{\n  "summary": "A makeup artist applies a full face of makeup on a model, focusing on eye makeup, foundation, contour, blush, and lipstick. The video demonst

In [167]:
# # Old Prompt Text Output using Video URL
# video_url = "https://resonate-media-cdn.internal.linqia.com/influencer-content-converted-videos/2378c553fc1a1edab4ae5c3786903c92c5f4b8d9662f0e5e4e695a6c26c114a1.mp4"
# output = analyze_video_url(video_url, OLD_PROMPT_TEXT)
# print("\n=== GEMINI VIDEO ANALYSIS OUTPUT ===\n")
# print(output)

In [168]:
# # New Prompt Text Output using Video URL
# video_url = "https://resonate-media-cdn.internal.linqia.com/influencer-content-converted-videos/2378c553fc1a1edab4ae5c3786903c92c5f4b8d9662f0e5e4e695a6c26c114a1.mp4"
# output = analyze_video_url(video_url, NEW_PROMPT_TEXT)
# print("\n=== GEMINI VIDEO ANALYSIS OUTPUT ===\n")
# print(output)