In [None]:
# @title 1. Setup, Configuration, and User Intent
!pip install assemblyai openaiimport assemblyai as aai
from openai import OpenAI
from google.colab import drive, userdata
import os
import datetime
import time

print("--- Step: Importing Libraries, Checking Secrets, and Configuration ---")

# --- BEGIN USER CONFIGURATION ---
# IMPORTANT: set this to the Google Drive folder that holds your audio files
# (MP3s). Transcripts and summaries are written to the same folder.
# The value is relative to the Drive mount point.
# Example: 'MyDrive/MyAudioProject/Recordings'
DRIVE_BASE_FOLDER_PATH = 'MyDrive/Recordings_Meetings_Processed'
# --- END USER CONFIGURATION ---

# --- How to add the required secrets (kept here for notebook users) ---
#  1. Click the key icon (🔑) in the left sidebar of Colab.
#  2. Click "+ Add a new secret".
#  3. Name:  "ASSEMBLYAI_API_KEY".
#  4. Value: your AssemblyAI API key.
#  5. Switch "Notebook access" ON.
#  6. Click "+ Add a new secret" again.
#  7. Name:  "OPENROUTER_API_KEY".
#  8. Value: your OpenRouter API key (or any other OpenAI-compatible API key).
#  9. Switch "Notebook access" ON.
# 10. Run this cell.

# One print call, one line per argument: tells the user what is expected of
# them and which folder is about to be used.
print(
    "Libraries imported.",
    "Please ensure you have added ASSEMBLYAI_API_KEY and OPENROUTER_API_KEY to Colab secrets.",
    f"This script will try to use the Google Drive folder: '{DRIVE_BASE_FOLDER_PATH}'.",
    "If this is not correct, please edit the 'DRIVE_BASE_FOLDER_PATH' variable at the top of this cell and rerun.",
    sep="\n",
)


# Read both API keys from the Colab secret store, then build the service
# clients. Anything that goes wrong is reported and re-raised so the cell stops.
try:
    ASSEMBLYAI_API_KEY = userdata.get('ASSEMBLYAI_API_KEY')
    # Used for OpenRouter or any other OpenAI-compatible service.
    OPENROUTER_API_KEY = userdata.get('OPENROUTER_API_KEY')

    # Report the first key (AssemblyAI is checked before OpenRouter) that is empty.
    if not (ASSEMBLYAI_API_KEY and OPENROUTER_API_KEY):
        missing_key_name = "OPENROUTER_API_KEY" if ASSEMBLYAI_API_KEY else "ASSEMBLYAI_API_KEY"
        raise ValueError(f"{missing_key_name} not found in Colab secrets. Please add it and rerun.")
    print("API keys loaded successfully from secrets.")

    # Both keys are confirmed present, so the clients can be configured now.
    aai.settings.api_key = ASSEMBLYAI_API_KEY
    openrouter_client = OpenAI(
        api_key=OPENROUTER_API_KEY,
        # OpenRouter's endpoint; point this elsewhere for another compatible service.
        base_url="https://openrouter.ai/api/v1",
    )
    transcriber = aai.Transcriber()
except Exception as key_error:
    print(f"Error loading API keys or initializing clients: {key_error}")
    print("Please add the secrets as instructed above and rerun this cell.")
    raise  # nothing below can work without the keys

# --- Get User Intent ---
print("\n--- Step: Select Operation ---")
user_intent = None
# Keep showing the menu until the answer is exactly "1" or "2".
while user_intent is None:
    print("What would you like to do?")
    print("1: Transcribe a new audio file and then summarize it.")
    print("2: Summarize an existing transcript file (.txt).")
    user_intent_input = input("Enter your choice (1 or 2): ")
    if user_intent_input not in ("1", "2"):
        print("Invalid choice. Please enter 1 or 2.")
        continue
    user_intent = user_intent_input

# --- Initialize variables ---
# All shared state starts out as None so that later steps can test it even on
# branches that never assign it (avoids reference-before-assignment errors).
transcript_text = summary_text = None
basename_for_outputs = None  # output filename stem, i.e. without extension
selected_mp3_filename = selected_mp3_path = None  # only set when intent is "1"
selected_transcript_filename = None  # only set when intent is "2"
transcript = None  # transcript object returned by the transcriber

# Full folder path used for every Drive read/write. It is built from the
# user-configurable DRIVE_BASE_FOLDER_PATH and assumes Google Drive is mounted
# at '/content/drive/' in Colab.
drive_folder_path = os.path.join('/content/drive/', DRIVE_BASE_FOLDER_PATH)

In [None]:
# @title 2. Main Script Logic: Processing and Summarization
print("\n--- Step 1: Mounting Google Drive ---")
# Mount Drive and make sure the working folder is present, creating it when it
# is missing. Every failure is reported and re-raised so the cell stops here.
try:
    # force_remount=True is requested explicitly (noted upstream as "helpful").
    drive.mount('/content/drive', force_remount=True)
    print("Google Drive mounted successfully.")

    # With Drive mounted, the target folder can be checked and created if needed.
    if os.path.exists(drive_folder_path):
        print(f"Target folder '{drive_folder_path}' found.")
    else:
        print(f"The folder '{drive_folder_path}' does not exist. Attempting to create it.")
        try:
            os.makedirs(drive_folder_path)
            print(f"Successfully created folder: '{drive_folder_path}'")
        except Exception as create_error:
            print(f"Error creating folder '{drive_folder_path}': {create_error}")
            print("Please ensure the base path in Google Drive (e.g., 'MyDrive/Recordings_Meetings_Processed') is valid and you have permissions.")
            raise

except Exception as mount_error:
    # Also reached for a re-raised folder-creation error, so both messages show.
    print(f"Error mounting Google Drive or preparing folder: {mount_error}")
    raise

# --- Conditional Workflow based on User Intent ---

if user_intent == "1":
    # --- Path 1: Transcribe and Summarize ---
    # Flow: pick an MP3 from the Drive folder, transcribe it with AssemblyAI,
    # then write the transcript into the same folder. Every failure before a
    # transcript exists raises (or exits on cancel), so each step below can
    # rely on the previous one having succeeded; no re-checking is needed.
    print("\n--- Operation: Transcribe and Summarize ---")

    # Step 2: Select MP3 File from Configured Drive Folder
    print("\n--- Step 2: Selecting MP3 File ---")
    if not os.path.isdir(drive_folder_path):
        print(f"Error: The folder path '{drive_folder_path}' does not exist or is not a directory.")
        print(f"Please ensure the folder '{DRIVE_BASE_FOLDER_PATH}' exists in your Google Drive or check the configuration.")
        raise FileNotFoundError(f"Directory not found: {drive_folder_path}")

    print(f"Looking for MP3 files in: {drive_folder_path}")
    try:
        all_items = os.listdir(drive_folder_path)
        # os.listdir returns entries in arbitrary order; sort so the numbered
        # menu is stable from run to run.
        mp3_files = sorted(
            name for name in all_items
            if name.lower().endswith('.mp3') and os.path.isfile(os.path.join(drive_folder_path, name))
        )
    except OSError as e:
        print(f"Error listing files in '{drive_folder_path}': {e}")
        raise

    if not mp3_files:
        print(f"No MP3 files found in the folder: {drive_folder_path}")
        print("Please upload some MP3 files to this location in your Google Drive.")
        raise FileNotFoundError(f"No MP3 files found in {drive_folder_path}.")

    print("\nFound the following MP3 files:")
    for i, filename in enumerate(mp3_files, start=1):
        print(f"{i}: {filename}")

    # Prompt until a valid menu number is entered; 'c' or Ctrl+C stops the cell.
    while True:
        try:
            selection_input = input(f"\nEnter the number of the MP3 file you want to process (1-{len(mp3_files)}), or 'c' to cancel: ")
        except KeyboardInterrupt:  # Gracefully handle Ctrl+C
            print("\nSelection cancelled by user.")
            raise SystemExit
        if selection_input.lower() == 'c':
            print("Operation cancelled by user.")
            raise SystemExit
        try:
            selection = int(selection_input)
        except ValueError:
            print("Invalid input. Please enter a number or 'c' to cancel.")
            continue
        if 1 <= selection <= len(mp3_files):
            break
        print("Invalid selection. Please enter a number from the list.")

    selected_mp3_filename = mp3_files[selection - 1]
    selected_mp3_path = os.path.join(drive_folder_path, selected_mp3_filename)
    basename_for_outputs = os.path.splitext(selected_mp3_filename)[0]
    print(f"\nYou selected: {selected_mp3_filename}")

    # Step 3: Transcribe Audio with AssemblyAI
    print("\n--- Step 3: Transcribing Audio ---")
    print(f"Starting transcription for: {selected_mp3_filename}")
    print(f"Using audio file path: {selected_mp3_path}")

    config = aai.TranscriptionConfig(
        speech_model=aai.SpeechModel.universal,
        speaker_labels=True,
        language_detection=True,
    )
    try:
        print("Submitting file to AssemblyAI for transcription...")
        start_time = time.time()
        transcript = transcriber.transcribe(selected_mp3_path, config=config)
        end_time = time.time()

        # Anything other than a completed transcript aborts the run.
        if transcript.status == aai.TranscriptStatus.error:
            print(f"Transcription failed: {transcript.error}")
            raise RuntimeError(f"AssemblyAI Transcription Error: {transcript.error}")
        if transcript.status != aai.TranscriptStatus.completed:
            print(f"Transcription status: {transcript.status}. This is an unexpected status.")
            raise RuntimeError(f"Unexpected AssemblyAI Transcription Status: {transcript.status}")

        print(f"Transcription completed successfully in {end_time - start_time:.2f} seconds.")
        transcript_text = transcript.text
        if not transcript_text:
            print("Warning: Transcription completed but returned empty text. The audio might be silent or have no discernible speech.")
        if transcript.utterances:
            print("\n--- Utterances (Speaker Labeled Segments) ---")
            for utterance in transcript.utterances:
                print(f"Speaker {utterance.speaker}: {utterance.text}")
        else:
            print("\n--- Full Transcript (No speaker labels detected/returned or not applicable) ---")
            print(transcript_text if transcript_text else "[No text generated from transcription]")
    except Exception as e:
        # Report, then propagate: without a transcript the rest cannot proceed.
        print(f"An error occurred during transcription: {e}")
        raise

    # Step 4: Save Transcript to Google Drive
    print("\n--- Step 4: Saving Transcript ---")
    if transcript_text:
        # One timestamp drives both the filename and the header line so the
        # two can never disagree.
        saved_at = datetime.datetime.now()
        current_date = saved_at.strftime("%Y%m%d_%H%M%S")
        transcript_filename = f"{basename_for_outputs}_{current_date}_transcript.txt"
        transcript_save_path = os.path.join(drive_folder_path, transcript_filename)
        try:
            with open(transcript_save_path, 'w', encoding='utf-8') as f:
                f.write(f"Transcript generated from: {selected_mp3_filename}\n")
                f.write(f"Transcription Date: {saved_at.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
                if transcript.utterances:
                    f.write("--- Utterances ---\n")
                    for utterance in transcript.utterances:
                        f.write(f"Speaker {utterance.speaker}: {utterance.text}\n")
                    f.write("\n--- Full Raw Transcript ---\n")
                f.write(transcript_text)
            print(f"Transcript saved successfully to: {transcript_save_path}")
        except Exception as e:
            # Best effort: a failed save must not prevent summarization.
            print(f"Error saving transcript file: {e}")
            print("Warning: Could not save transcript file.")
    else:
        print("Skipping transcript saving because transcription failed or produced no text.")

elif user_intent == "2":
    # --- Path 2: Summarize Existing Transcript ---
    print("\n--- Operation: Summarize Existing Transcript ---")

    # Step 2: Select Transcript File (.txt) from Configured Drive Folder
    print("\n--- Step 2: Selecting Transcript File ---")
    if not os.path.isdir(drive_folder_path):
        print(f"Error: The folder path '{drive_folder_path}' does not exist or is not a directory.")
        print(f"Please ensure the folder '{DRIVE_BASE_FOLDER_PATH}' exists in your Google Drive or check the configuration.")
        raise FileNotFoundError(f"Directory not found: {drive_folder_path}")
    else:
        print(f"Looking for .txt transcript files in: {drive_folder_path}")
        try:
            all_items = os.listdir(drive_folder_path)
            txt_files = [f for f in all_items if f.lower().endswith('.txt') and
                         not f.lower().endswith('_summary.txt') and 
                         os.path.isfile(os.path.join(drive_folder_path, f))]
        except Exception as e:
            print(f"Error listing files in '{drive_folder_path}': {e}")
            raise

        if not txt_files:
            print(f"No suitable .txt files (excluding existing summaries) found in the folder: {drive_folder_path}")
            print("Please ensure you have transcript .txt files in this location.")
            raise FileNotFoundError(f"No .txt files found in {drive_folder_path}.")
        else:
            print("\nFound the following .txt files (potential transcripts):")
            for i, filename in enumerate(txt_files):
                print(f"{i + 1}: {filename}")

            while True:
                try:
                    selection_input = input(f"\nEnter the number of the .txt file you want to process (1-{len(txt_files)}), or 'c' to cancel: ")
                    if selection_input.lower() == 'c':
                        print("Operation cancelled by user.")
                        raise SystemExit
                    selection = int(selection_input)
                    if 1 <= selection <= len(txt_files):
                        selected_transcript_filename = txt_files[selection - 1]
                        selected_transcript_path = os.path.join(drive_folder_path, selected_transcript_filename)
                        if selected_transcript_filename.lower().endswith('_transcript.txt'):
                            basename_for_outputs = selected_transcript_filename[:-len('_transcript.txt')]
                        else:
                            basename_for_outputs = os.path.splitext(selected_transcript_filename)[0]

                        print(f"\nYou selected: {selected_transcript_filename}")
                        try:
                            with open(selected_transcript_path, 'r', encoding='utf-8') as f:
                                transcript_text = f.read().strip() 
                            if not transcript_text:
                                print(f"Error: The selected transcript file '{selected_transcript_filename}' is empty.")
                                print("Please select a file with content or ensure the file is not empty.")
                                raise ValueError(f"Empty transcript file: {selected_transcript_filename}. Cannot summarize.")
                            print(f"Transcript loaded successfully from: {selected_transcript_path}")
                            break 
                        except Exception as e_read:
                            print(f"Error reading transcript file '{selected_transcript_filename}': {e_read}")
                            raise 
                    else:
                        print("Invalid selection. Please enter a number from the list.")
                except ValueError as ve:
                    if "Empty transcript file" in str(ve):
                        raise 
                    print("Invalid input. Please enter a number or 'c' to cancel.")
                except KeyboardInterrupt:
                    print("\nSelection cancelled by user.")
                    raise SystemExit

# --- Common Steps for Summarization (if transcript_text is available) ---
# The step number depends on the path taken: path 1 has two extra steps
# (transcribe, save transcript) before it gets here.
current_step_number_summarize = 5 if user_intent == "1" else 3
print(f"\n--- Step {current_step_number_summarize}: Summarizing Transcript ---")

# Model ID kept in one place so the progress message and the API request can
# never disagree; change it here to use a different model.
SUMMARY_MODEL = "google/gemini-2.5-flash-preview"

if transcript_text:
    print(f"Starting summarization using OpenRouter (model: {SUMMARY_MODEL})...")
    prompt_messages = [
        {
            "role": "system",
            "content": "You are an expert meeting summarizer. Analyze the following meeting transcript and provide a concise yet comprehensive summary. Focus on key discussion points, decisions made, and any action items assigned. If speaker labels (e.g., 'Speaker A:', 'Speaker B:') are present, try to attribute points to speakers in the summary where relevant and natural."
        },
        {
            "role": "user",
            "content": f"Please summarize the following meeting transcript:\n\n---\n\n{transcript_text}\n\n---\n\nSummary:"
        }
    ]
    try:
        print("Sending request to OpenRouter for summarization...")
        start_time = time.time()
        completion = openrouter_client.chat.completions.create(
            model=SUMMARY_MODEL,
            messages=prompt_messages,
            temperature=0.5,
        )
        end_time = time.time()

        # Only accept a response whose first choice actually carries text.
        if completion.choices and completion.choices[0].message and completion.choices[0].message.content:
            summary_text = completion.choices[0].message.content.strip()
            if not summary_text:
                print("Warning: Summarization completed but returned empty or whitespace-only text.")
                summary_text = None
            else:
                print(f"Summarization completed successfully in {end_time - start_time:.2f} seconds.")
                print("\n--- Generated Summary ---")
                print(summary_text)
        else:
            print("Warning: Summarization response from OpenRouter did not contain the expected content structure.")
            summary_text = None
    except Exception as e:
        # Best effort: a failed summary is reported but does not stop the cell.
        print(f"An error occurred during summarization with OpenRouter: {e}")
        summary_text = None
        print("Warning: Summarization failed. The transcript will not be summarized.")
else:
    # transcript_text is None or empty because of what happened in earlier steps.
    if user_intent == "1":
        print("Skipping summarization because transcription was not performed, failed, produced no text, or was cancelled.")
    elif user_intent == "2":
        print("Skipping summarization because no valid transcript file was loaded, the file was empty, or selection was cancelled.")
    else:  # Should not be reached if user_intent is always 1 or 2
        print("Skipping summarization as no transcript text is available.")

# --- Common Step for Saving Summary ---
# The step number depends on the path taken (path 1 has two extra steps).
current_step_number_save_summary = 6 if user_intent == "1" else 4
print(f"\n--- Step {current_step_number_save_summary}: Saving Summary ---")

# A summary is written only when there is both text to save and a filename
# stem to save it under; the two skip messages cover every other case.
if not basename_for_outputs:
    print("Skipping summary saving because the base filename for output is missing (this indicates an earlier issue, e.g., no file selected).")
elif not summary_text:
    print("Skipping summary saving because summarization failed, produced no text, or was not attempted.")
else:
    # One timestamp drives both the filename and the header line so the two
    # can never disagree.
    summary_saved_at = datetime.datetime.now()
    current_date_summary = summary_saved_at.strftime("%Y%m%d_%H%M%S")
    summary_filename = f"{basename_for_outputs}_{current_date_summary}_summary.txt"
    summary_save_path = os.path.join(drive_folder_path, summary_filename)

    # Name of the file the summary was derived from: the MP3 on path 1, the
    # transcript on path 2, "N/A" if neither is known.
    summary_source_name = {
        "1": selected_mp3_filename,
        "2": selected_transcript_filename,
    }.get(user_intent) or "N/A"

    try:
        with open(summary_save_path, 'w', encoding='utf-8') as f:
            f.write(f"Summary for: {summary_source_name}\n")
            f.write(f"Summary Generation Date: {summary_saved_at.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write(summary_text)
        print(f"Summary saved successfully to: {summary_save_path}")
    except Exception as e:
        # Best effort: the summary was already printed, so only warn here.
        print(f"Error saving summary file: {e}")
        print("Warning: Could not save summary file.")

print("\n--- Script Finished ---")