In [1]:
import os

import assemblyai as aai

# Read the AssemblyAI key from the environment so the secret is not stored in
# the notebook source. Raises KeyError if ASSEMBLYAI_API_KEY is not set.
aai.settings.api_key = os.environ["ASSEMBLYAI_API_KEY"]

In [2]:
# Local path to the meeting recording (a publicly-accessible URL can be used
# instead). Written as a raw string because "\s" is not a valid escape
# sequence and emits a SyntaxWarning on Python 3.12+; the value is unchanged.
audio_file = r"samples\short_meet.mp3"

In [3]:
import google.generativeai as genai
from datetime import datetime
import json
import assemblyai as aai
import time
from typing import List
import itertools

def milliseconds_to_minutes(milliseconds):
    """Convert a duration in milliseconds to (fractional) minutes."""
    ms_per_minute = 1000 * 60
    return milliseconds / ms_per_minute

def batch_list(items, batch_size):
    """Lazily yield consecutive slices of ``items``, each at most ``batch_size`` long."""
    total = len(items)
    yield from (
        items[start:start + batch_size]
        for start in range(0, total, batch_size)
    )

class TaskExtractor:
    """Extract action items from meeting utterances with the Gemini API.

    Utterances are sent to the model in small batches. Each returned task is
    then matched back to the utterance in its batch that shares the most
    words with it, so the task can carry a speaker and timestamps.
    """

    # Length, in seconds, of the window that RATE_LIMIT_REQUESTS applies to.
    RATE_LIMIT_WINDOW_SECONDS = 60
    # Pause, in seconds, taken after every batch in process_transcript.
    BATCH_DELAY_SECONDS = 4

    def __init__(self, gemini_api_key):
        """Configure the Gemini client and initialise rate-limit bookkeeping.

        Args:
            gemini_api_key: API key passed to ``genai.configure``.
        """
        genai.configure(api_key=gemini_api_key)
        self.model = genai.GenerativeModel('gemini-pro')
        self.request_count = 0
        self.last_request_time = 0
        self.RATE_LIMIT_REQUESTS = 60
        self.MIN_REQUEST_INTERVAL = 1
        # Start of the current rate-limit window; 0.0 makes the first request
        # open a fresh window.
        self._window_start = 0.0

    def _rate_limit(self):
        """Block until another request may be sent, then record it.

        Enforces both a minimum spacing between requests
        (``MIN_REQUEST_INTERVAL``) and a maximum number of requests
        (``RATE_LIMIT_REQUESTS``) per ``RATE_LIMIT_WINDOW_SECONDS`` window.
        """
        now = time.time()

        # The window is measured from its own start time. Measuring from
        # last_request_time (which is overwritten on every call) would never
        # let the counter reset.
        if now - self._window_start >= self.RATE_LIMIT_WINDOW_SECONDS:
            self._window_start = now
            self.request_count = 0

        # Budget for this window is spent: wait for the window to end.
        if self.request_count >= self.RATE_LIMIT_REQUESTS:
            remaining = self._window_start + self.RATE_LIMIT_WINDOW_SECONDS - time.time()
            if remaining > 0:
                time.sleep(remaining)
            self._window_start = time.time()
            self.request_count = 0

        time_since_last = time.time() - self.last_request_time
        if time_since_last < self.MIN_REQUEST_INTERVAL:
            time.sleep(self.MIN_REQUEST_INTERVAL - time_since_last)

        self.last_request_time = time.time()
        self.request_count += 1

    def batch_utterances(self, utterances: List, batch_size: int = 5) -> List[List]:
        """Split ``utterances`` into consecutive lists of at most ``batch_size`` items."""
        return [utterances[i:i + batch_size] for i in range(0, len(utterances), batch_size)]

    @staticmethod
    def _best_matching_utterance(task, utterances):
        """Return the utterance sharing the most words with the task text.

        Returns None when no utterance shares any word. On ties the earliest
        utterance wins.
        """
        # A missing or non-string task description is treated as empty text.
        task_words = set(str(task.get('task') or '').lower().split())
        best_match = None
        max_overlap = 0
        for utterance in utterances:
            utterance_words = set(utterance['text'].lower().split())
            overlap = len(task_words & utterance_words)
            if overlap > max_overlap:
                max_overlap = overlap
                best_match = utterance
        return best_match

    def process_transcript(self, utterances_info):
        """
        Process full transcript and extract tasks with batching
        Args:
            utterances_info: List of dictionaries containing speaker, text, start, and end timestamps
        Returns:
            List of tasks with speaker and timestamp information. Tasks that
            share no word with any utterance of their batch are dropped.
        """
        all_tasks = []

        utterance_batches = self.batch_utterances(utterances_info)

        print(f"Processing {len(utterance_batches)} batches...")

        for batch in utterance_batches:
            # One line per utterance: speaker, time range, text.
            batch_text = "\n".join([
                f"Speaker {u['speaker']} ({u['start']:.2f} - {u['end']:.2f}): {u['text']}"
                for u in batch
            ])

            # Debug print to see what is being sent to Gemini
            print(f"\nProcessing batch text:\n{batch_text}\n")

            tasks = self.extract_tasks(batch_text)
            print(f"Extracted tasks from batch: {tasks}")  # Debug print

            # Attach speaker/timestamps from the best-overlapping utterance.
            for task in tasks:
                matching_utterance = self._best_matching_utterance(task, batch)
                if matching_utterance is not None:
                    task.update({
                        "speaker": matching_utterance['speaker'],
                        "timestamp_start": matching_utterance['start'],
                        "timestamp_end": matching_utterance['end']
                    })
                    all_tasks.append(task)

            time.sleep(self.BATCH_DELAY_SECONDS)

        return all_tasks

    def extract_tasks(self, text_batch):
        """Extract tasks from ``text_batch`` using the Gemini API.

        Args:
            text_batch: Conversation text to analyse.
        Returns:
            A list of task dictionaries. A single JSON object in the response
            is wrapped in a list; non-dict entries are discarded. Returns an
            empty list when the request fails or the response is not valid
            JSON.
        """
        self._rate_limit()

        prompt = """
        Extract action items and tasks from the following conversation text. 
        Look for any statements that imply something needs to be done, assignments given, or commitments made.
        Include both explicit tasks ("I'll do X") and implicit tasks ("We need to X", "X should be done").
        
        Provide results in JSON with these keys:
        - task: The task description (required)
        - deadline: The deadline mentioned, or null if not available
        - priority: Either 'high', 'medium', or 'low' based on urgency words and context
        
        Text: {text}
        
        Return an empty array [] if no tasks are found.
        Ensure the response is valid JSON.
        """.format(text=text_batch)

        try:
            response = self.model.generate_content(prompt)

            # Strip Markdown code fences so only the JSON payload remains.
            json_str = response.text
            json_str = json_str.replace('```json', '').replace('```', '').strip()

            # Debug print
            print(f"Gemini response:\n{json_str}\n")

            parsed = json.loads(json_str)
        except Exception as e:
            # Best-effort: a failed batch must not abort the whole run.
            print(f"Error processing batch: {e}")
            return []

        # Callers iterate over the result and index each item by key, so
        # always hand back a list of dictionaries.
        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            return []
        return [item for item in parsed if isinstance(item, dict)]

def format_todo_list(tasks):
    """Format tasks into a readable todo list, one line per task.

    Tasks are ordered high -> medium -> low priority (case-insensitive);
    a missing priority sorts as 'low' and unrecognised priorities sort last.
    Speaker, priority, deadline and timestamps are included only when the
    task provides them.

    Args:
        tasks: List of task dictionaries.
    Returns:
        The formatted list as a single newline-joined string, or a fixed
        message when ``tasks`` is empty.
    """
    if not tasks:
        return "No tasks found in the transcript."

    priority_order = {"high": 0, "medium": 1, "low": 2}
    unknown_rank = len(priority_order)

    def priority_rank(task):
        priority = task.get('priority', 'low')
        if not isinstance(priority, str):
            return unknown_rank
        # Model output is not guaranteed to be lower-case ("High", "LOW").
        return priority_order.get(priority.strip().lower(), unknown_rank)

    formatted_list = []
    for task in sorted(tasks, key=priority_rank):
        speaker_str = f"Speaker {task['speaker']}: " if task.get('speaker') else ""

        # Collect only the parts that exist so that no double spaces appear
        # when an optional field is missing.
        parts = [f"- {speaker_str}{task.get('task', '')}"]
        if task.get('priority'):
            parts.append(f"[{str(task['priority']).upper()}]")
        if task.get('deadline'):
            parts.append(f"(Deadline: {task['deadline']})")

        start = task.get('timestamp_start')
        end = task.get('timestamp_end')
        if start is not None and end is not None:
            parts.append(f"[{start:.2f} - {end:.2f}]")

        formatted_list.append(" ".join(parts).strip())

    return "\n".join(formatted_list)

def _attach_best_utterance(task, utterances):
    """Copy speaker/timestamps onto ``task`` from the utterance sharing the most words.

    Returns True when a matching utterance was found, False otherwise (the
    task is then left unchanged).
    """
    # A missing or non-string task description is treated as empty text.
    task_words = set(str(task.get('task') or '').lower().split())
    best_match = None
    max_overlap = 0
    for utterance in utterances:
        overlap = len(task_words & set(utterance['text'].lower().split()))
        if overlap > max_overlap:
            max_overlap = overlap
            best_match = utterance

    if best_match is None:
        return False
    task.update({
        "speaker": best_match['speaker'],
        "timestamp_start": best_match['start'],
        "timestamp_end": best_match['end']
    })
    return True


def process_audio_to_tasks(audio_file_path, aai_api_key, gemini_api_key):
    """Transcribe an audio file and extract tasks from the transcript.

    The audio is transcribed with AssemblyAI (speaker labels enabled), the
    utterances are passed to ``TaskExtractor.process_transcript`` in batches
    and, if that yields nothing, the whole formatted transcript is sent in a
    single ``extract_tasks`` call as a fallback.

    Args:
        audio_file_path: Audio source handed to ``aai.Transcriber().transcribe``.
        aai_api_key: AssemblyAI API key.
        gemini_api_key: Gemini API key passed to ``TaskExtractor``.
    Returns:
        List of task dictionaries. Returns an empty list when transcription
        fails, no utterances are found, or any exception is raised (the
        error is printed, not propagated).
    """
    try:
        # Set up AssemblyAI
        aai.settings.api_key = aai_api_key

        # Configure transcription
        config = aai.TranscriptionConfig(speaker_labels=True)

        print("\n=== Starting Audio Transcription ===")
        transcript = aai.Transcriber().transcribe(audio_file_path, config)

        # A failed transcription is reported through the status field; stop
        # here with the service's own message rather than failing later on
        # missing utterances.
        if transcript.status == aai.TranscriptStatus.error:
            print(f"Transcription failed: {transcript.error}")
            return []

        print("\n=== Full Transcript Text ===")
        print(transcript.text)

        print("\n=== Processing Utterances ===")
        utterances_info = []

        # `or []` guards against the utterance list being absent.
        for utterance in transcript.utterances or []:
            # Convert timestamps to minutes
            start_minutes = milliseconds_to_minutes(utterance.start)
            end_minutes = milliseconds_to_minutes(utterance.end)

            utterances_info.append({
                "speaker": utterance.speaker,
                "text": utterance.text,
                "start": start_minutes,
                "end": end_minutes
            })

            print("\nUtterance Details:")
            print(f"Speaker: {utterance.speaker}")
            print(f"Text: {utterance.text}")
            print(f"Time: {start_minutes:.2f} - {end_minutes:.2f}")

        if not utterances_info:
            print("No utterances found in transcript")
            return []

        print("\n=== Extracting Tasks ===")
        extractor = TaskExtractor(gemini_api_key)

        # Try batch processing first
        print("\nTrying batched processing...")
        tasks = extractor.process_transcript(utterances_info)

        # If batch processing finds no tasks, try direct processing
        if not tasks:
            print("\nBatch processing found no tasks. Trying direct processing...")

            # Only the fallback needs the whole transcript as one text.
            formatted_transcript = "\n".join([
                f"{u['start']:.2f}\n"
                f"Speaker {u['speaker']}: {u['text']}\n"
                f"{u['end']:.2f}\n"
                for u in utterances_info
            ])
            direct_tasks = extractor.extract_tasks(formatted_transcript)

            if direct_tasks:
                print("\nDirect processing found tasks. Adding timing information...")
                tasks = []
                for task in direct_tasks:
                    if not isinstance(task, dict):
                        continue
                    # Tasks without a matching utterance are kept, just
                    # without speaker/timing information.
                    _attach_best_utterance(task, utterances_info)
                    tasks.append(task)

        return tasks

    except Exception as e:
        # Top-level boundary: report the failure and return an empty result.
        print("\n=== Error in Processing ===")
        print(f"Error type: {type(e).__name__}")
        print(f"Error message: {str(e)}")
        print("\nTraceback:")
        import traceback
        traceback.print_exc()
        return []

def save_tasks_to_json(tasks, output_file='todo_list.json'):
    """
    Write tasks to a JSON file in a standardized shape.

    Each record keeps the task text, deadline and priority, and nests the
    start/end timestamps under a "timestamp" key.

    Args:
        tasks: List of task dictionaries
        output_file: Path to output JSON file
    Returns:
        True when the file was written; False when ``tasks`` is empty or
        writing failed (the error is printed).
    """
    if not tasks:
        return False

    # Normalise every task to the same set of keys, filling in defaults.
    records = [
        {
            "task": entry.get('task', ''),
            "deadline": entry.get('deadline'),
            "priority": entry.get('priority', 'low'),
            "timestamp": {
                "start": entry.get('timestamp_start'),
                "end": entry.get('timestamp_end'),
            },
        }
        for entry in tasks
    ]

    try:
        with open(output_file, 'w', encoding='utf-8') as handle:
            json.dump(records, handle, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Error saving JSON file: {e}")
        return False
    return True

# Script entry point: transcribe the sample recording and write the todo list.
if __name__ == "__main__":
    import os

    # API keys are read from the environment so that secrets are not stored
    # in the source.
    ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY")
    GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
    if not ASSEMBLYAI_API_KEY or not GEMINI_API_KEY:
        raise SystemExit(
            "Set the ASSEMBLYAI_API_KEY and GEMINI_API_KEY environment variables."
        )

    # Path to your audio file. Raw string: "\s" is not a valid escape
    # sequence and emits a SyntaxWarning on Python 3.12+.
    AUDIO_FILE_PATH = r"samples\short_meet.mp3"

    # Process audio and get todo list
    tasks = process_audio_to_tasks(
        AUDIO_FILE_PATH,
        ASSEMBLYAI_API_KEY,
        GEMINI_API_KEY
    )

    if tasks:
        # Save tasks to JSON
        if save_tasks_to_json(tasks):
            print("Tasks successfully saved to todo_list.json")

            # Also print formatted text version
            print("\nTODO List (Text Format):")
            todo_list = format_todo_list(tasks)
            print(todo_list)
        else:
            print("Failed to save tasks to JSON file")
    else:
        print("No tasks found or failed to process audio file.")

  from .autonotebook import tqdm as notebook_tqdm



=== Starting Audio Transcription ===

=== Full Transcript Text ===
Hello, everyone. Thank you guys for coming to our weekly student success meeting. And let's just get started. So I have our list of chronically absent students here. And I've been noticing a troubling trend. A lot of students are skipping on Fridays. Does anyone have any idea what's going on? I've heard some of my mentees talking about how it's really hard to get out of bed on Fridays. It might be good if we did something like a pancake breakfast to encourage them to come. I think that's a great idea. Let's try that next week. It might also be because a lot of students have been getting sick now that it's getting colder outside. I've had a number of students come by my office with symptoms like sniffling and coughing. We should put up posters with tips for not getting sick since it's almost flu season. Like, you know, wash your hands after the bathroom, stuff like that. I think that's a good idea and it'll be a good re

In [4]:
# Hand-pasted transcript used to exercise task extraction directly. Each
# utterance is laid out as: start time, "Speaker X: text", end time, blank line.
sample_text = """
0.01
Speaker A: Hello, everyone. Thank you guys for coming to our weekly student success meeting. And let's just get started. So I have our list of chronically absent students here. And I've been noticing a troubling trend. A lot of students are skipping on Fridays. Does anyone have any idea what's going on?
0.28

0.29
Speaker C: I've heard some of my mentees talking about how it's really hard to get out of bed on Fridays. It might be good if we did something like a pancake breakfast to encourage them to come.
0.43

0.44
Speaker A: I think that's a great idea. Let's try that next week.
0.49

0.50
Speaker D: It might also be because a lot of students have been getting sick now that it's getting colder outside. I've had a number of students come by my office with symptoms like sniffling and coughing. We should put up posters with tips for not getting sick since it's almost flu season. Like, you know, wash your hands after the bathroom, stuff like that.
0.74

0.75
Speaker A: I think that's a good idea and it'll be a good reminder for the teachers as well. So one other thing I wanted to talk about. There's a student I've noticed here, John Smith. He's missed seven days already and it's only November. Does anyone have an idea what's going on with him?
1.00

1.00
Speaker C: I might be able to fill in the gaps there. I talked to John today and he's really stressed out. He's been dealing with helping his parents take care of his younger siblings during the day. It might actually be a good idea if he spoke to the guidance counselor a little bit.
1.22

1.23
Speaker B: I can talk to John today if you want to send him to my office after you meet with him. It's a lot to deal with for a middle schooler. Great, thanks. And I can help out with the family's childcare needs. I'll look for some free or low cost resources in the community to share with John and he can share them with his family.
1.52

1.52
Speaker A: Great. Well, some really good ideas here today. Thanks for coming. And if no one has anything else, I think we can wrap up.
1.62

"""

import os

# Read the Gemini key from the environment so the secret is not stored in the
# notebook source. Raises KeyError if GEMINI_API_KEY is not set.
extractor = TaskExtractor(os.environ["GEMINI_API_KEY"])
tasks = extractor.extract_tasks(sample_text)
print("Tasks from sample text:", tasks)

Gemini response:
[
  {
    "task": "Implement a pancake breakfast to encourage students to attend school on Fridays",
    "deadline": "Next week",
    "priority": "medium"
  },
  {
    "task": "Put up posters with tips for not getting sick",
    "deadline": "Not specified",
    "priority": "low"
  },
  {
    "task": "Have John Smith speak to the guidance counselor",
    "deadline": "Not specified",
    "priority": "medium"
  },
  {
    "task": "Investigate free or low-cost childcare resources for John Smith's family",
    "deadline": "Not specified",
    "priority": "medium"
  }
]

Tasks from sample text: [{'task': 'Implement a pancake breakfast to encourage students to attend school on Fridays', 'deadline': 'Next week', 'priority': 'medium'}, {'task': 'Put up posters with tips for not getting sick', 'deadline': 'Not specified', 'priority': 'low'}, {'task': 'Have John Smith speak to the guidance counselor', 'deadline': 'Not specified', 'priority': 'medium'}, {'task': "Investigate free 

In [5]:
import json
import assemblyai as aai

def milliseconds_to_minutes(milliseconds):
    """Convert a duration in milliseconds to (fractional) minutes."""
    ms_per_minute = 1000 * 60
    return milliseconds / ms_per_minute

# Transcribe the recording with speaker labels and dump one record per
# utterance (times in minutes) to a JSON file.
config = aai.TranscriptionConfig(
    speaker_labels=True,
)

# Raw string: "\s" is not a valid escape sequence and emits a SyntaxWarning
# on Python 3.12+; the value is unchanged.
audio_file = r"samples\short_meet.mp3"  # Replace with your audio file path
transcript = aai.Transcriber().transcribe(audio_file, config)

# A failed transcription is reported through the status field; stop with the
# service's message instead of failing later on missing utterances.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

utterances_info = []

# `or []` guards against the utterance list being absent.
for utterance in transcript.utterances or []:
    start_minutes = milliseconds_to_minutes(utterance.start)
    end_minutes = milliseconds_to_minutes(utterance.end)
    print(f"{start_minutes:.2f}")
    print(f"Speaker {utterance.speaker}: {utterance.text}")
    print(f"{end_minutes:.2f}")
    print()

    utterance_data = {
        "start_minutes": start_minutes,
        "end_minutes": end_minutes,
        "speaker": utterance.speaker,
        "text": utterance.text
    }
    utterances_info.append(utterance_data)

# Save to JSON file
output_file = "utterances_info.json"
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(utterances_info, f, indent=2, ensure_ascii=False)

print(f"Utterances information saved to {output_file}")

0.01
Speaker A: Hello, everyone. Thank you guys for coming to our weekly student success meeting. And let's just get started. So I have our list of chronically absent students here. And I've been noticing a troubling trend. A lot of students are skipping on Fridays. Does anyone have any idea what's going on?
0.28

0.29
Speaker C: I've heard some of my mentees talking about how it's really hard to get out of bed on Fridays. It might be good if we did something like a pancake breakfast to encourage them to come.
0.43

0.44
Speaker A: I think that's a great idea. Let's try that next week.
0.49

0.50
Speaker D: It might also be because a lot of students have been getting sick now that it's getting colder outside. I've had a number of students come by my office with symptoms like sniffling and coughing. We should put up posters with tips for not getting sick since it's almost flu season. Like, you know, wash your hands after the bathroom, stuff like that.
0.74

0.75
Speaker A: I think that'