********** Annual Sales Strategy Meet **********


In [3]:
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from tkinter.scrolledtext import ScrolledText
import speech_recognition as sr
from pydub import AudioSegment
from transformers import pipeline

class AudioAnalyzerGUI:
    def __init__(self, root):
        self.root = root
        self.root.title("Audio Analysis Tool")
        self.root.geometry("1000x800")
        
        self.create_widgets()
        self.setup_sample_data()

    def create_widgets(self):
        # File Input Section
        input_frame = ttk.Frame(self.root)
        input_frame.pack(pady=10, fill=tk.X)
        
        ttk.Label(input_frame, text="Audio File Path:").pack(side=tk.LEFT, padx=5)
        self.file_entry = ttk.Entry(input_frame, width=50)
        self.file_entry.pack(side=tk.LEFT, padx=5)
        
        ttk.Button(input_frame, text="Browse", command=self.browse_file).pack(side=tk.LEFT, padx=5)
        ttk.Button(input_frame, text="Process", command=self.process_audio).pack(side=tk.LEFT)

        # Results Display
        results_frame = ttk.Frame(self.root)
        results_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Transcription Section
        ttk.Label(results_frame, text="Full Transcription:").pack(anchor=tk.W)
        self.transcription_text = ScrolledText(results_frame, height=10, wrap=tk.WORD)
        self.transcription_text.pack(fill=tk.BOTH, expand=True)

        # Summary Section
        ttk.Label(results_frame, text="Summary:").pack(anchor=tk.W, pady=(10,0))
        self.summary_text = ScrolledText(results_frame, height=5, wrap=tk.WORD)
        self.summary_text.pack(fill=tk.BOTH, expand=True)

        # Metrics Section
        ttk.Label(results_frame, text="Participation Metrics:").pack(anchor=tk.W, pady=(10,0))
        self.metrics_text = tk.Text(results_frame, height=10, wrap=tk.NONE)
        self.metrics_text.pack(fill=tk.BOTH, expand=True)

    def setup_sample_data(self):
        # Initial placeholder data
        self.sample_transcription = "Transcription will appear here..."
        self.sample_summary = "Summary will be generated automatically..."
        self.metrics_data = [
            ("Speaker 1", "0.00 min", 0, "Processing...")
        ]

    def browse_file(self):
        filepath = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav *.mp3 *.ogg")])
        self.file_entry.delete(0, tk.END)
        self.file_entry.insert(0, filepath)

    def process_audio(self):
        filepath = self.file_entry.get()
        
        if not filepath:
            messagebox.showerror("Error", "Please select an audio file")
            return
            
        try:
            # Load and convert audio
            audio = AudioSegment.from_file(filepath)
            duration_sec = len(audio) / 1000  # Get duration in seconds
            
            # Convert to WAV for processing
            wav_path = "audio2.wav"
            audio.export(wav_path, format="wav")
            
            # Transcribe audio
            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
                transcription = recognizer.recognize_google(audio_data)
            
            # Generate summary using Hugging Face transformers
            summary = self.summarize_text(transcription)
            
            # Calculate metrics for multiple speakers
            # This is a simple heuristic: split the transcription into parts and assign to speakers
            # In a real-world scenario, you would use speaker diarization here
            parts = transcription.split('.')
            num_speakers = 2  # Assume 2 speakers for this example
            speaker_parts = [parts[i::num_speakers] for i in range(num_speakers)]
            
            self.sample_transcription = transcription
            self.sample_summary = summary
            self.metrics_data = []
            
            for i, speaker_part in enumerate(speaker_parts):
                speaker_duration = (len(speaker_part) / len(parts)) * duration_sec / 60
                contributions = len(speaker_part)
                self.metrics_data.append((f"Speaker {i+1}", f"{speaker_duration:.2f} min", contributions, "Full conversation"))
            
            self.show_results()
            
        except Exception as e:
            messagebox.showerror("Processing Error", str(e))

    def summarize_text(self, text):
        """Summarize text using Hugging Face transformers."""
        summarizer = pipeline("summarization")
        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
        return summary[0]['summary_text']

    def show_results(self):
        # Clear previous content
        self.transcription_text.delete(1.0, tk.END)
        self.summary_text.delete(1.0, tk.END)
        self.metrics_text.delete(1.0, tk.END)

        # Insert new results
        self.transcription_text.insert(tk.END, self.sample_transcription)
        self.summary_text.insert(tk.END, self.sample_summary)
        self.create_metrics_table()

    def create_metrics_table(self):
        headers = ["Participant", "Duration", "Contributions", "Key Points"]
        col_widths = [15, 15, 15, 60]
        
        # Create table header
        separator = "+".join(["-"*w for w in col_widths])
        self.metrics_text.insert(tk.END, f"+{separator}+\n")
        self.metrics_text.insert(tk.END, "|".join([h.ljust(w) for h, w in zip(headers, col_widths)]) + "\n")
        self.metrics_text.insert(tk.END, f"+{separator}+\n")

        # Add data rows
        for row in self.metrics_data:
            formatted_row = "|".join([str(cell).ljust(w) for cell, w in zip(row, col_widths)])
            self.metrics_text.insert(tk.END, f"|{formatted_row}|\n")
            self.metrics_text.insert(tk.END, f"+{separator}+\n")

        # Configure table styling
        self.metrics_text.configure(font=("Courier New", 10), state=tk.DISABLED)

if __name__ == "__main__":
    root = tk.Tk()
    app = AudioAnalyzerGUI(root)
    root.mainloop()

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Your max_length is set to 150, but your input_length is only 45. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=22)
