In [None]:
!pip install openai-whisper
!pip install pyannote.audio

In [None]:
# Cell 2: Imports for Whisper transcription and pyannote.audio diarization
import whisper
import os
import time
from pathlib import Path
# Import pyannote.audio for diarization
from pyannote.audio.pipelines import SpeakerDiarization
from pyannote.core import Segment
from huggingface_hub import HfFolder

In [None]:
# Configuration Settings
# `model_size` is the checkpoint name handed to whisper.load_model() in the model-loading cell.
model_size = "large" # You can adjust this based on your requirement
# Make sure to adjust the path after uploading the file or connecting Google Drive
# `audio_file_path` is the input for both the transcription and the diarization cells.
# NOTE(review): the path lives under /content/drive, but no cell visible here mounts
# Google Drive — confirm the drive is mounted before running.
audio_file_path = "/content/drive/MyDrive/Transcribe/social_network.mp4"

In [None]:
# Load Whisper Model
# Loads the checkpoint named by `model_size`; the resulting `model` object is what the
# transcription cell passes along and calls `.transcribe()` on.
model = whisper.load_model(model_size)

In [None]:
# Transcription, Performance Metrics, Error Handling, Logging
def transcribe_and_log_full_transcript(audio_path, model, log_file="transcription_log.txt"):
  """Transcribe one media file, save a timestamped transcript, and log the outcome.

  The transcript is written next to the input file (same path with a ``.txt``
  suffix), one ``start-end: text`` line per entry of ``result['segments']``.

  Args:
    audio_path: Path of the media file handed to ``model.transcribe``.
    model: Object exposing ``transcribe(path)`` that returns a dict holding a
      ``'segments'`` list whose items carry ``'start'``, ``'end'`` and ``'text'``.
    log_file: Text file that success and error messages are appended to.

  Returns:
    The dict returned by ``model.transcribe`` when every step succeeded, so that
    later cells can reuse ``result['segments']``; ``None`` when any step raised
    (the error is written to ``log_file`` instead of propagating).
  """
  start_time = time.time() # Start measuring time
  try:
    result = model.transcribe(audio_path)
    execution_time = time.time() - start_time # Covers the transcribe call only

    # Construct success message with execution time
    success_message = f"Transcription successful for {Path(audio_path).name}! Execution time: {execution_time:.2f} seconds"
    log_message(success_message, log_file) # Log the success message

    # Save the full transcription to a text file in the same directory as the audio file
    output_file_path = Path(audio_path).with_suffix('.txt')
    # Explicit UTF-8: transcribed text may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to represent them.
    with open(output_file_path, "w", encoding="utf-8") as file:
      for segment in result['segments']:
        file.write(f"{segment['start']}-{segment['end']}: {segment['text']}\n")

    log_message(f"Full transcription saved to {output_file_path}", log_file)
    return result

  except Exception as e:
    # Best-effort by design: record the failure in the log and signal it with None
    # rather than interrupting the notebook run.
    error_message = f"Error during transcription of {Path(audio_path).name}: {e}"
    log_message(error_message, log_file)
    return None

def log_message(message, log_file="transcription_log.txt"):
  """Append ``message`` followed by a newline to ``log_file``.

  Args:
    message: Value to record; it is formatted into the line with an f-string.
    log_file: Path of the log file; it is opened in append mode, so existing
      entries are kept and the file is created when missing.
  """
  # Explicit UTF-8 so that messages embedding non-ASCII file names or exception
  # text do not depend on the platform's default encoding.
  with open(log_file, "a", encoding="utf-8") as file:
    file.write(f"{message}\n")

# Run the transcription for the configured file with the loaded model; outcome
# messages are appended to the function's default log, transcription_log.txt.
transcribe_and_log_full_transcript(audio_file_path, model)

In [None]:
# Perform Speaker Diarization with pyannote.audio
def perform_diarization_pyannote(audio_path, output_log="diarization_pyannote_log.txt"):
  """Performs speaker diarization using pyannote.audio, logs the turns, and returns them.

  Args:
    audio_path: Path of the audio file handed to the diarization pipeline.
    output_log: Text file that is overwritten with one line per speaker turn.

  Returns:
    A list of ``{'speaker': label, 'start': turn.start, 'end': turn.end}`` dicts,
    in the order ``itertracks`` yields them. This is the list-of-dicts shape that
    ``integrate_diarization_with_transcript`` reads.
  """
  # Initialize the speaker diarization pipeline, authenticating with the locally
  # stored Hugging Face token.
  # NOTE(review): the pipeline class is constructed directly instead of being
  # loaded as a pretrained pipeline; depending on the installed pyannote.audio
  # version its hyper-parameters may need to be instantiated before it can be
  # applied — confirm against the library documentation.
  pipeline = SpeakerDiarization(segmentation="pyannote/segmentation", use_auth_token=HfFolder.get_token())

  # Apply the pipeline on the audio file
  diarization = pipeline({'uri': 'SpeakerDiarization', 'audio': audio_path})

  # Record every turn twice: as a line in the log file and as a dict in the
  # returned list, so the caller receives usable data rather than None.
  speaker_turns = []
  with open(output_log, "w", encoding="utf-8") as log_file:
    for turn, _, speaker in diarization.itertracks(yield_label=True):
      start, end = turn.start, turn.end
      speaker_turns.append({'speaker': speaker, 'start': start, 'end': end})
      log_file.write(f"Speaker: {speaker}, Start: {start}, End: {end}\n")

  print("Diarization completed successfully.")
  return speaker_turns

# Execute diarization on the configured audio file; each speaker turn is written
# to the function's default log, diarization_pyannote_log.txt.
diarization_results = perform_diarization_pyannote(audio_file_path)

In [None]:
# Integrate Diarization Results with Transcription
def integrate_diarization_with_transcript(diarization_results, transcription_segments):
    """
    Integrates diarization results with transcription segments.

    diarization_results: Iterable of dicts with 'speaker', 'start' and 'end'.
        None is accepted and treated as "no diarization available".
    transcription_segments: List of dicts from Whisper transcription with 'text', 'start', and 'end'.

    Returns a list of "<speaker>: <text>" strings, one per transcription segment,
    in input order. Speaker selection per segment:
      1. the first speaker turn that fully contains the segment, if any;
      2. otherwise the speaker turn sharing the most time with the segment;
      3. "Unknown" when no turn overlaps the segment at all.
    """
    # Materialise once so the turns can be scanned again for every segment.
    speaker_turns = [] if diarization_results is None else list(diarization_results)
    integrated_output = []

    for segment in transcription_segments:
        speaker_label = "Unknown"
        best_overlap = 0

        for speaker_segment in speaker_turns:
            # Full containment is an unambiguous match: take it and stop looking.
            if segment['start'] >= speaker_segment['start'] and segment['end'] <= speaker_segment['end']:
                speaker_label = speaker_segment['speaker']
                break

            # Transcription segments often straddle a turn boundary; remember the
            # turn with the largest shared duration as the fallback label.
            overlap = min(segment['end'], speaker_segment['end']) - max(segment['start'], speaker_segment['start'])
            if overlap > best_overlap:
                best_overlap = overlap
                speaker_label = speaker_segment['speaker']

        integrated_output.append(f"{speaker_label}: {segment['text']}")

    return integrated_output

# Example usage:
# Load or define your diarization_results and transcription_segments based on actual outputs from pyannote and Whisper
# In this example, we're using placeholder lists for demonstration purposes.
# NOTE(review): the first assignment rebinds `diarization_results`, replacing whatever
# the diarization cell stored under that name with the placeholder data below.
diarization_results = [{'speaker': 'Speaker 1', 'start': 0, 'end': 10}, {'speaker': 'Speaker 2', 'start': 10, 'end': 20}]
transcription_segments = [{'text': "Hello, this is an example.", 'start': 0, 'end': 5}, {'text': "Another speaker here.", 'start': 10, 'end': 15}]

# Integrate and print the combined output
# With the placeholder data this prints:
#   Speaker 1: Hello, this is an example.
#   Speaker 2: Another speaker here.
integrated_transcript = integrate_diarization_with_transcript(diarization_results, transcription_segments)
for line in integrated_transcript:
  print(line)


In [None]:
# Placeholder for future automation logic
# Here you can add code to check Google Drive for new files, trigger transcription, etc.
# This will be implemented when you're ready to automate the process.
