In [3]:
import os
from google.cloud import speech_v1p1beta1 as speech

def _collect_words_by_speaker(results):
    """Group the diarized words of a recognition response by speaker tag.

    With speaker diarization enabled, the word list of the *last* result
    accumulates every word from all results so far, each carrying its final
    speaker tag. Reading only that last result avoids duplicating words
    (and avoids picking up untagged words from the earlier results).

    Args:
        results: The ``results`` sequence of a recognize response.

    Returns:
        A dict mapping speaker tag to the list of words attributed to that
        speaker, in spoken order. Keys appear in order of first occurrence.
        Empty when there are no results or no alternatives.
    """
    if not results:
        return {}

    alternatives = results[-1].alternatives
    if not alternatives:
        return {}

    words_by_speaker = {}
    for word_info in alternatives[0].words:
        words_by_speaker.setdefault(word_info.speaker_tag, []).append(word_info.word)
    return words_by_speaker


def transcribe_audio_with_diarization(audio_file_path, credentials_path, output_folder):
    """Transcribe an MP3 file with speaker diarization and save it as text.

    The audio is sent to Google Cloud Speech-to-Text in a single synchronous
    ``recognize`` call, the recognized words are grouped per speaker, and the
    result is written to ``transcription1.txt`` inside ``output_folder`` (one
    ``Speaker N: ...`` paragraph per speaker, separated by blank lines).

    Side effects: sets the ``GOOGLE_APPLICATION_CREDENTIALS`` environment
    variable for the current process, creates ``output_folder`` if needed,
    overwrites any existing ``transcription1.txt`` there, and prints the
    path of the written file.

    Args:
        audio_file_path: Path to the audio file to transcribe.
        credentials_path: Path to the Google Cloud service-account key file.
        output_folder: Directory in which the transcription file is written.
    """
    # Set Google Cloud credentials
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

    # Initialize the client
    client = speech.SpeechClient()

    # Read the audio file
    with open(audio_file_path, "rb") as audio_file:
        content = audio_file.read()

    # Configure recognition request
    audio = speech.RecognitionAudio(content=content)
    # NOTE(review): enable_speaker_diarization / diarization_speaker_count look
    # superseded by `diarization_config` in newer API versions - confirm
    # before upgrading the client library.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,  # Change if your file is not MP3
        sample_rate_hertz=16000,  # Replace with your audio's sample rate
        language_code="en-US",
        enable_speaker_diarization=True,
        diarization_speaker_count=2,  # Specify the number of speakers
        model="phone_call"  # Use "phone_call" if applicable
    )

    # Perform the recognition
    # NOTE(review): synchronous recognition is presumably limited to short
    # audio; longer files may need long_running_recognize - confirm.
    response = client.recognize(config=config, audio=audio)

    # Group words by speakers (only the last result carries the full,
    # speaker-tagged word list)
    speaker_transcripts = _collect_words_by_speaker(response.results)

    # Prepare output text: one paragraph per speaker, separated by blank lines
    output_text_str = "\n\n".join(
        f"Speaker {speaker}: {' '.join(words)}"
        for speaker, words in speaker_transcripts.items()
    )

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write to a text file
    output_file_path = os.path.join(output_folder, "transcription1.txt")
    with open(output_file_path, "w", encoding="utf-8") as output_file:
        output_file.write(output_text_str)

    print(f"Transcription saved to {output_file_path}")


# Replace with your audio file path, credentials path, and desired output folder
audio_file_path = "dataset/1215.MP3"  # Change to your file path
credentials_path = "C:/gcloud/service-account-key.json"  # Change to your credentials file path
output_folder = "output"  # Specify the output folder

# Call the function only when this file is executed directly (script or
# notebook cell), so that importing it does not trigger a cloud API request
# and a file write as a side effect.
if __name__ == "__main__":
    transcribe_audio_with_diarization(audio_file_path, credentials_path, output_folder)


Transcription saved to output\transcription1.txt
