In [6]:
from google.cloud import speech_v1p1beta1 as speech
import os

def transcribe_audio_with_diarization(audio_file_path, credentials_path):
    # Set Google Cloud credentials
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

    # Initialize the client
    client = speech.SpeechClient()

    # Read the audio file
    with open(audio_file_path, "rb") as audio_file:
        content = audio_file.read()

    # Configure recognition request
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,  # Specify MP3 encoding
        sample_rate_hertz=44100,  # Replace with your audio's sample rate
        language_code="en-US",
        enable_speaker_diarization=True,
        diarization_speaker_count=2,
        model="video"  # Use "phone_call" if applicable
    )


    # Perform the recognition
    response = client.recognize(config=config, audio=audio)

    # Process and print the results
    for result in response.results:
        # The first alternative is the most likely transcription
        alternative = result.alternatives[0]
        print("Transcript:", alternative.transcript)

        # Access diarization information
        words_info = alternative.words
        for word_info in words_info:
            print(
                f"Word: {word_info.word}, Speaker: {word_info.speaker_tag}"
            )

    # Group the transcript by speakers
    speaker_transcripts = {}
    for result in response.results:
        for word_info in result.alternatives[0].words:
            speaker = word_info.speaker_tag
            word = word_info.word
            if speaker not in speaker_transcripts:
                speaker_transcripts[speaker] = []
            speaker_transcripts[speaker].append(word)

    # Print speaker-wise transcripts
    for speaker, words in speaker_transcripts.items():
        print(f"Speaker {speaker}: {' '.join(words)}")

# Replace with the path to your audio file and credentials file
audio_file_path = "dataset/1215.MP3"
credentials_path = "C:/gcloud/service-account-key.json"

# Call the function
transcribe_audio_with_diarization(audio_file_path, credentials_path)


Transcript: so uh just chat with could you please um Talk a bit more about CBM Nigeria what it is doing when it has started from project in Nigeria or activities and what are you of activities
Word: so, Speaker: 0
Word: uh, Speaker: 0
Word: just, Speaker: 0
Word: chat, Speaker: 0
Word: with, Speaker: 0
Word: could, Speaker: 0
Word: you, Speaker: 0
Word: please, Speaker: 0
Word: um, Speaker: 0
Word: Talk, Speaker: 0
Word: a, Speaker: 0
Word: bit, Speaker: 0
Word: more, Speaker: 0
Word: about, Speaker: 0
Word: CBM, Speaker: 0
Word: Nigeria, Speaker: 0
Word: what, Speaker: 0
Word: it, Speaker: 0
Word: is, Speaker: 0
Word: doing, Speaker: 0
Word: when, Speaker: 0
Word: it, Speaker: 0
Word: has, Speaker: 0
Word: started, Speaker: 0
Word: from, Speaker: 0
Word: project, Speaker: 0
Word: in, Speaker: 0
Word: Nigeria, Speaker: 0
Word: or, Speaker: 0
Word: activities, Speaker: 0
Word: and, Speaker: 0
Word: what, Speaker: 0
Word: are, Speaker: 0
Word: you, Speaker: 0
Word: of, Speaker: 0
Word: a