In [8]:
from google.cloud import speech

def transcribe_audio_with_word_timestamps(
    gcs_uri,
    language_code="en-US",
    sample_rate_hertz=44100,
    timeout=600,
):
    """Transcribe an audio file in Google Cloud Storage with per-word timings.

    Submits a long-running recognition request for the MP3 audio at ``gcs_uri``
    with automatic punctuation and word time offsets enabled, waits for it to
    finish, and collects the top alternative of every result.

    Args:
        gcs_uri: ``gs://bucket/object`` URI of the audio to transcribe.
        language_code: Language tag passed to the recognizer.
        sample_rate_hertz: Sample rate of the audio, in hertz.
        timeout: Maximum number of seconds to wait for the operation result.

    Returns:
        A ``(transcript_text, word_timestamps)`` tuple. ``transcript_text`` is
        the transcript of all results joined by single spaces.
        ``word_timestamps`` is a list of dicts with the keys ``"word"``,
        ``"start_time"`` and ``"end_time"`` (times in seconds), in spoken order.
    """
    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,  # Use FLAC or LINEAR16 for best results
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
        enable_word_time_offsets=True,  # Enables word timestamps
        enable_automatic_punctuation=True,  # Enables automatic punctuation
    )

    operation = client.long_running_recognize(config=config, audio=audio)
    response = operation.result(timeout=timeout)  # Wait for operation to complete

    transcript_parts = []  # One cleaned transcript chunk per result
    word_timestamps = []  # Word-level timestamps

    for result in response.results:
        # A result without any hypothesis carries nothing to collect; indexing
        # [0] on it would raise IndexError.
        if not result.alternatives:
            continue

        alternative = result.alternatives[0]  # Best alternative

        # Chunks can carry their own leading/trailing whitespace; strip each
        # one so the joined transcript has no doubled spaces at the seams.
        chunk = alternative.transcript.strip()
        if chunk:
            transcript_parts.append(chunk)

        for word_info in alternative.words:
            word_timestamps.append({
                "word": word_info.word,
                "start_time": word_info.start_time.total_seconds(),
                "end_time": word_info.end_time.total_seconds(),
            })

    return " ".join(transcript_parts), word_timestamps

# Example: transcribe one clip stored in GCS and show everything that came back
gcs_uri = "gs://simpliearn-audio/tesla_q4_split.mp3"
transcript_text, word_timestamps = transcribe_audio_with_word_timestamps(gcs_uri)

# Header and transcript on consecutive lines
print("Transcript with Punctuation:", transcript_text, sep="\n")

# One formatted line per recognized word, printed under a blank-line-prefixed header
timestamp_lines = [
    f"{word['word']} -> Start: {word['start_time']}s, End: {word['end_time']}s"
    for word in word_timestamps
]
print("\n".join(["\nWord Timestamps:", *timestamp_lines]))


Transcript with Punctuation:
Good afternoon everyone and welcome to Teslas. Fourth quarter, 2024 Cuban a webcast. My name is Travis, Axelrod investor relations and a number of other executives. Are Q4 results were announced at about 3 p.m. central Time in the update deck be published at the same length as a business Outlook. And make forward-looking statements are based on our predictions and expectations as of today, actual events or results could differ materially due to a number of risks and uncertainties, including those mentioned our most recent filings with the SEC.  During the question-and-answer portion of today's call, please limit yourself to one question and one follow-up. Please use the phrase hand button to join the question to you. Before we jump in. The Q&A Elan has some marks on an annualised rate of a year except work retrieving record production and deliveries electrical the best vehicle of any kind on Earth. Another one with Spotify on volume four,  It was really fun

In [9]:
import nltk
from nltk.tokenize import sent_tokenize

# Fetch NLTK's "punkt_tab" data package; NLTK reports it as up-to-date when it
# is already installed. Presumably this is the tokenizer data that
# sent_tokenize relies on — confirm against the NLTK documentation.
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/minlee/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [10]:
def split_intto_sentences(text):
    """Split ``text`` into sentences using NLTK's ``sent_tokenize``.

    The function name carries a typo ("intto"); it is left as-is because
    callers reference it by this name.

    Args:
        text: The text to segment.

    Returns:
        Whatever ``sent_tokenize(text)`` returns for the given text.
    """
    sentence_list = sent_tokenize(text)
    return sentence_list

sentences = split_intto_sentences(transcript_text)

# Number the sentences from 1 and show them one per line
numbered_sentences = [
    f"Sentence {idx + 1}: {sentence}"
    for idx, sentence in enumerate(sentences)
]
if numbered_sentences:
    print("\n".join(numbered_sentences))

Sentence 1: Good afternoon everyone and welcome to Teslas.
Sentence 2: Fourth quarter, 2024 Cuban a webcast.
Sentence 3: My name is Travis, Axelrod investor relations and a number of other executives.
Sentence 4: Are Q4 results were announced at about 3 p.m. central Time in the update deck be published at the same length as a business Outlook.
Sentence 5: And make forward-looking statements are based on our predictions and expectations as of today, actual events or results could differ materially due to a number of risks and uncertainties, including those mentioned our most recent filings with the SEC.
Sentence 6: During the question-and-answer portion of today's call, please limit yourself to one question and one follow-up.
Sentence 7: Please use the phrase hand button to join the question to you.
Sentence 8: Before we jump in.
Sentence 9: The Q&A Elan has some marks on an annualised rate of a year except work retrieving record production and deliveries electrical the best vehicle of 

In [24]:
def assign_timestamps_to_sentences(sentences, word_timestamps):
    """Attach start/end times to each sentence by counting words.

    Sentences are consumed in order. Each sentence is split on whitespace and
    assumed to cover that many consecutive entries of ``word_timestamps``; its
    start time is the first covered word's ``"start_time"`` and its end time is
    the last covered word's ``"end_time"``.

    Args:
        sentences: Iterable of sentence strings, in spoken order.
        word_timestamps: Sequence of dicts with ``"start_time"`` and
            ``"end_time"`` keys, one per spoken word, in spoken order.

    Returns:
        A list of dicts with the keys ``"sentence"``, ``"start_time"`` and
        ``"end_time"``, one per non-blank sentence. If a sentence runs past the
        end of ``word_timestamps`` its end time is taken from the last
        available word; if no timed words remain at all for a sentence, both
        times are ``None``.
    """
    sentence_timestamps = []
    total_words = len(word_timestamps)
    word_index = 0  # Tracks position in the word list

    for sentence in sentences:
        word_count = len(sentence.split())
        if word_count == 0:
            # Blank sentence: consumes no words and produces no entry.
            continue

        if word_index < total_words:
            # Clamp the last index so a word-count mismatch between the
            # sentence text and the timed word list cannot raise IndexError.
            last_index = min(word_index + word_count, total_words) - 1
            start_time = word_timestamps[word_index]["start_time"]
            end_time = word_timestamps[last_index]["end_time"]
        else:
            # Timing data is exhausted; keep the sentence but mark it untimed.
            start_time = None
            end_time = None

        # Store the matched sentence with timestamps
        sentence_timestamps.append({
            "sentence": sentence,
            "start_time": start_time,
            "end_time": end_time,
        })

        # Move word_index forward
        word_index += word_count

    return sentence_timestamps

# Example: pair every sentence with the time span of the words it covers
matched_sentences = assign_timestamps_to_sentences(sentences, word_timestamps)

# Show each sentence followed by its time span and a blank separator line
for item in matched_sentences:
    print(
        f"Sentence: {item['sentence']}",
        f"Start Time: {item['start_time']}s, End Time: {item['end_time']}s\n",
        sep="\n",
    )


Sentence: Good afternoon everyone and welcome to Teslas.
Start Time: 18.4s, End Time: 20.4s

Sentence: Fourth quarter, 2024 Cuban a webcast.
Start Time: 20.4s, End Time: 22.8s

Sentence: My name is Travis, Axelrod investor relations and a number of other executives.
Start Time: 22.8s, End Time: 31.9s

Sentence: Are Q4 results were announced at about 3 p.m. central Time in the update deck be published at the same length as a business Outlook.
Start Time: 31.9s, End Time: 43.0s

Sentence: And make forward-looking statements are based on our predictions and expectations as of today, actual events or results could differ materially due to a number of risks and uncertainties, including those mentioned our most recent filings with the SEC.
Start Time: 43.0s, End Time: 56.7s

Sentence: During the question-and-answer portion of today's call, please limit yourself to one question and one follow-up.
Start Time: 58.0s, End Time: 63.4s

Sentence: Please use the phrase hand button to join the quest