In [5]:
# Pinned to the release this notebook was last run with, and installed via
# %pip so the package lands in the environment of the running kernel.
%pip install SpeechRecognition==3.10.3

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.3-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.3


In [8]:
import speech_recognition as sr

def audio_to_text(audio_file_path):
    """Transcribe a whole audio file with the Google Web Speech API.

    Args:
        audio_file_path: Path to an audio file that ``sr.AudioFile`` can open.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood or the API request failed. In both failure cases a
        message describing the problem is printed.
    """
    recognizer = sr.Recognizer()

    # Record the file from its very first frame. adjust_for_ambient_noise() is
    # deliberately not called: it reads (and thereby discards) the start of the
    # stream, and the energy threshold it tunes is not used by record().
    with sr.AudioFile(audio_file_path) as source:
        audio = recognizer.record(source)

    try:
        # Recognize speech using Google Web Speech API
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Google Web Speech API could not understand audio")
    except sr.RequestError as e:
        print(f"Could not request results from Google Web Speech API; {e}")

    # Both failure paths fall through to here.
    return None

# Path of the WAV recording to transcribe in a single request.
audio_file_path = "/content/rpp162.wav"

# Transcribe the whole file at once. `result` is None when recognition failed;
# in that case audio_to_text has already printed the reason.
result = audio_to_text(audio_file_path)

# Show the transcript (prints "None" after a failed recognition).
print("Text from audio:", result)


Text from audio: hello everyone and welcome this is the rapid Planet podcast today is February 6th 2012 we're going to be talking about the latest and greatest technology news from the past 2 weeks but this episode is a little bit different because well we got new Mike's people that's right brand new podcasting Mike's in the house you can definitely hear a difference of how much better these sound so I'm really excited to get this and let's try them out let's get to some tech news hey Google


In [9]:
# Pinned to the release this notebook was last run with, and installed via
# %pip so the package lands in the environment of the running kernel.
%pip install pydub==0.25.1

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [14]:
from pydub import AudioSegment
import os

def split_audio(input_file, output_folder, segment_duration=15):
    """Split a WAV file into consecutive fixed-length WAV segments on disk.

    Each segment is written to ``output_folder`` as
    ``segment_<start>-<end>.wav``, where both bounds are whole seconds
    (floored). The final segment is shorter when the audio length is not a
    multiple of ``segment_duration``. One line is printed per segment, whether
    the export succeeded or failed; a failed export does not stop the loop.

    Args:
        input_file: Path of the WAV file to split.
        output_folder: Directory receiving the segments; created if missing.
        segment_duration: Length of each segment in seconds. Must be positive.

    Raises:
        ValueError: If ``segment_duration`` is zero or negative.
    """
    # A negative step would make the loop silently export nothing and a zero
    # step would fail deep inside range(); reject both up front.
    if segment_duration <= 0:
        raise ValueError(
            f"segment_duration must be a positive number of seconds, got {segment_duration!r}"
        )

    # exist_ok avoids the check-then-create race and handles nested paths.
    os.makedirs(output_folder, exist_ok=True)

    audio = AudioSegment.from_wav(input_file)
    audio_duration = len(audio)  # pydub reports lengths in milliseconds
    segment_ms = segment_duration * 1000

    for start_time in range(0, audio_duration, segment_ms):
        # Clamp so the last segment ends exactly at the end of the audio.
        end_time = min(start_time + segment_ms, audio_duration)
        segment = audio[start_time:end_time]
        segment_path = os.path.join(output_folder, f"segment_{start_time//1000}-{end_time//1000}.wav")

        try:
            segment.export(segment_path, format="wav")
            print(f"Segment exported: {segment_path}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next segment.
            print(f"Error exporting segment {start_time}-{end_time}: {e}")

# Path of the WAV recording that gets cut into shorter pieces.
large_audio_file_path = "/content/rpp162.wav"

# Directory that receives the exported segment files (split_audio creates it
# when it does not exist yet).
output_folder = "/content/audio_segments"

# Split the recording using split_audio's default segment length (15 seconds).
split_audio(large_audio_file_path, output_folder)



Segment exported: /content/audio_segments/segment_0-15.wav
Segment exported: /content/audio_segments/segment_15-30.wav
Segment exported: /content/audio_segments/segment_30-45.wav
Segment exported: /content/audio_segments/segment_45-60.wav
Segment exported: /content/audio_segments/segment_60-75.wav
Segment exported: /content/audio_segments/segment_75-90.wav
Segment exported: /content/audio_segments/segment_90-101.wav


In [21]:
import os

import speech_recognition as sr
from pydub import AudioSegment

def transcribe_audio_segment(segment_file):
    """Return the Google Web Speech transcription of one audio file.

    Args:
        segment_file: Path of the audio file to transcribe.

    Returns:
        The recognized text. An empty string is returned, after printing a
        diagnostic, when the audio cannot be understood or the request to the
        service fails.
    """
    engine = sr.Recognizer()

    # Pull the complete clip into memory before contacting the service.
    with sr.AudioFile(segment_file) as clip:
        captured = engine.record(clip)

    # Stays empty unless recognition succeeds.
    transcript = ""
    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        print(f"Speech recognition could not understand audio: {segment_file}")
    except sr.RequestError as err:
        print(f"Speech recognition request error: {err}")
    return transcript

def process_audio_file(input_audio_file, segment_duration=60):
    """Transcribe a WAV file piece by piece.

    The audio is cut into consecutive segments of ``segment_duration``
    seconds (the last one may be shorter). Each segment is exported to a
    temporary ``segment_<start>-<end>.wav`` file in the current working
    directory, passed to ``transcribe_audio_segment``, and then deleted.

    Args:
        input_audio_file: Path of the WAV file to transcribe.
        segment_duration: Length of each segment in seconds. Must be positive.

    Returns:
        A list with one transcription string per segment, in playback order.

    Raises:
        ValueError: If ``segment_duration`` is zero or negative.
    """
    # A negative step would silently yield no segments and a zero step would
    # fail deep inside range(); reject both up front.
    if segment_duration <= 0:
        raise ValueError(
            f"segment_duration must be a positive number of seconds, got {segment_duration!r}"
        )

    audio = AudioSegment.from_wav(input_audio_file)
    audio_duration = len(audio)  # pydub reports lengths in milliseconds
    segment_ms = segment_duration * 1000

    segments = []
    for start_time in range(0, audio_duration, segment_ms):
        # Clamp so the last segment ends exactly at the end of the audio.
        end_time = min(start_time + segment_ms, audio_duration)
        segment = audio[start_time:end_time]

        # Export segment to temporary WAV file
        segment_file = f"segment_{start_time//1000}-{end_time//1000}.wav"
        try:
            segment.export(segment_file, format="wav")

            # Transcribe the audio segment
            segments.append(transcribe_audio_segment(segment_file))
        finally:
            # Always clean up, even when export or transcription raised, so a
            # failed run does not leave stray segment files behind.
            if os.path.exists(segment_file):
                os.remove(segment_file)

    return segments

# Path of the WAV recording to transcribe segment by segment.
large_audio_file_path = "/content/rpp162.wav"

# Transcribe using process_audio_file's default segment length (60 seconds).
# The result holds one string per segment, in playback order.
transcribed_segments = process_audio_file(large_audio_file_path)

# Print each transcription with a 1-based segment number. A segment whose
# recognition failed appears as an empty string.
for i, segment_text in enumerate(transcribed_segments, start=1):
    print(f"Segment {i} Transcription: {segment_text}")


Segment 1 Transcription: hello everyone and welcome this is the rapid Planet podcast today is February 6th 2012 we're going to be talking about the latest and greatest technology news from the past 2 weeks but this episode is a little bit different because well we got new Mike's people that's right brand new podcasting Mike's in the house you can definitely hear a difference of how much better these sound so really excited to get this and let's try them out let's get to some tech news
Segment 2 Transcription: and we're ready to talk about some tech news so Microsoft now it's recently that it's going to be getting rid of its developer conference and my Xbox and it's kind of interesting because they just recently announced that they were pulling out of Cs 2012
