In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install SpeechRecognition pydub

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.1-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, SpeechRecognition
Successfully installed SpeechRecognition-3.10.1 pydub-0.25.1


In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from moviepy.editor import VideoFileClip
import speech_recognition as sr
from pydub import AudioSegment
import csv
from pydub.utils import make_chunks
from pydub.silence import split_on_silence

In [3]:
audios_folder="/content/drive/MyDrive/NPTEL Audios"

In [10]:
def mp3_to_wav(audio_file):
    print(f"Converting: {audio_file}")
    sound = AudioSegment.from_mp3(audio_file)
    wav_file = audio_file.replace(".mp3", ".wav")
    sound.export(wav_file, format="wav")
    return wav_file

In [14]:
def extract_text(audio_file):
    recognizer = sr.Recognizer()
    print(f"Extracting: {audio_file}")
    chunks = split_on_silence(audio_file,
      # must be silent for at least 0.5 seconds
      # or 500 ms. adjust this value based on user
      # requirement. if the speaker stays silent for
      # longer, increase this value. else, decrease it.
      min_silence_len = 500,

      # consider it silent if quieter than -16 dBFS
      # adjust this per requirement
      silence_thresh = -16
    )
    try:
        os.mkdir('audio_chunks')
    except(FileExistsError):
        pass
    # move into the directory to
    # store the audio files.
    os.chdir('audio_chunks')
    i = 0
    # process each chunk
    for chunk in chunks:

        # Create 0.5 seconds silence chunk
        chunk_silent = AudioSegment.silent(duration = 10)

        # add 0.5 sec silence to beginning and
        # end of audio chunk. This is done so that
        # it doesn't seem abruptly sliced.
        audio_chunk = chunk_silent + chunk + chunk_silent

        # export audio chunk and save it in
        # the current directory.
        print("saving chunk{0}.wav".format(i))
        # specify the bitrate to be 192 k
        audio_chunk.export("./chunk{0}.wav".format(i), bitrate ='192k', format ="wav")

        # the name of the newly created chunk
        filename = 'chunk'+str(i)+'.wav'

        print("Processing chunk "+str(i))

        # get the name of the newly created chunk
        # in the AUDIO_FILE variable for later use.
        file = filename
    with sr.AudioFile(file) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            return text
        except sr.UnknownValueError:
            return "Could not understand the audio"
        except sr.RequestError as e:
            return f"Error occurred: {e}"

In [6]:
video_csv = "/content/drive/MyDrive/VideoSubtitles.csv"
data = []

In [15]:

for filename in os.listdir(audios_folder):
    if filename.endswith(".mp3"):
        audio_file_path = os.path.abspath(os.path.join(audios_folder, filename))
        print(f"Processing: {audio_file_path}")
        if os.path.exists(audio_file_path):
            wav_file_path = mp3_to_wav(audio_file_path)
            extracted_text = extract_text(wav_file_path)
            data.append({"Video file name": filename, "Extracted text": extracted_text})
            os.remove(wav_file_path)
            i=i+1
        else:
            print(f"File not found: {audio_file_path}")

Processing: /content/drive/MyDrive/NPTEL Audios/priority queues and heaps.mp3
Converting: /content/drive/MyDrive/NPTEL Audios/priority queues and heaps.mp3
Extracting: /content/drive/MyDrive/NPTEL Audios/priority queues and heaps.wav


ValueError: could not convert string to float: '/content/drive/MyDrive/NPTEL Audios/priority queues and heaps.wav'

In [8]:
data

[{'Video file name': 'priority queues and heaps.mp3',
  'Extracted text': 'Error occurred: recognition connection failed: [Errno 32] Broken pipe'},
 {'Video file name': 'Prof  A  Basu.mp3',
  'Extracted text': 'welcome to the course of problem solving through programming the name of the course emphasizes problem-solving and the means of solving problems will be through programming and in particular in this course we will adopt the language C just as a vehicle to show how programming is done the major emphasis of this course will however be on the general concepts of programming the different constructs that are used in programming and how the tool the programming tool and technique can we use to solve different types of problems that we encountered in our life now if we say when we say that we will be trying to solve problems that question that naturally arrives is what type of problems are we going to solve we are not going to solve all sorts of problems that are there in the world of

In [None]:
output_csv="/content/drive/MyDrive/VideoSubtitles.csv"
with open(output_csv, mode='a', newline='', encoding='utf-8') as csvfile:
    fieldnames = ["Audio file name", "Extracted text"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for entry in data:
        writer.writerow(entry)

print(f"Extraction completed. Data saved in: {output_csv}")