In [21]:
import os
import json
from pydub import AudioSegment

def time_to_milliseconds(time_str):
    """Convert an ``HH:MM:SS`` timestamp into integer milliseconds.

    The seconds field may carry a fractional part (``"00:01:02.5"``); a plain
    ``int`` conversion of every field would reject such timestamps.

    Args:
        time_str: Timestamp with exactly three colon-separated fields.

    Returns:
        int: The offset in milliseconds.

    Raises:
        ValueError: If there are not exactly three fields, or a field is not
            numeric.
    """
    fields = time_str.strip().split(':')
    if len(fields) != 3:
        raise ValueError(f"Expected an 'HH:MM:SS' timestamp, got {time_str!r}")

    hours = int(fields[0])
    minutes = int(fields[1])
    seconds = float(fields[2])  # float so that "02.5" is accepted

    # round() returns an int, so whole-second inputs give the same value as before.
    return round((hours * 3600 + minutes * 60 + seconds) * 1000)

def create_folders_and_files(audio_path, json1_path, json2_path, output_folder):
    """Cut an MP3 into per-segment WAV files and write a ``list.txt`` index.

    Each entry of the first JSON file supplies ``segment_start``,
    ``segment_end``, ``week`` and ``page``; the entry at the same position in
    the second JSON file supplies ``transcript``. WAV files are written to
    ``<output_folder>/speech/wavs/<week>_<page>.wav`` and ``list.txt`` gets one
    ``<relative wav path>|<transcript>`` line per segment.

    Args:
        audio_path: Path to the source MP3 file.
        json1_path: Path to the JSON file holding the segment timestamps.
        json2_path: Path to the JSON file holding the transcripts.
        output_folder: Folder that receives ``list.txt`` and ``speech/wavs``.

    Raises:
        FileNotFoundError: If any of the three input files does not exist.
        IndexError: If the transcript file has fewer entries than the
            timestamp file (raised before any output is written).
    """
    # Use absolute paths
    audio_path = os.path.abspath(audio_path)
    json1_path = os.path.abspath(json1_path)
    json2_path = os.path.abspath(json2_path)

    # Check if files exist; on failure report the status of every input
    labelled_inputs = (
        ("Audio", audio_path),
        ("JSON1", json1_path),
        ("JSON2", json2_path),
    )
    if not all(os.path.exists(path) for _, path in labelled_inputs):
        for label, path in labelled_inputs:
            print(f"{label} file: {path} - Exists: {os.path.exists(path)}")
        raise FileNotFoundError("One or more input files not found.")

    # Load audio file
    audio = AudioSegment.from_mp3(audio_path)

    # Load JSON files. Explicit UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(json1_path, 'r', encoding='utf-8') as file:
        json1_data = json.load(file)

    with open(json2_path, 'r', encoding='utf-8') as file:
        json2_data = json.load(file)

    # Fail before exporting anything instead of crashing half-way through
    # the loop with a partially written list.txt.
    if len(json2_data) < len(json1_data):
        raise IndexError(
            f"Transcript file has {len(json2_data)} entries but the timestamp "
            f"file has {len(json1_data)}."
        )

    # Create output folders
    output_folder_path = os.path.join(output_folder, 'speech', 'wavs')
    os.makedirs(output_folder_path, exist_ok=True)

    # Process data and create output files
    list_path = os.path.join(output_folder, 'list.txt')
    with open(list_path, 'w', encoding='utf-8') as list_file:
        # zip stops at the end of json1_data; surplus transcripts are ignored,
        # exactly as the index-based loop did.
        for timing, text in zip(json1_data, json2_data):
            start_time = time_to_milliseconds(timing["segment_start"])
            end_time = time_to_milliseconds(timing["segment_end"])

            # Extract segment from audio (slice bounds are in milliseconds)
            segment = audio[start_time:end_time]

            # Save segment as WAV file
            wav_filename = f"{timing['week']}_{timing['page']}.wav"
            wav_filepath = os.path.join(output_folder_path, wav_filename)
            segment.export(wav_filepath, format="wav")

            # Write entry to list.txt
            list_file.write(f"{os.path.join('speech', 'wavs', wav_filename)}|{text['transcript']}\n")

if __name__ == "__main__":
    # Build the folders with os.path.join rather than embedding backslashes in
    # a normal string literal: "\V" and "\A" are invalid escape sequences
    # (a warning on current Python versions), and a literal backslash is not a
    # path separator outside Windows.
    input_folder = os.path.join("CoMoSpeech_VC", "VQA")
    output_folder = os.path.join("CoMoSpeech_VC", "Audio_generation")

    audio_filename = 'week_05_2023-02-15_GMT20230216-004928_Recording_1988x1118.mp3'
    json1_filename = 'week_5_timestamp.json'
    json2_filename = 'week_5_transcript.json'

    # All three inputs live in the same input folder.
    audio_path = os.path.join(input_folder, audio_filename)
    json1_path = os.path.join(input_folder, json1_filename)
    json2_path = os.path.join(input_folder, json2_filename)

    create_folders_and_files(audio_path, json1_path, json2_path, output_folder)


In [None]:

from pydub import AudioSegment
import json
import os
import platform
from pathlib import Path
import tempfile

# platform_name = platform.system()

# def path_to_ffmpeg():
#     SCRIPT_DIR = Path(__file__).parent 
#     if platform_name == 'Windows':
#         return str(Path(SCRIPT_DIR, "win", "ffmpeg", "ffmpeg.exe"))
#     elif platform_name == 'Darwin':
#         return str(Path(SCRIPT_DIR, "mac", "ffmpeg", "ffmpeg"))
#     else:
#         return str(Path(SCRIPT_DIR, "linux", "ffmpeg", "ffmpeg"))

# AudioSegment.ffmpeg = path_to_ffmpeg()
    
# if platform_name == 'Windows':
#     os.environ["PATH"] += os.pathsep + str(Path(path_to_ffmpeg()).parent)
# else:
#     os.environ["LD_LIBRARY_PATH"] += ":" + str(Path(path_to_ffmpeg()).parent)

# path = os.path.dirname(os.path.realpath(__file__))
# tempfile.tempdir = path
# os.environ["PATH"] += os.pathsep + os.path.join(path, "bin")
# AudioSegment.converter = "D:\\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\ffmpeg\bin\ffmpeg.exe"
# AudioSegment.ffmpeg = "D:\\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\ffmpeg\bin\ffmpeg.exe"
#sound = AudioSegment.from_mp3("test.mp3")

def _timestamp_to_ms(timestamp):
    """Convert an ``HH:MM:SS`` string to integer milliseconds."""
    fields = timestamp.split(":")
    return (
        int(fields[0]) * 60 * 60 * 1000
        + int(fields[1]) * 60 * 1000
        + int(fields[2]) * 1000
    )


def split_mp3_by_segments(mp3_file, json_file_time, json_file_transcript, output_folder):
    """Split an MP3 into WAV segments and append their transcripts to one file.

    Entries of the time JSON (``segment_start``, ``segment_end``, ``week``,
    ``page``) are paired positionally with entries of the transcript JSON
    (``transcript``). Each segment is exported as
    ``<output_folder>/<week>_<page>.wav`` and a line is appended to
    ``<output_folder>/transcript.txt``.

    Args:
        mp3_file: Path to the source MP3 file.
        json_file_time: Path to the JSON file with the segment timestamps.
        json_file_transcript: Path to the JSON file with the transcripts.
        output_folder: Folder that receives the WAV files and transcript.txt.
    """
    json_file_time = Path(json_file_time)
    json_file_transcript = Path(json_file_transcript)
    mp3_file = Path(mp3_file)
    output_folder = Path(output_folder)

    print("MP3 File:", mp3_file)
    print("JSON Time File:", json_file_time)
    print("JSON Transcript File:", json_file_transcript)
    print("Output Folder:", output_folder)

    with open(json_file_time, 'r', encoding='utf-8') as f:
        time_data = json.load(f)

    with open(json_file_transcript, 'r', encoding='utf-8') as f:
        transcript_data = json.load(f)

    # Create the output folder from the *parameter*. The body used to read a
    # name `output_folder_path` that is neither a parameter nor a local, so it
    # only worked when a global of that name happened to exist.
    os.makedirs(output_folder, exist_ok=True)

    # Output transcript file path
    transcript_file_path = os.path.join(output_folder, "transcript.txt")

    # Load the MP3 once; it does not change between segments.
    audio = AudioSegment.from_mp3(mp3_file)

    # Append mode: entries from earlier runs are kept.
    with open(transcript_file_path, 'a', encoding='utf-8') as transcript_file:
        for time_segment, transcript_segment in zip(time_data, transcript_data):
            week = time_segment["week"]
            page = time_segment["page"]
            transcript = transcript_segment["transcript"]

            # Convert start and end timestamps to milliseconds
            start_ms = _timestamp_to_ms(time_segment["segment_start"])
            end_ms = _timestamp_to_ms(time_segment["segment_end"])

            # Extract the segment
            segment = audio[start_ms:end_ms]

            # Save the segment as WAV file
            output_wav_file = os.path.join(output_folder, f"{week}_{page}.wav")
            segment.export(output_wav_file, format="wav")

            # Append transcript information to the common transcript file.
            # NOTE(review): the separator here is "/", which is hard to tell
            # apart from a path component - confirm whether "|" was intended.
            transcript_file.write(f"{output_wav_file}/{transcript}\n")

            print("-"*100)
            print(f"DONE=>{week}_{page}")

if __name__ == "__main__":
    # Project root on the original author's machine; not referenced by the
    # call below.
    base_folder = r"D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC"

    # Inputs and output are resolved relative to the current working
    # directory. Earlier absolute-path assignments to these same names were
    # immediately overwritten and have been dropped.
    mp3_file_path = "week_5.mp3"
    json_file_time_path = "week_5_timestamp.json"
    json_file_transcript_path = "week_5_transcript.json"
    output_folder_path = "out"

    split_mp3_by_segments(mp3_file_path, json_file_time_path, json_file_transcript_path, output_folder_path)


In [13]:
import speech_recognition as sr

def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the offline Sphinx recognizer.

    Args:
        audio_file_path: Path to the audio file to read.

    Returns:
        The recognized text, or ``None`` when recognition fails. In the
        failure case a diagnostic message is printed.
    """
    engine = sr.Recognizer()

    with sr.AudioFile(audio_file_path) as wav_source:
        # Read the whole file into memory before recognizing it.
        recording = engine.record(wav_source)

        try:
            # Sphinx runs locally, so no network access is needed.
            return engine.recognize_sphinx(recording)
        except sr.UnknownValueError:
            print("Sphinx could not understand the audio.")
        except sr.RequestError as err:
            # Raised e.g. when the PocketSphinx module is not installed.
            print(f"Error with Sphinx recognizer; {err}")

    # Only reached when one of the handlers above ran.
    return None

# Path of the WAV file to transcribe - point this at your own audio file
audio_file_path = r'D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\out\5_1.wav'
transcription = transcribe_audio(audio_file_path)

# A falsy result (None or empty text) is reported as a failure.
if not transcription:
    print("Transcription failed.")
else:
    print("Transcription:")
    print(transcription)


Error with Sphinx recognizer; missing PocketSphinx module: ensure that PocketSphinx is set up correctly.
Transcription failed.


In [16]:
import speech_recognition as sr

def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the offline Sphinx recognizer (en-US).

    Args:
        audio_file_path: Path to the audio file to read.

    Returns:
        The recognized text, or ``None`` when recognition fails. In the
        failure case a diagnostic message is printed.
    """
    engine = sr.Recognizer()

    with sr.AudioFile(audio_file_path) as wav_source:
        # Adjust the energy threshold based on your audio.
        # NOTE(review): presumably this setting only matters when listening to
        # a live source, not for record() on a file - confirm against the
        # SpeechRecognition documentation.
        engine.energy_threshold = 4000

        # Read the whole file into memory.
        recording = engine.record(wav_source)

        try:
            # Sphinx runs locally, so no network access is needed.
            return engine.recognize_sphinx(recording, language='en-US')
        except sr.UnknownValueError:
            print("Sphinx could not understand the audio.")
        except sr.RequestError as err:
            print(f"Error with Sphinx recognizer; {err}")

    # Only reached when one of the handlers above ran.
    return None

# Path of the WAV file to transcribe - point this at your own audio file
audio_file_path = r'D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\out\5_2.wav'
transcription = transcribe_audio(audio_file_path)

# A falsy result (None or empty text) is reported as a failure.
if not transcription:
    print("Transcription failed.")
else:
    print("Transcription:")
    print(transcription)


Transcription:
i'm as good an island is the longest laying low now is where the law but it is late when thou was you and he moi who have dual all is the useless all his laying on a day and you know is this enough we're we're we're now altman amenities and in lat latin got it all up come on hey them and again and the


Transcription:
you don't and you and ensign will now are in was no and i owe the get out without the hah that in whoa whoa what i do all along hundred on that is not a new road all and i killed a young on end as the road i'll i i i am today and we we are reviewing the saying the really good huh it of all at long island where the the moon long okay


In [17]:
import os
import json
from pydub import AudioSegment

def time_to_milliseconds(time_str):
    """Convert an ``HH:MM:SS`` timestamp into integer milliseconds.

    The seconds field may carry a fractional part (``"00:01:02.5"``); a plain
    ``int`` conversion of every field would reject such timestamps.

    Args:
        time_str: Timestamp with exactly three colon-separated fields.

    Returns:
        int: The offset in milliseconds.

    Raises:
        ValueError: If there are not exactly three fields, or a field is not
            numeric.
    """
    fields = time_str.strip().split(':')
    if len(fields) != 3:
        raise ValueError(f"Expected an 'HH:MM:SS' timestamp, got {time_str!r}")

    hours = int(fields[0])
    minutes = int(fields[1])
    seconds = float(fields[2])  # float so that "02.5" is accepted

    # round() returns an int, so whole-second inputs give the same value as before.
    return round((hours * 3600 + minutes * 60 + seconds) * 1000)

def create_folders_and_files(audio_path, json1_path, json2_path, output_folder):
    """Cut an MP3 into per-segment WAV files and write a ``list.txt`` index.

    Each entry of the first JSON file supplies ``segment_start``,
    ``segment_end``, ``week`` and ``page``; the entry at the same position in
    the second JSON file supplies ``transcript``. WAV files are written to
    ``<output_folder>/speech/wavs/<week>_<page>.wav`` and ``list.txt`` gets one
    ``<relative wav path>|<transcript>`` line per segment.

    Args:
        audio_path: Path to the source MP3 file.
        json1_path: Path to the JSON file holding the segment timestamps.
        json2_path: Path to the JSON file holding the transcripts.
        output_folder: Folder that receives ``list.txt`` and ``speech/wavs``.

    Raises:
        IndexError: If the transcript file has fewer entries than the
            timestamp file (raised before any output is written).
    """
    # Load audio file
    audio = AudioSegment.from_mp3(audio_path)

    # Load JSON files. Explicit UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(json1_path, 'r', encoding='utf-8') as file:
        json1_data = json.load(file)

    with open(json2_path, 'r', encoding='utf-8') as file:
        json2_data = json.load(file)

    # Fail before exporting anything instead of crashing half-way through
    # the loop with a partially written list.txt.
    if len(json2_data) < len(json1_data):
        raise IndexError(
            f"Transcript file has {len(json2_data)} entries but the timestamp "
            f"file has {len(json1_data)}."
        )

    # Create output folders
    output_folder_path = os.path.join(output_folder, 'speech', 'wavs')
    os.makedirs(output_folder_path, exist_ok=True)

    # Process data and create output files
    list_path = os.path.join(output_folder, 'list.txt')
    with open(list_path, 'w', encoding='utf-8') as list_file:
        # zip stops at the end of json1_data; surplus transcripts are ignored,
        # exactly as the index-based loop did.
        for timing, text in zip(json1_data, json2_data):
            start_time = time_to_milliseconds(timing["segment_start"])
            end_time = time_to_milliseconds(timing["segment_end"])

            # Extract segment from audio (slice bounds are in milliseconds)
            segment = audio[start_time:end_time]

            # Save segment as WAV file
            wav_filename = f"{timing['week']}_{timing['page']}.wav"
            wav_filepath = os.path.join(output_folder_path, wav_filename)
            segment.export(wav_filepath, format="wav")

            # Write entry to list.txt
            list_file.write(f"{os.path.join('speech', 'wavs', wav_filename)}|{text['transcript']}\n")

if __name__ == "__main__":
    input_folder, output_folder = "VQA", "Audio_generation"

    # Earlier variant that read the inputs from input_folder:
    # audio_path = os.path.join(input_folder,  'week_05_2023-02-15_GMT20230216-004928_Recording_1988x1118.mp3')
    # json1_path = os.path.join(input_folder,  'week_5_time.json')
    # json2_path = os.path.join(input_folder,  'week_5[1].json')

    # Inputs are looked up relative to the current working directory.
    audio_path = "week_5.mp3"
    json1_path = "week_5_timestamp.json"
    json2_path = "week_5_transcript.json"

    # NOTE(review): output_folder_path is assigned here, but the call below
    # passes output_folder ("Audio_generation") - confirm which was intended.
    output_folder_path = "out"

    create_folders_and_files(
        audio_path,
        json1_path,
        json2_path,
        output_folder,
    )


FileNotFoundError: [Errno 2] No such file or directory: 'week_5.mp3'

In [9]:
from pydub import AudioSegment
import json
import os

def _timestamp_to_ms(timestamp):
    """Convert an ``HH:MM:SS`` string to integer milliseconds."""
    fields = timestamp.split(":")
    return (
        int(fields[0]) * 60 * 60 * 1000
        + int(fields[1]) * 60 * 1000
        + int(fields[2]) * 1000
    )


def split_mp3_by_segments(mp3_file, json_file_time, json_file_transcript, output_folder):
    """Split an MP3 into WAV segments, one sub-folder per segment.

    Entries of the time JSON (``segment_start``, ``segment_end``, ``week``,
    ``page``) are paired positionally with entries of the transcript JSON
    (``transcript``). For every pair a folder
    ``<output_folder>/<week>_<page>/`` is created holding
    ``<week>_<page>.wav`` and a ``transcript.txt`` to which one line is
    appended.

    Args:
        mp3_file: Path to the source MP3 file.
        json_file_time: Path to the JSON file with the segment timestamps.
        json_file_transcript: Path to the JSON file with the transcripts.
        output_folder: Root folder for the per-segment sub-folders.
    """
    print(mp3_file,json_file_time,json_file_transcript)

    # Explicit UTF-8 so reading does not depend on the platform's locale.
    with open(json_file_time, 'r', encoding='utf-8') as f:
        time_data = json.load(f)

    with open(json_file_transcript, 'r', encoding='utf-8') as f:
        transcript_data = json.load(f)

    # Load the MP3 once; it does not change between segments.
    audio = AudioSegment.from_mp3(mp3_file)

    for time_segment, transcript_segment in zip(time_data, transcript_data):
        week = time_segment["week"]
        page = time_segment["page"]
        transcript = transcript_segment["transcript"]

        # Convert start and end timestamps to milliseconds
        start_ms = _timestamp_to_ms(time_segment["segment_start"])
        end_ms = _timestamp_to_ms(time_segment["segment_end"])

        # Extract the segment
        segment = audio[start_ms:end_ms]

        # Create output folder if it doesn't exist
        output_folder_path = os.path.join(output_folder, f"{week}_{page}")
        os.makedirs(output_folder_path, exist_ok=True)

        # Save the segment as WAV file
        output_wav_file = os.path.join(output_folder_path, f"{week}_{page}.wav")
        segment.export(output_wav_file, format="wav")

        # Write transcript information to text file (append mode keeps
        # entries from earlier runs).
        # NOTE(review): the separator here is "/", which is hard to tell apart
        # from a path component - confirm whether "|" was intended.
        text_file_path = os.path.join(output_folder_path, "transcript.txt")
        with open(text_file_path, 'a', encoding='utf-8') as text_file:
            text_file.write(f"{output_wav_file}/{transcript}\n")

if __name__ == "__main__":
    # Raw strings: the previous literals relied on invalid escape sequences
    # such as "\S", "\A", "\V", "\w" and "\o" being passed through unchanged,
    # which current Python versions warn about. The resulting path values are
    # identical.
    mp3_file_path = r"D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\VQA\week_5.mp3"
    json_file_time_path = r"D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\VQA\week_5_timestamp.json"
    json_file_transcript_path = r"D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\VQA\week_5_transcript.json"
    output_folder_path = r"D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\out"

    split_mp3_by_segments(mp3_file_path, json_file_time_path, json_file_transcript_path, output_folder_path)


D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\VQA\week_5.mp3 D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\VQA\week_5_timestamp.json D:\Yeshiva\Spring24\AI_of_Application\CoMoSpeech_VC\VQA\week_5_transcript.json


FileNotFoundError: [WinError 2] The system cannot find the file specified