In [None]:
!pip install git+https://github.com/openai/whisper.git -q -U

In [None]:
!pip install yt-dlp -q -U

In [None]:
# Download the video's audio in m4a format with yt-dlp; the -o template
# names the file /content/<video id>.<extension>.
!yt-dlp https://youtu.be/zmf1Kujygt8 --format m4a -o "/content/%(id)s.%(ext)s"
# Transcribe the downloaded file with the Whisper CLI, using the "small"
# model and with the language set to English.
!whisper "/content/zmf1Kujygt8.m4a" --model small --language English

# Convert MPEG-4 audio (.m4a) to MP3 with Python

In [None]:
! pip install pydub

In [None]:
import os
from pydub import AudioSegment

def convert_m4a_to_mp3(input_file_path, output_file_path):
    """
    Convert a single M4A audio file to MP3 format.

    Failures are reported on stdout instead of being raised, so a batch
    caller can carry on with the remaining files; the return value tells
    the caller whether this particular file was converted.

    Args:
        input_file_path (str): Path of the M4A file to read.
        output_file_path (str): Path of the MP3 file to write.

    Returns:
        bool: True if the file was converted, False if any step failed.
    """
    try:
        audio = AudioSegment.from_file(input_file_path, format="m4a")
        audio.export(output_file_path, format="mp3")
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort a whole batch.
        # The message names the file so the failure can be traced afterwards.
        print(f"Error during conversion of '{input_file_path}': {e}")
        return False

    print(f"Successfully converted '{input_file_path}' to '{output_file_path}'")
    return True

def convert_folder_m4a_to_mp3(input_folder, output_folder):
    """
    Converts all M4A files located directly inside a folder to MP3 format.

    Files are processed in sorted name order so that repeated runs convert
    (and log) in the same sequence. Sub-folders are not searched, and an
    entry that is not a regular file is ignored even if its name ends in
    ".m4a". The extension check is case-insensitive.

    Args:
        input_folder (str): Path to folder containing M4A files
        output_folder (str): Path to folder where MP3 files will be saved
            (created, including missing parents, if it doesn't exist)
    """
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith('.m4a'):
            continue

        input_file_path = os.path.join(input_folder, filename)
        if not os.path.isfile(input_file_path):
            # A directory that merely happens to be named "*.m4a".
            continue

        # Same base name, with the extension swapped for .mp3
        output_filename = os.path.splitext(filename)[0] + '.mp3'
        output_file_path = os.path.join(output_folder, output_filename)

        convert_m4a_to_mp3(input_file_path, output_file_path)

# Batch-convert the Chinese-accent recordings from the Kaggle dataset;
# the MP3 files go to a relative "chinese_accent" folder.
input_folder, output_folder = (
    "/kaggle/input/fine-tuning-dataset/chineseaccent/chineseaccent",
    "chinese_accent",
)
convert_folder_m4a_to_mp3(input_folder=input_folder, output_folder=output_folder)

## Conversion of mpeg to mp3

In [None]:
# print("Hello")

In [None]:
from pydub import AudioSegment

# Where the MP3 copy should go (a relative name) and where the WhatsApp
# recording is read from.
output_file = "WhatsApp Audio 2025-11-04 at 9.42.20 PM.mp3"
input_file = "/kaggle/input/fine-tuning-dataset/WhatsApp Audio 2025-11-04 at 9.42.20 PM.mpeg"

# Decode the recording, passing "mpeg" as the input format.
# Requires ffmpeg to be installed and reachable on PATH.
audio = AudioSegment.from_file(input_file, format="mpeg")

# Re-encode as MP3 at a 320k bitrate, then report the destination.
audio.export(
    output_file,
    bitrate="320k",
    format="mp3",
)
print(f"‚úÖ Successfully converted to: {output_file}")


In [None]:
import subprocess
from pydub import AudioSegment
from IPython.display import Audio, display

# The WhatsApp recording to inspect and re-encode, and the path under
# /kaggle/working where the repaired MP3 copy is written.
input_file = "/kaggle/input/fine-tuning-dataset/WhatsApp Audio 2025-11-04 at 9.42.20 PM.mpeg"
output_file = "/kaggle/working/WhatsApp Audio 2025-11-04 at 9.42.20 PM_fixed.mp3"

# üß† Step 1: Detect the actual codec/format using ffprobe
def detect_codec(file_path):
    """
    Ask ffprobe for the codec name of the first audio stream in a file.

    Args:
        file_path (str): Path of the media file to inspect.

    Returns:
        str | None: The codec name printed by ffprobe, or None when ffprobe
        cannot be launched, exits with an error, or prints no codec name
        (for instance because the file has no audio stream).
    """
    command = [
        "ffprobe", "-v", "error",                      # suppress everything except errors
        "-select_streams", "a:0",                      # first audio stream only
        "-show_entries", "stream=codec_name",          # print just the codec name
        "-of", "default=noprint_wrappers=1:nokey=1",   # bare value, no key or section header
        file_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: ffprobe ran but returned a non-zero exit status.
        # OSError: ffprobe could not be started at all (e.g. not installed).
        print("‚ö†Ô∏è Could not detect codec:", e)
        return None

    codec = result.stdout.strip()
    if not codec:
        # ffprobe succeeded but reported nothing; an empty string is not a usable codec.
        print("‚ö†Ô∏è Could not detect codec:", "ffprobe reported no audio stream")
        return None

    print(f"üéß Detected codec: {codec}")
    return codec

codec = detect_codec(input_file)

# Step 2: Let ffmpeg auto-detect the input first; only if that fails,
# retry while forcing the decoder that ffprobe reported.
try:
    audio = AudioSegment.from_file(input_file)
    print(f"‚úÖ Loaded audio successfully ({len(audio) / 1000:.2f} sec)")
except Exception:
    if not codec:
        # No codec was detected, so a retry would just repeat the same
        # failing call; surface the original error instead.
        raise
    print("‚ö†Ô∏è Default load failed, retrying with codec name...")
    # detect_codec() returns a codec name, not a container format, so it is
    # passed as pydub's `codec` argument (forces the audio decoder).
    audio = AudioSegment.from_file(input_file, codec=codec)

# Step 3: Export to MP3 at a 320k bitrate
audio.export(output_file, format="mp3", bitrate="320k")
print(f"‚úÖ Exported fixed audio to: {output_file}")

# Step 4: Embed an audio player for the exported file in the notebook output
display(Audio(output_file))


## Conversion of m4a into mp3

In [None]:
import os
from pydub import AudioSegment

# Folder containing the .m4a voice memos, and the folder for the MP3 copies.
# The output folder is the one archived by the zip cell further down
# (/kaggle/working/Voice_memo); the previous value "Voice " had a trailing
# space and did not match it.
input_folder = "/kaggle/input/fine-tuning-dataset/Voice memo"
output_folder = "/kaggle/working/Voice_memo"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# os.listdir order is arbitrary; sort so every run converts in the same order.
for filename in sorted(os.listdir(input_folder)):
    if filename.lower().endswith(".m4a"):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, os.path.splitext(filename)[0] + ".mp3")

        # Decode the M4A file and write it back out as MP3
        audio = AudioSegment.from_file(input_path, format="m4a")
        audio.export(output_path, format="mp3")

print("üéâ All conversions complete!")


## Zip the folder

In [None]:
import shutil
import os

# The folder to archive and the archive's base name are the same path:
# the ".zip" extension is not part of the base name passed to make_archive.
output_zip_path = "/kaggle/working/Voice_memo"
folder_to_zip = "/kaggle/working/Voice_memo"

# Pack the folder's contents into <output_zip_path>.zip
shutil.make_archive(
    base_name=output_zip_path,
    format='zip',
    root_dir=folder_to_zip,
)

print(f"Successfully created: {output_zip_path}.zip")

## Extracting voice data text from the audio mp3 samples


In [2]:
import shutil
import os

# Copy the dataset's chinese_accent folder to a relative "chinese_accent" folder.
destination_folder = "chinese_accent"
source_folder = "/kaggle/input/fine-tuning-dataset/chinese_accent"

try:
    shutil.copytree(src=source_folder, dst=destination_folder)
except FileExistsError:
    # Raised when the destination is already present; report it and move on.
    print(f"Error: Destination folder '{destination_folder}' already exists.")
except Exception as e:
    # Any other failure is reported rather than stopping the notebook.
    print(f"An error occurred: {e}")
else:
    print(f"Folder '{source_folder}' successfully copied to '{destination_folder}'")


Folder '/kaggle/input/fine-tuning-dataset/chinese_accent' successfully copied to 'chinese_accent'


In [5]:
import os
import re

# Folder containing your .mp3 files
folder_path = "/kaggle/input/fine-tuning-dataset/chinese_accent"

# Output text file path
output_file = "text_data.txt"

# File names look like "100_Add 900 using card payment.mp3": a numeric id,
# an underscore, then the sentence. IGNORECASE lets ".MP3" match as well.
FILENAME_PATTERN = re.compile(r"(\d+)_([\w\s]+.*)\.mp3", re.IGNORECASE)


def _numeric_order(filename):
    """Sort key: numbered files by integer id, then unnumbered files by name."""
    match = FILENAME_PATTERN.match(filename)
    if match:
        return (0, int(match.group(1)), filename)
    return (1, 0, filename)


# Accept the extension in any letter case and order by the numeric id;
# a plain string sort would put "100_..." ahead of "2_...".
mp3_files = sorted(
    (f for f in os.listdir(folder_path) if f.lower().endswith(".mp3")),
    key=_numeric_order,
)

lines = []

for filename in mp3_files:
    match = FILENAME_PATTERN.match(filename)
    if match:
        number = match.group(1)
        text = match.group(2).strip()
        lines.append(f"{number}. {text}")
    else:
        # File doesn't follow the pattern: fall back to its name without extension
        lines.append(os.path.splitext(filename)[0])

# Write one line per file to text_data.txt
with open(output_file, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

print(f"‚úÖ Text data saved to: {output_file}")


‚úÖ Text data saved to: text_data.txt


In [6]:
# Clean the voice transcript: trim whitespace, add missing end punctuation, and merge the lines into one paragraph

import re

# Transcript to clean and the file that receives the merged result
input_file = "/kaggle/input/fine-tuning-dataset/voice_text_data/voice_text_data/my_voice_data.txt"
output_file = "my_voice_text_data.txt"

with open(input_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Trim every line, drop the ones that end up empty, and make sure each
# remaining line finishes with ".", "!" or "?" (a "." is appended otherwise).
stripped_lines = (line.strip() for line in lines)
cleaned_lines = [
    text if re.search(r"[.!?]$", text) else text + "."
    for text in stripped_lines
    if text
]

# Merge into one space-separated paragraph, then collapse any run of
# whitespace into a single space.
final_text = re.sub(r"\s+", " ", " ".join(cleaned_lines)).strip()

with open(output_file, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"‚úÖ Cleaned and merged text saved to: {output_file}")


‚úÖ Cleaned and merged text saved to: my_voice_text_data.txt
