In [3]:
import os

folders = [
    "audio_processed",
    "transcripts",
    "segments",
    "notebooks",
    "src",
    "docs",
    "tests"
]

files = {
    "": ["README.md", "requirements.txt", "LICENSE"],
    "src": [
        "preprocessing.py",
        "transcription.py",
        "segmentation.py",
        "summarization.py",
        "keyword_extraction.py",
        "ui_app.py"
    ]
}

project_name = "project"

os.makedirs(project_name, exist_ok=True)

for folder in folders:
    os.makedirs(os.path.join(project_name, folder), exist_ok=True)

for folder, file_list in files.items():
    for file in file_list:
        open(os.path.join(project_name, folder, file), "w").close()

print("‚úÖ Project folder structure created successfully")


‚úÖ Project folder structure created successfully


In [4]:
import os
import librosa
import soundfile as sf
import noisereduce as nr
import numpy as np
from tqdm import tqdm

# -----------------------------
# Paths (CHANGE ONLY ROOT PATH)
# -----------------------------
ROOT_DIR = r"C:\Users\hp\Desktop\project\Raw audio"

RAW_AUDIO_DIR = os.path.join(ROOT_DIR, "audio_raw")
CLEAN_AUDIO_DIR = os.path.join(ROOT_DIR, "audio_processed")

os.makedirs(CLEAN_AUDIO_DIR, exist_ok=True)

SUPPORTED_EXT = (".wav", ".mp3", ".m4a", ".flac")

print("üîä Starting audio cleaning process...")

# -----------------------------
# Audio Cleaning Loop
# -----------------------------
for root, _, files in os.walk(RAW_AUDIO_DIR):
    for file in tqdm(files):
        if file.lower().endswith(SUPPORTED_EXT):

            raw_path = os.path.join(root, file)

            try:
                # Load audio
                audio, sr = librosa.load(raw_path, sr=16000)

                # Noise reduction
                reduced_noise = nr.reduce_noise(y=audio, sr=sr)

                # Handle NaN values
                reduced_noise = np.nan_to_num(reduced_noise)

                # Normalize
                cleaned_audio = librosa.util.normalize(reduced_noise)

                # Save cleaned file
                clean_filename = file.split(".")[0] + ".wav"
                clean_path = os.path.join(CLEAN_AUDIO_DIR, clean_filename)

                sf.write(clean_path, cleaned_audio, sr)

            except Exception as e:
                print(f"‚ùå Error processing {file}: {e}")

print("‚úÖ Audio cleaning completed successfully!")


üîä Starting audio cleaning process...
‚úÖ Audio cleaning completed successfully!


In [5]:
print("RAW AUDIO PATH:", RAW_AUDIO_DIR)
print("Exists?", os.path.exists(RAW_AUDIO_DIR))


RAW AUDIO PATH: C:\Users\hp\Desktop\project\Raw audio\audio_raw
Exists? False
