In [None]:
# DOWNSAMPLING OF MDVR ORIGINAL DATASET


import os
import time
from pydub import AudioSegment

# Dataset locations, relative to the notebook's working directory
INPUT_ROOT, OUTPUT_ROOT = (
    os.path.join("Dataset", name)
    for name in ("MDVR Original Dataset", "MDVR Downsampled Dataset")
)

# One merged output folder per group label; both are created up front
HC_FOLDER, PD_FOLDER = (os.path.join(OUTPUT_ROOT, group) for group in ("HC", "PD"))
os.makedirs(HC_FOLDER, exist_ok=True)
os.makedirs(PD_FOLDER, exist_ok=True)

def downsample_audio(file_path, output_folder, prefix):
    """Resample one WAV file to 16 kHz and save it under a prefixed name.

    The result is written to ``output_folder`` as ``<prefix>_<original name>``
    so that files with identical names coming from different source
    sub-folders do not overwrite each other once they are merged.

    Args:
        file_path: Path of the WAV file to read.
        output_folder: Directory receiving the resampled file; created if it
            does not exist yet.
        prefix: Tag prepended to the file name (the batch loop in this cell
            passes "R" or "S").

    Returns:
        True when the file was written, False when any step failed. Failures
        are reported on stdout and never raised, so a batch run keeps going.
    """
    try:
        start = time.time()

        # Load and resample to 16 kHz
        sound = AudioSegment.from_file(file_path, format="wav")
        sound = sound.set_frame_rate(16000)

        # Prefix the name so merged folders cannot contain clashing file names
        file_name = os.path.basename(file_path)
        new_file_name = f"{prefix}_{file_name}"
        output_path = os.path.join(output_folder, new_file_name)

        os.makedirs(output_folder, exist_ok=True)
        sound.export(output_path, format="wav")

        duration = time.time() - start
        print(f"✅ Downsampled {file_name} → {new_file_name} in {duration:.2f}s")
        return True

    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller
        # decide what to do with the failure status.
        print(f"❌ Error processing {file_path}: {e}")
        return False

# Start processing
total_start = time.time()
file_count = {"HC": 0, "PD": 0}

# Each source sub-folder gets its own prefix so merged files keep unique names
for subfolder, prefix in [("ReadText", "R"), ("SpontaneousDialogue", "S")]:
    for group in ["HC", "PD"]:
        input_folder = os.path.join(INPUT_ROOT, subfolder, group)
        output_folder = HC_FOLDER if group == "HC" else PD_FOLDER  # Merge HC and PD

        if not os.path.isdir(input_folder):
            print(f"❌ Skipping missing folder: {input_folder}")
            continue  # Skip if the folder doesn't exist

        # os.listdir order is arbitrary; sorting keeps runs and logs reproducible
        for file in sorted(os.listdir(input_folder)):
            if file.endswith(".wav"):
                in_path = os.path.join(input_folder, file)
                # The helper reports its own errors instead of raising. Only an
                # explicit False is treated as a failure, so a helper that
                # returns nothing still has every file counted.
                if downsample_audio(in_path, output_folder, prefix) is not False:
                    file_count[group] += 1

# Print summary
total_duration = time.time() - total_start
print(f"\n✅ Downsampling complete in {total_duration:.2f} seconds")
print(f"📊 Total HC files: {file_count['HC']}, Total PD files: {file_count['PD']}")

# Print final structure
print("\n📁 Final Folder Structure:")
for root, dirs, files in os.walk(OUTPUT_ROOT):
    print(f"{root} ({len(files)} files)")


In [None]:
# SILENCE REMOVAL FROM MDVR DATA


import os
import time
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Read from the downsampled dataset; silence-stripped copies go to a sibling folder
INPUT_ROOT, OUTPUT_ROOT = (
    os.path.join("Dataset", folder)
    for folder in ("MDVR Downsampled Dataset", "MDVR Silenced Dataset")
)

def process_audio(file_path, output_path):
    """Strip silent stretches from a WAV file and write the result to ``output_path``.

    The recording is split with pydub's ``split_on_silence``
    (min_silence_len=100, silence_thresh=-40, keep_silence=50) and the
    resulting pieces are joined back together in order. When no piece is
    returned, the original audio is written unchanged. The parent directory
    of ``output_path`` is created if it is missing.
    """
    started = time.time()

    original = AudioSegment.from_file(file_path, format="wav")

    pieces = split_on_silence(
        original,
        min_silence_len=100,
        silence_thresh=-40,
        keep_silence=50,
    )

    if pieces:
        # Concatenate the non-silent pieces, starting from an empty segment
        result = AudioSegment.empty()
        for piece in pieces:
            result = result + piece
    else:
        # Nothing survived the split: keep the input so the output is never empty
        result = original

    target_dir = os.path.dirname(output_path)
    os.makedirs(target_dir, exist_ok=True)
    result.export(output_path, format="wav")

    elapsed = time.time() - started
    print(f"✅ Processed {os.path.basename(file_path)} in {elapsed:.2f}s")

# Start processing
total_start = time.time()
total_files = 0

for group in ["HC", "PD"]:
    input_folder = os.path.join(INPUT_ROOT, group)
    output_folder = os.path.join(OUTPUT_ROOT, group)

    # A missing group folder is reported and skipped instead of aborting the
    # whole run with FileNotFoundError from os.listdir.
    if not os.path.isdir(input_folder):
        print(f"❌ Skipping missing folder: {input_folder}")
        continue

    # Sorted because os.listdir order is arbitrary; keeps logs reproducible
    files = sorted(f for f in os.listdir(input_folder) if f.endswith(".wav"))
    total_files += len(files)

    for file in files:
        in_path = os.path.join(input_folder, file)
        out_path = os.path.join(output_folder, file)

        process_audio(in_path, out_path)

# Print summary
total_duration = time.time() - total_start
print(f"\n✅ Silence removal complete. Total processing time: {total_duration:.2f} seconds")
print(f"📊 Total files processed: {total_files}")

# Print final structure
print("\n📁 Final Folder Structure:")
for root, dirs, files in os.walk(OUTPUT_ROOT):
    print(f"{root} ({len(files)} files)")

In [None]:
#CHUNKIFICATION OF MDVR DATA

import os
import time
from pydub import AudioSegment

# Chunking reads the silence-stripped dataset and writes to a sibling folder
input_root, output_root = (
    os.path.join("Dataset", name)
    for name in ("MDVR Silenced Dataset", "MDVR Chunked Dataset")
)
CHUNK_MS = 100      # length of every exported chunk, in milliseconds
total_chunks = 0    # running total, updated by process_chunks via `global`

def process_chunks(file_path, output_folder):
    """Cut one WAV file into consecutive pieces of CHUNK_MS milliseconds.

    The pieces are written as ``1.wav``, ``2.wav``, ... into a sub-folder of
    ``output_folder`` named after the source file (without its extension).
    Audio left over after the last full piece is not exported. The
    module-level ``total_chunks`` counter is increased by the number of
    pieces written.
    """
    global total_chunks
    started = time.time()

    audio = AudioSegment.from_file(file_path, format="wav")
    stem, _ext = os.path.splitext(os.path.basename(file_path))

    # Every source file gets its own folder of numbered chunks
    target_dir = os.path.join(output_folder, stem)
    os.makedirs(target_dir, exist_ok=True)

    written = 0
    full_pieces = len(audio) // CHUNK_MS
    for number in range(1, full_pieces + 1):
        begin = (number - 1) * CHUNK_MS
        piece = audio[begin:begin + CHUNK_MS]
        if len(piece) != CHUNK_MS:
            continue  # only full-length pieces are kept
        piece.export(os.path.join(target_dir, f"{number}.wav"), format="wav")
        written += 1

    total_chunks += written
    elapsed = time.time() - started
    print(f"✅ Processed {os.path.basename(file_path)} ({written} chunks) in {elapsed:.2f}s")

# Start processing
total_start = time.time()

for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)

    # A missing group folder is reported and skipped instead of aborting the
    # whole run with FileNotFoundError from os.listdir.
    if not os.path.isdir(input_folder):
        print(f"❌ Skipping missing folder: {input_folder}")
        continue

    os.makedirs(output_folder, exist_ok=True)

    # Sorted because os.listdir order is arbitrary; keeps logs reproducible
    for file in sorted(os.listdir(input_folder)):
        if file.endswith(".wav"):
            in_path = os.path.join(input_folder, file)
            process_chunks(in_path, output_folder)

# Print summary
total_duration = time.time() - total_start
print(f"\n✅ Total chunks created: {total_chunks}")
print(f"Total processing time: {total_duration:.2f} seconds")

In [None]:
#MDVR RGB SPECTRAL DATASET


import os
import time
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

# Chunked WAV input, and the sibling folder that receives the rendered images
input_root = os.path.join("Dataset", "MDVR Chunked Dataset")
output_root = os.path.join(os.path.dirname(input_root), "MDVR RGB Spectrogram Dataset 2")

os.makedirs(output_root, exist_ok=True)

def generate_parkinsons_spectrogram(audio_path, save_path):
    """Render a mel-spectrogram of one audio file and save it as an RGB image.

    The audio is loaded as 16 kHz mono and turned into a 64-band mel
    spectrogram (n_fft=512, hop_length=64, fmax=4000, power=1.0). The
    spectrogram is converted to dB relative to its own maximum, min-max scaled
    to [0, 1] and drawn with the 'inferno' colormap on an axis-free
    3.2 x 1.3 inch figure at 200 dpi.

    Args:
        audio_path: Audio file to read.
        save_path: Destination image path, passed to PIL's ``Image.save``.
            A temporary ``<save_path>_tmp.png`` is written next to it and
            removed again before returning.
    """
    y, sr = librosa.load(audio_path, sr=16000, mono=True)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=512, hop_length=64, n_mels=64, fmax=4000, power=1.0
    )

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

    # Min-max scale to [0, 1]. A constant spectrogram (e.g. digital silence)
    # has zero range; dividing would fill the image with NaN, so map it to 0.
    spec_min = mel_spec_db.min()
    spec_range = mel_spec_db.max() - spec_min
    if spec_range > 0:
        norm_spec = (mel_spec_db - spec_min) / spec_range
    else:
        norm_spec = np.zeros_like(mel_spec_db)

    temp_path = save_path + "_tmp.png"
    fig = plt.figure(figsize=(3.2, 1.3), dpi=200)
    try:
        ax = fig.add_subplot(111)
        ax.imshow(norm_spec, aspect='auto', cmap='inferno', origin='lower')
        ax.axis('off')
        fig.savefig(temp_path, bbox_inches='tight', pad_inches=0, format='png')
    finally:
        # Always release the figure; thousands are created in a batch run
        plt.close(fig)

    # 🔄 Convert RGBA to RGB
    try:
        # The context manager closes the file before it is deleted below
        with Image.open(temp_path) as img:
            img.convert("RGB").save(save_path)  # Force 3 channels
    finally:
        os.remove(temp_path)  # Clean up even if the conversion failed


# Start time tracking
start_time = time.time()
processed_files = 0
# Count the .wav files themselves. Counting every file of any folder that
# merely contains a .wav would inflate the progress total and the ETA.
total_files = sum(
    1
    for _, _, files in os.walk(input_root)
    for f in files
    if f.endswith('.wav')
)

print(f"🔄 Processing {total_files} files...")

for category in ["HC", "PD"]:
    category_input_folder = os.path.join(input_root, category)
    category_output_folder = os.path.join(output_root, category)

    if not os.path.exists(category_input_folder):
        continue  # Skip if category doesn't exist

    os.makedirs(category_output_folder, exist_ok=True)

    # Each sub-folder holds the chunks of one source recording
    for chunk_folder in os.listdir(category_input_folder):
        chunk_input_path = os.path.join(category_input_folder, chunk_folder)
        chunk_output_path = os.path.join(category_output_folder, chunk_folder)

        if not os.path.isdir(chunk_input_path):
            continue  # Skip non-folder files

        os.makedirs(chunk_output_path, exist_ok=True)

        for file in os.listdir(chunk_input_path):
            if file.endswith(".wav"):
                in_path = os.path.join(chunk_input_path, file)
                out_path = os.path.join(chunk_output_path, f"{os.path.splitext(file)[0]}.png")

                generate_parkinsons_spectrogram(in_path, out_path)

                # Running average per image drives the remaining-time estimate
                processed_files += 1
                elapsed_time = time.time() - start_time
                time_per_image = elapsed_time / processed_files
                remaining_files = total_files - processed_files
                estimated_remaining_time = remaining_files * time_per_image

                # end='\r' rewrites the same line instead of flooding the output
                print(
                    f"✅ Processed {processed_files}/{total_files} files | "
                    f"Time per image: {time_per_image:.2f}s | "
                    f"Remaining: {remaining_files} files (~{estimated_remaining_time:.2f}s left)", end='\r'
                )

# Final statistics
total_time = time.time() - start_time
print(f"\n✅ Success! Created {processed_files} spectrogram images in {total_time:.2f}s")

In [None]:
#SILENCE REMOVAL TESTING



from pydub import AudioSegment
from pydub.silence import split_on_silence

def process_audio(input_file, output_file):
    """Remove silence from one WAV file using the experimental test settings.

    The recording is split with pydub's ``split_on_silence``
    (min_silence_len=100, silence_thresh=-45, keep_silence=500), the pieces
    are concatenated and the result is exported to ``output_file`` as WAV.

    Note: this shares its name with the batch helper defined in the
    silence-removal cell, so running this cell rebinds ``process_audio``.
    """
    sound = AudioSegment.from_file(input_file, format="wav")
    audio_chunks = split_on_silence(sound, min_silence_len=100, silence_thresh=-45, keep_silence=500)

    # When the split yields no chunks, fall back to the untouched audio instead
    # of exporting an empty file (same safeguard as the batch helper).
    combined = sum(audio_chunks, AudioSegment.empty()) if audio_chunks else sound
    combined.export(output_file, format="wav")
    print(f"✅ Processed: {output_file}")

# Example usage (the input path is specific to one machine; adjust before running)
process_audio(
    "C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/Italian Dataset/PD/Anna B/B1ABNINSAC46F240120171753.wav",
    "silenced output.wav",
)


In [28]:
# CHUNKIFICATION TESTING


from pydub import AudioSegment
import os

def chunkify_audio(file_path, chunk_duration_ms, output_dir="chunks"):
    """Split an audio file into equal-length chunks and export them as WAV.

    Only whole chunks are produced: whatever is left at the end of the
    recording and is shorter than ``chunk_duration_ms`` is dropped. A short
    summary is printed, ``output_dir`` is created if missing, and the chunks
    are written there as ``chunk_0000.wav``, ``chunk_0001.wav``, ...
    """
    recording = AudioSegment.from_file(file_path)
    length_ms = len(recording)

    # Cut the tail off up front so every slice below is a full chunk
    chunk_count = length_ms // chunk_duration_ms
    usable = recording[:chunk_count * chunk_duration_ms]

    print(f"Original duration: {length_ms} ms")
    print(f"Chunk size: {chunk_duration_ms} ms")
    print(f"Number of full chunks: {chunk_count}")

    os.makedirs(output_dir, exist_ok=True)

    for index in range(chunk_count):
        offset = index * chunk_duration_ms
        piece = usable[offset:offset + chunk_duration_ms]
        piece.export(os.path.join(output_dir, f"chunk_{index:04d}.wav"), format="wav")
        

# Example usage: 689 ms chunks written to the "my_audio_chunks" folder
chunkify_audio(
    "S_ID04_pd_2_0_1.wav",
    chunk_duration_ms=689,
    output_dir="my_audio_chunks",
)


Original duration: 42139 ms
Chunk size: 689 ms
Number of full chunks: 61


In [56]:
import os
import time
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

def generate_exact_spectrogram(audio_path, save_path):
    """Render a 224x224 RGB mel-spectrogram image from a ~0.689 s audio clip.

    The clip is loaded as 16 kHz mono and trimmed or zero-padded to exactly
    11024 samples. Its 128-band mel spectrogram (n_fft=320, hop_length=48,
    fmax=4000, power=1.4) is converted to dB relative to its maximum, min-max
    scaled to [0, 1], flipped vertically, resized to 224x224 with bicubic
    interpolation and coloured with the 'inferno' colormap. The shape of the
    raw spectrogram is printed for inspection.

    Args:
        audio_path: Audio file to read.
        save_path: Destination image path, passed to PIL's ``Image.save``.
    """
    # PIL is also imported at the top of the cell; the local import keeps the
    # function usable when it is copied on its own.
    from PIL import Image

    target_samples = 11024  # 0.689 s at 16 kHz
    sr = 16000

    # Load audio
    y, _ = librosa.load(audio_path, sr=sr, mono=True)

    # Trim or pad audio to exact size
    if len(y) > target_samples:
        y = y[:target_samples]
    elif len(y) < target_samples:
        y = np.pad(y, (0, target_samples - len(y)))

    # Create Mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr,
        n_fft=320,
        hop_length=48,
        n_mels=128,
        fmax=4000,
        power=1.4
    )

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

    # Min-max scale to [0, 1]. A constant spectrogram (e.g. a silent or fully
    # padded clip) has zero range; dividing would yield NaN, so map it to 0.
    spec_min = mel_spec_db.min()
    spec_range = mel_spec_db.max() - spec_min
    if spec_range > 0:
        norm_spec = (mel_spec_db - spec_min) / spec_range
    else:
        norm_spec = np.zeros_like(mel_spec_db)

    # Flip so row 0 of the spectrogram ends up at the bottom of the image, then
    # resize the 8-bit greyscale version to the final 224x224 size.
    img_array = np.flipud(norm_spec) * 255
    img = Image.fromarray(img_array).convert("L").resize((224, 224), Image.BICUBIC)

    # Apply the inferno colormap to the resized greyscale image and drop alpha
    cmap = matplotlib.colormaps['inferno']
    rgba_array = cmap(np.array(img)/255.0, bytes=True)
    Image.fromarray(rgba_array).convert("RGB").save(save_path)
    print("Spectrogram shape:", mel_spec.shape)  # (128, 230) for 11024 samples at hop 48


# ======== CONFIGURATION ========
input_file = "my_audio_chunks/chunk_0040.wav"       # Your input WAV file
output_dir = "op"           # Where to save output PNG
# ===============================

os.makedirs(output_dir, exist_ok=True)

# Swap only the real extension: str.replace would also rewrite a ".wav" that
# occurs earlier in the name and would leave names such as "x.WAV" untouched.
output_file = os.path.join(
    output_dir,
    os.path.splitext(os.path.basename(input_file))[0] + ".png",
)

start_time = time.time()

if os.path.exists(input_file):
    generate_exact_spectrogram(input_file, output_file)
    processing_time = time.time() - start_time
    print(f"✅ Processed {input_file} into 224x224 spectrogram")
    print(f"⏱ Took {processing_time:.2f}s | 📁 Saved to {output_file}")
else:
    print(f"❌ File not found: {input_file}")


Spectrogram shape: (128, 230)
✅ Processed my_audio_chunks/chunk_0040.wav into 224x224 spectrogram
⏱ Took 0.03s | 📁 Saved to op/chunk_0040.png
