In [1]:
# FOR NOISE REDUCTION


# import numpy as np
# import librosa
# import librosa.display
# import noisereduce as nr
# import matplotlib.pyplot as plt
# import sounddevice as sd
# import soundfile as sf

# # Load the audio file
# file_path = "test_audio.wav"
# y, sr = librosa.load(file_path, sr=None)  # Load with original sampling rate

# # Plot original waveform
# plt.figure(figsize=(12, 4))
# plt.subplot(1, 2, 1)
# librosa.display.waveshow(y, sr=sr)
# plt.title("Original Audio Waveform")
# plt.xlabel("Time (s)")
# plt.ylabel("Amplitude")

# # Noise reduction
# reduced_noise = nr.reduce_noise(y=y, sr=sr, prop_decrease=0.5) #prop_decrease sets reduction level, larger value means less noise and less quality but smaller value means more noise but more quality as well

# # Plot denoised waveform
# plt.subplot(1, 2, 2)
# librosa.display.waveshow(reduced_noise, sr=sr)
# plt.title("Denoised Audio Waveform")
# plt.xlabel("Time (s)")
# plt.ylabel("Amplitude")
# plt.tight_layout()
# plt.show()

# # ✅ Save the cleaned audio using soundfile (Fixes the error)
# sf.write("denoised_audio.wav", reduced_noise, sr)

In [9]:
# DOWNSAMPLIMG OF MDVR ORIGINAL DATASET


import os
import time
from pydub import AudioSegment

# Define relative paths
INPUT_ROOT = os.path.join("Dataset", "MDVR Original Dataset")
OUTPUT_ROOT = os.path.join("Dataset", "MDVR Downsampled Dataset")

# Create merged HC and PD folders
HC_FOLDER = os.path.join(OUTPUT_ROOT, "HC")
PD_FOLDER = os.path.join(OUTPUT_ROOT, "PD")
os.makedirs(HC_FOLDER, exist_ok=True)
os.makedirs(PD_FOLDER, exist_ok=True)

def downsample_audio(file_path, output_folder, prefix):
    """Downsamples an audio file to 16kHz and saves it with a prefix to prevent overwriting."""
    try:
        start = time.time()
        
        # Load audio
        sound = AudioSegment.from_file(file_path, format="wav")
        sound = sound.set_frame_rate(16000)  # Set sample rate to 16kHz
        
        # Save with "R_" or "S_" prefix to prevent overwriting
        file_name = os.path.basename(file_path)
        new_file_name = f"{prefix}_{file_name}"
        output_path = os.path.join(output_folder, new_file_name)
        
        sound.export(output_path, format="wav")

        duration = time.time() - start
        print(f"✅ Downsampled {file_name} → {new_file_name} in {duration:.2f}s")

    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")

# Start processing
total_start = time.time()
file_count = {"HC": 0, "PD": 0}

for subfolder, prefix in [("ReadText", "R"), ("SpontaneousDialogue", "S")]:  # Assign prefixes
    for group in ["HC", "PD"]:
        input_folder = os.path.join(INPUT_ROOT, subfolder, group)
        output_folder = HC_FOLDER if group == "HC" else PD_FOLDER  # Merge HC and PD

        if not os.path.exists(input_folder):
            print(f"❌ Skipping missing folder: {input_folder}")
            continue  # Skip if the folder doesn't exist

        for file in os.listdir(input_folder):
            if file.endswith(".wav"):
                in_path = os.path.join(input_folder, file)
                downsample_audio(in_path, output_folder, prefix)
                file_count[group] += 1  # Count processed files

# Print summary
total_duration = time.time() - total_start
print(f"\n✅ Downsampling complete in {total_duration:.2f} seconds")
print(f"📊 Total HC files: {file_count['HC']}, Total PD files: {file_count['PD']}")

# Print final structure
print("\n📁 Final Folder Structure:")
for root, dirs, files in os.walk(OUTPUT_ROOT):
    print(f"{root} ({len(files)} files)")


✅ Downsampled ID00_hc_0_0_0.wav → R_ID00_hc_0_0_0.wav in 2.37s
✅ Downsampled ID01_hc_0_0_0.wav → R_ID01_hc_0_0_0.wav in 3.04s
✅ Downsampled ID03_hc_0_0_0.wav → R_ID03_hc_0_0_0.wav in 2.55s
✅ Downsampled ID05_hc_0_0_0.wav → R_ID05_hc_0_0_0.wav in 1.98s
✅ Downsampled ID08_hc_0_0_0.wav → R_ID08_hc_0_0_0.wav in 2.64s
✅ Downsampled ID09_hc_0_0_0.wav → R_ID09_hc_0_0_0.wav in 2.22s
✅ Downsampled ID10_hc_0_0_0.wav → R_ID10_hc_0_0_0.wav in 2.41s
✅ Downsampled ID11_hc_0_0_0.wav → R_ID11_hc_0_0_0.wav in 2.62s
✅ Downsampled ID12_hc_0_0_0.wav → R_ID12_hc_0_0_0.wav in 2.33s
✅ Downsampled ID14_hc_0_0_0.wav → R_ID14_hc_0_0_0.wav in 2.87s
✅ Downsampled ID15_hc_0_0_0.wav → R_ID15_hc_0_0_0.wav in 3.06s
✅ Downsampled ID19_hc_0_0_0.wav → R_ID19_hc_0_0_0.wav in 2.43s
✅ Downsampled ID21_hc_0_0_0.wav → R_ID21_hc_0_0_0.wav in 2.12s
✅ Downsampled ID22_hc_0_0_0.wav → R_ID22_hc_0_0_0.wav in 2.95s
✅ Downsampled ID23_hc_0_0_0.wav → R_ID23_hc_0_0_0.wav in 3.73s
✅ Downsampled ID25_hc_0_0_0.wav → R_ID25_hc_0_0_0.wav i

In [11]:
# DOWNSAMPLING OF ITALIAN DATASET


#IT'S ALREADY OF 16kHz

In [13]:
import os
import time
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Define dataset paths
INPUT_ROOT = os.path.join("Dataset", "MDVR Downsampled Dataset")
OUTPUT_ROOT = os.path.join("Dataset", "MDVR Silenced Dataset")

def process_audio(file_path, output_path):
    """Removes silence from an audio file and saves it in the same structure."""
    start = time.time()

    sound = AudioSegment.from_file(file_path, format="wav")

    # Split on silence
    audio_chunks = split_on_silence(
        sound, 
        min_silence_len=100, 
        silence_thresh=-40, 
        keep_silence=500
    )

    # Combine processed chunks
    combined = sum(audio_chunks, AudioSegment.empty()) if audio_chunks else sound  # Ensure non-empty output

    os.makedirs(os.path.dirname(output_path), exist_ok=True)  # Ensure output folder exists
    combined.export(output_path, format="wav")

    duration = time.time() - start
    print(f"✅ Processed {os.path.basename(file_path)} in {duration:.2f}s")

# Start processing
total_start = time.time()
total_files = 0

for group in ["HC", "PD"]:
    input_folder = os.path.join(INPUT_ROOT, group)
    output_folder = os.path.join(OUTPUT_ROOT, group)

    files = [f for f in os.listdir(input_folder) if f.endswith(".wav")]
    total_files += len(files)

    for file in files:
        in_path = os.path.join(input_folder, file)
        out_path = os.path.join(output_folder, file)
        
        process_audio(in_path, out_path)

# Print summary
total_duration = time.time() - total_start
print(f"\n✅ Silence removal complete. Total processing time: {total_duration:.2f} seconds")
print(f"📊 Total files processed: {total_files}")

# Print final structure
print("\n📁 Final Folder Structure:")
for root, dirs, files in os.walk(OUTPUT_ROOT):
    print(f"{root} ({len(files)} files)")

✅ Processed R_ID00_hc_0_0_0.wav in 3.01s
✅ Processed R_ID01_hc_0_0_0.wav in 2.42s
✅ Processed R_ID03_hc_0_0_0.wav in 2.40s
✅ Processed R_ID05_hc_0_0_0.wav in 1.80s
✅ Processed R_ID08_hc_0_0_0.wav in 2.22s
✅ Processed R_ID09_hc_0_0_0.wav in 1.89s
✅ Processed R_ID10_hc_0_0_0.wav in 2.38s
✅ Processed R_ID11_hc_0_0_0.wav in 2.13s
✅ Processed R_ID12_hc_0_0_0.wav in 2.13s
✅ Processed R_ID14_hc_0_0_0.wav in 2.16s
✅ Processed R_ID15_hc_0_0_0.wav in 2.87s
✅ Processed R_ID19_hc_0_0_0.wav in 2.33s
✅ Processed R_ID21_hc_0_0_0.wav in 1.81s
✅ Processed R_ID22_hc_0_0_0.wav in 2.77s
✅ Processed R_ID23_hc_0_0_0.wav in 3.22s
✅ Processed R_ID25_hc_0_0_0.wav in 2.93s
✅ Processed R_ID26_hc_0_0_0.wav in 5.13s
✅ Processed R_ID28_hc_0_0_0.wav in 2.81s
✅ Processed R_ID31_hc_0_1_1.wav in 3.69s
✅ Processed R_ID35_hc_0_0_0.wav in 1.49s
✅ Processed R_ID36_hc_0_0_0.wav in 2.74s
✅ Processed S_ID00_hc_0_0_0.wav in 1.86s
✅ Processed S_ID01_hc_0_0_0.wav in 1.92s
✅ Processed S_ID03_hc_0_0_0.wav in 2.50s
✅ Processed S_ID

In [14]:
# SILENCE REMOVAL FROM ITALIAN DATA


import os
import time
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Input and output directories
input_root = r"C:\NPersonal\Projects\SDP\Prediction Stuff\Dataset\Italian Dataset"
output_root = r"C:\NPersonal\Projects\SDP\Prediction Stuff\Dataset\Italian Silenced Dataset"

def process_audio(file_path, output_path):
    start = time.time()
    sound = AudioSegment.from_file(file_path, format="wav")
    
    # Silence removal parameters
    audio_chunks = split_on_silence(
        sound, 
        min_silence_len=100,
        silence_thresh=-40,
        keep_silence=500
    )
    
    # Merge all chunks back together
    combined = AudioSegment.empty()
    for chunk in audio_chunks:
        combined += chunk
    
    # Create necessary output directories
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Export the processed file
    combined.export(output_path, format="wav")
    
    duration = time.time() - start
    print(f"Processed in {duration:.2f}s: {os.path.basename(file_path)}")

total_start = time.time()

# Process both HC and PD directories
for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)

    # Walk through all subdirectories recursively
    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.endswith(".wav"):
                # Get relative path to maintain folder structure
                relative_path = os.path.relpath(root, input_folder)
                in_path = os.path.join(root, file)
                out_path = os.path.join(output_folder, relative_path, file)
                
                process_audio(in_path, out_path)

total_duration = time.time() - total_start
print(f"\n✅ All files processed in {total_duration:.2f} seconds")


Processed in 1.46s: B1APGANRET55F170320171104.wav
Processed in 0.99s: B2APGANRET55F170320171105.wav
Processed in 0.10s: D1APGANRET55F170320171106.wav
Processed in 0.11s: D2APGANRET55F170320171107.wav
Processed in 0.51s: FB1APGANRET55F170320171115.wav
Processed in 0.84s: PR1APGANRET55F170320171114.wav
Processed in 0.16s: VA1APGANRET55F170320171107.wav
Processed in 0.10s: VA2APGANRET55F170320171108.wav
Processed in 0.16s: VE1APGANRET55F170320171109.wav
Processed in 0.08s: VE2APGANRET55F170320171109.wav
Processed in 0.14s: VI1APGANRET55F170320171109.wav
Processed in 0.13s: VI2APGANRET55F170320171110.wav
Processed in 0.20s: VO1APGANRET55F170320171111.wav
Processed in 0.09s: VO2APGANRET55F170320171111.wav
Processed in 0.15s: VU1APGANRET55F170320171112.wav
Processed in 0.11s: VU2APGANRET55F170320171112.wav
Processed in 0.50s: B1LBULCAAS94M100120171015.wav
Processed in 0.50s: B2LBULCAAS94M100120171015.wav
Processed in 0.61s: PR1LBULCAAS94M100120171015..wav
Processed in 0.51s: B1AMULCAAS94M100

In [15]:
#CHUNKIFICATION OF MDVR DATA

import os
import time
from pydub import AudioSegment

# Define dataset paths
input_root = os.path.join("Dataset", "MDVR Silenced Dataset")
output_root = os.path.join("Dataset", "MDVR Chunked Dataset")
CHUNK_MS = 100
total_chunks = 0

def process_chunks(file_path, output_folder):
    """Splits an audio file into 100ms chunks and stores them in a folder named after the original file."""
    global total_chunks
    start = time.time()
    sound = AudioSegment.from_file(file_path, format="wav")
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    
    # Create a dedicated folder for chunks of this file
    chunk_dir = os.path.join(output_folder, file_name)
    os.makedirs(chunk_dir, exist_ok=True)
    
    # Generate and save chunks
    file_chunks = 0
    for i in range(len(sound) // CHUNK_MS):
        chunk = sound[i * CHUNK_MS:(i + 1) * CHUNK_MS]
        if len(chunk) == CHUNK_MS:  # Ensure full chunk length
            chunk.export(os.path.join(chunk_dir, f"{i+1}.wav"), format="wav")
            file_chunks += 1
    
    total_chunks += file_chunks
    duration = time.time() - start
    print(f"✅ Processed {os.path.basename(file_path)} ({file_chunks} chunks) in {duration:.2f}s")

# Start processing
total_start = time.time()

for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)
    
    os.makedirs(output_folder, exist_ok=True)
    
    for file in os.listdir(input_folder):
        if file.endswith(".wav"):
            in_path = os.path.join(input_folder, file)
            process_chunks(in_path, output_folder)

# Print summary
total_duration = time.time() - total_start
print(f"\n✅ Total chunks created: {total_chunks}")
print(f"Total processing time: {total_duration:.2f} seconds")

✅ Processed R_ID00_hc_0_0_0.wav (935 chunks) in 1.02s
✅ Processed R_ID01_hc_0_0_0.wav (1357 chunks) in 1.38s
✅ Processed R_ID03_hc_0_0_0.wav (978 chunks) in 0.97s
✅ Processed R_ID05_hc_0_0_0.wav (857 chunks) in 0.87s
✅ Processed R_ID08_hc_0_0_0.wav (744 chunks) in 0.71s
✅ Processed R_ID09_hc_0_0_0.wav (809 chunks) in 0.84s
✅ Processed R_ID10_hc_0_0_0.wav (1056 chunks) in 1.05s
✅ Processed R_ID11_hc_0_0_0.wav (372 chunks) in 0.33s
✅ Processed R_ID12_hc_0_0_0.wav (988 chunks) in 0.88s
✅ Processed R_ID14_hc_0_0_0.wav (761 chunks) in 0.69s
✅ Processed R_ID15_hc_0_0_0.wav (1507 chunks) in 1.43s
✅ Processed R_ID19_hc_0_0_0.wav (1106 chunks) in 1.01s
✅ Processed R_ID21_hc_0_0_0.wav (552 chunks) in 0.48s
✅ Processed R_ID22_hc_0_0_0.wav (1256 chunks) in 1.11s
✅ Processed R_ID23_hc_0_0_0.wav (1552 chunks) in 1.39s
✅ Processed R_ID25_hc_0_0_0.wav (1393 chunks) in 1.25s
✅ Processed R_ID26_hc_0_0_0.wav (1132 chunks) in 1.03s
✅ Processed R_ID28_hc_0_0_0.wav (1153 chunks) in 1.01s
✅ Processed R_ID31_

In [17]:
#CHUNKIFICATION OF ITALIAN DATA


import os
import time
from pydub import AudioSegment

input_root = os.path.join("Dataset", "Italian Silenced Dataset")
output_root = os.path.join("Dataset", "Italian Chunked Dataset")
CHUNK_MS = 100
total_chunks = 0

def process_chunks(file_path, output_folder):
    global total_chunks
    start = time.time()
    
    # Load the audio file
    sound = AudioSegment.from_file(file_path, format="wav")
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    
    chunk_dir = os.path.join(output_folder, file_name)
    os.makedirs(chunk_dir, exist_ok=True)
    

    file_chunks = 0
    for i in range(len(sound) // CHUNK_MS):
        chunk = sound[i * CHUNK_MS : (i + 1) * CHUNK_MS]
        
        if len(chunk) == CHUNK_MS:  # Ensure chunk is exactly 100ms
            chunk.export(os.path.join(chunk_dir, f"{file_name}_{i+1}.wav"), format="wav")
            file_chunks += 1
    
    total_chunks += file_chunks
    duration = time.time() - start
    print(f"Processed {os.path.basename(file_path)} ({file_chunks} chunks) in {duration:.2f}s")

total_start = time.time()


for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)


    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.endswith(".wav"):

                relative_path = os.path.relpath(root, input_folder)
                in_path = os.path.join(root, file)
                out_path = os.path.join(output_folder, relative_path)

                process_chunks(in_path, out_path)

total_duration = time.time() - total_start
print(f"\n✅ Total chunks created: {total_chunks}")
print(f"Total processing time: {total_duration:.2f} seconds")

Processed B1APGANRET55F170320171104.wav (1010 chunks) in 0.85s
Processed B2APGANRET55F170320171105.wav (770 chunks) in 0.66s
Processed D1APGANRET55F170320171106.wav (61 chunks) in 0.06s
Processed D2APGANRET55F170320171107.wav (70 chunks) in 0.05s
Processed FB1APGANRET55F170320171115.wav (391 chunks) in 0.41s
Processed PR1APGANRET55F170320171114.wav (661 chunks) in 0.54s
Processed VA1APGANRET55F170320171107.wav (130 chunks) in 0.11s
Processed VA2APGANRET55F170320171108.wav (63 chunks) in 0.05s
Processed VE1APGANRET55F170320171109.wav (103 chunks) in 0.08s
Processed VE2APGANRET55F170320171109.wav (56 chunks) in 0.05s
Processed VI1APGANRET55F170320171109.wav (99 chunks) in 0.08s
Processed VI2APGANRET55F170320171110.wav (59 chunks) in 0.05s
Processed VO1APGANRET55F170320171111.wav (95 chunks) in 0.11s
Processed VO2APGANRET55F170320171111.wav (64 chunks) in 0.06s
Processed VU1APGANRET55F170320171112.wav (98 chunks) in 0.08s
Processed VU2APGANRET55F170320171112.wav (64 chunks) in 0.05s
Proce

In [4]:
# import librosa
# import numpy as np
# import matplotlib.pyplot as plt

# def generate_mel_spectrogram(audio_path, save_path):
#     # Load audio (16kHz sample rate)
#     y, sr = librosa.load(audio_path, sr=16000)
    
#     # Ensure audio is exactly 100ms (1600 samples)
#     y = y[:1600] if len(y) > 1600 else np.pad(y, (0, 1600 - len(y)))
    
#     # Generate Mel spectrogram
#     mel_spec = librosa.feature.melspectrogram(
#         y=y,
#     sr=sr,
#     n_fft=256,        # Reduced for more time steps
#     hop_length=128,     # Smaller hop = wider spectrogram
#     n_mels=64,        # Taller spectrogram
#     fmax=4000
#     )
    
#     # Convert to dB and normalize
#     mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
#     norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())
    
#     # Create a NEW figure and save WITHOUT displaying
#     fig = plt.figure(figsize=(3, 2))  # Explicitly create a figure
#     plt.imshow(norm_spec, cmap='gray', origin='lower', aspect='auto')
#     plt.axis('off')
    
#     # Save FIRST, then close
#     plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
#     plt.close(fig)  # Critical: Close the figure to free memory

# # Example usage
# audio_file = "C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/MDVR Chunked Dataset/ReadText/HC/ID00_hc_0_0_0/ID00_hc_0_0_0_1.wav"
# save_file = "mel_spectrogram.png"
# generate_mel_spectrogram(audio_file, save_file)

In [2]:
# import librosa
# import numpy as np
# import matplotlib.pyplot as plt

# def generate_mel_spectrogram(audio_path, save_path):
#     # Load audio (100ms @ 16kHz = 1600 samples)
#     y, sr = librosa.load(audio_path, sr=16000)
    
#     # Generate Mel spectrogram
#     mel_spec = librosa.feature.melspectrogram(
#         y=y, sr=sr, 
#         n_fft=512,       # Frequency resolution
#         hop_length=128,   # Time resolution
#         n_mels=64,        # Height (frequency bins)
#         fmax=4000         # Focus on 0–4 kHz
#     )
    
#     # Convert to dB and normalize
#     mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
#     norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())
    
#     # Save as raw image (no resizing)
#     plt.figure(figsize=(3, 2))
#     plt.axis('off')
#     plt.imshow(norm_spec, cmap='gray', origin='lower', aspect='auto')
#     plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
#     plt.close()

# generate_mel_spectrogram("C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/MDVR Chunked Dataset/ReadText/HC/ID00_hc_0_0_0/ID00_hc_0_0_0_1.wav", "somefile")

In [6]:
# import librosa
# import numpy as np
# import matplotlib.pyplot as plt
# import librosa.display

# def generate_parkinsons_spectrogram(audio_path, save_path):
#     # Load audio with strict 16kHz sampling
#     y, sr = librosa.load(audio_path, mono=True)
    
#     # Generate Mel spectrogram with validation
#     mel_spec = librosa.feature.melspectrogram(
#         y=y,
#         sr=16000,
#         n_fft = 512,
#         hop_length = 128,
#         n_mels = 64,
#         fmax = 4000,
#         power=1.0  # Use power=1 for energy spectrogram
#     )

#     # Convert to dB scale and normalize
    
#     # mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
#     mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
#     norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

#     # # Create figure with proper dimensions
#     # fig = plt.figure(figsize=(3, 2), dpi=300)
#     # ax = fig.add_subplot(111)
    
#     # # Display spectrogram
#     # img = librosa.display.specshow(
#     #     norm_spec,
#     #     x_axis='time',
#     #     y_axis='mel',
#     #     sr=16000,
#     #     fmax=4000,
#     #     hop_length=128,
#     #     cmap='gray',
#     #     ax=ax
#     # )
    
#     # # Remove axes and save
#     # ax.axis('off')
#     # plt.tight_layout(pad=0)
#     # plt.savefig(
#     #     save_path,
#     #     bbox_inches='tight',
#     #     pad_inches=0,
#     #     dpi=100,
#     #     format='png'
#     # )
#     # plt.close(fig)

#     # Create figure with ML-optimized dimensions
#     target_height = 64    # Matches n_mels
#     target_width = 21     # (1600 - 256)//64 + 1 = 21 time steps
#     dpi = 100
    
#     fig = plt.figure(figsize=(target_width/dpi, target_height/dpi), dpi=dpi)
#     ax = fig.add_subplot(111)
    
#     # Display spectrogram
#     librosa.display.specshow(
#         norm_spec,
#         x_axis='time',
#         y_axis='mel',
#         sr=16000,
#         fmax=4000,
#         hop_length=64,
#         cmap='gray',
#         ax=ax
#     )
    
#     ax.axis('off')
#     plt.savefig(
#         save_path,
#         bbox_inches='tight',
#         pad_inches=0,
#         dpi=dpi
#     )
#     plt.close(fig)

#     # Quality check
#     print(f"Generated spectrogram dimensions: {norm_spec.shape}")
#     print(f"Min value: {norm_spec.min():.4f}, Max value: {norm_spec.max():.4f}")

# # Example usage
# audio_file = "C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/MDVR Chunked Dataset/ReadText/HC/ID00_hc_0_0_0/ID00_hc_0_0_0_1.wav"
# output_file = "parkinsons_spectrogram3.png"

# generate_parkinsons_spectrogram(audio_file, output_file)

Generated spectrogram dimensions: (64, 18)
Min value: 0.0000, Max value: 1.0000


In [35]:
# import librosa
# import numpy as np
# import matplotlib.pyplot as plt
# from PIL import Image

# def generate_parkinsons_spectrogram(audio_path, output_path, target_size=(64, 64)):
#     """
#     Processes 100ms audio chunk to create ML-ready spectrogram image.
    
#     Parameters:
#         audio_path (str): Path to input audio file (.wav)
#         output_path (str): Path to save resized spectrogram image (.png)
#         target_size (tuple): Final image dimensions (height, width)
#     """
#     # 1. Load and Preprocess Audio
#     y, sr = librosa.load(audio_path, sr=16000, mono=True)
#     y = y[:1600] if len(y) >= 1600 else np.pad(y, (0, max(0, 1600 - len(y))))
    
#     # 2. Generate Mel Spectrogram (Parkinson's-optimized)
#     n_fft = 512
#     hop_length = 64
#     n_mels = 64
#     fmax = 4000

#     mel_spec = librosa.feature.melspectrogram(
#         y=y,
#         sr=sr,
#         n_fft=n_fft,
#         hop_length=hop_length,
#         n_mels=n_mels,
#         fmax=fmax,
#         power=1.0
#     )
    
#     # 3. Convert to dB and Normalize
#     mel_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
#     norm_spec = (mel_db - mel_db.min()) / (mel_db.max() - mel_db.min())

#     # 4. Save Native Spectrogram (Temporary)
#     temp_path = "temp_native.png"
#     plt.figure(figsize=(3, 2))
#     plt.imshow(norm_spec, cmap='gray', aspect='auto', origin='lower')
#     plt.axis('off')
#     plt.savefig(temp_path, bbox_inches='tight', pad_inches=0)
#     plt.close()

#     # 5. Resize for ML Compatibility
#     img = Image.open(temp_path).convert('L')  # Convert to grayscale
#     resized_img = img.resize(target_size, Image.LANCZOS)
#     resized_img.save(output_path)

#     # 6. Cleanup and Validation
#     print(f"Native dimensions: {norm_spec.shape} → Resized: {target_size}")
#     print(f"Pixel range: [{resized_img.getextrema()[0]}, {resized_img.getextrema()[1]}]")

# # Usage Example
# generate_parkinsons_spectrogram(
#     audio_path="C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/MDVR Chunked Dataset/ReadText/HC/ID00_hc_0_0_0/ID00_hc_0_0_0_1.wav",
#     output_path="parkinsons_spec.png",
#     target_size=(64, 64)  # (height, width) for CNN compatibility
# )

Native dimensions: (64, 26) → Resized: (64, 64)
Pixel range: [0, 254]


In [40]:
# import librosa
# import numpy as np
# import matplotlib.pyplot as plt

# def generate_parkinsons_spectrogram(audio_path, save_path):
#     # Load audio with strict 16kHz sampling
#     y, sr = librosa.load(audio_path, sr=16000, mono=True)

#     # Generate Mel spectrogram
#     mel_spec = librosa.feature.melspectrogram(
#         y=y,
#         sr=sr,
#         n_fft=512,
#         hop_length=64,
#         n_mels=64,
#         fmax=4000,
#         power=1.0
#     )

#     # Convert to dB scale and normalize
#     mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
#     norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

#     # Create figure with optimal dimensions (64:32 aspect ratio)
#     fig = plt.figure(figsize=(3.2, 1.3), dpi=300)
#     ax = fig.add_subplot(111)

#     # Display spectrogram (for saving only)
#     plt.imshow(norm_spec, aspect='auto', cmap='gray', origin='lower')

#     # Remove axes and save
#     ax.axis('off')
#     plt.savefig(save_path, bbox_inches='tight', pad_inches=0, format='png')
#     plt.close(fig)

# # Example usage
# audio_file = "C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/MDVR Chunked Dataset/ReadText/HC/ID00_hc_0_0_0/ID00_hc_0_0_0_1.wav"
# output_file = "parkinsons_spectrogram2.1.png"

# generate_parkinsons_spectrogram(audio_file, output_file)


In [2]:
#MDVR SPECTRAL DATASET


import os
import time
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

# Configure paths
# input_root = "Dataset/MDVR Chunked Dataset"
# output_root = "Dataset/MDVR Spectrogram Dataset"

input_root = r"/home/nigmu/NPersonal/Projects/SDP/nigmu-parkinsons_disease_prediction/Dataset/MDVR Chunked Dataset"
output_root = r"/home/nigmu/NPersonal/Projects/SDP/nigmu-parkinsons_disease_prediction/Dataset/MDVR"

os.makedirs(output_root, exist_ok=True)

def generate_parkinsons_spectrogram(audio_path, save_path):
    """Generate and save spectrogram images."""
    y, sr = librosa.load(audio_path, sr=16000, mono=True)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=512, hop_length=64, n_mels=64, fmax=4000, power=1.0
    )

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

    fig = plt.figure(figsize=(3.2, 1.3), dpi=200)

    ax = fig.add_subplot(111)
    plt.imshow(norm_spec, aspect='auto', cmap='gray', origin='lower')
    ax.axis('off')

    plt.savefig(save_path, bbox_inches='tight', pad_inches=0, format='png')

    
    
    def force_grayscale(image_path):
        img = Image.open(image_path).convert("L")  # Convert to grayscale (1 channel)
        img.save(image_path)  # Overwrite with grayscale version

    force_grayscale(save_path)

    
    
    plt.close(fig)

# Start time tracking
start_time = time.time()
processed_files = 0
total_files = sum(len(files) for _, _, files in os.walk(input_root) if any(f.endswith('.wav') for f in files))

print(f"🔄 Processing {total_files} files...")

for category in ["HC", "PD"]:
    category_input_folder = os.path.join(input_root, category)
    category_output_folder = os.path.join(output_root, category)

    if not os.path.exists(category_input_folder):
        continue  # Skip if category doesn't exist

    os.makedirs(category_output_folder, exist_ok=True)

    for chunk_folder in os.listdir(category_input_folder):
        chunk_input_path = os.path.join(category_input_folder, chunk_folder)
        chunk_output_path = os.path.join(category_output_folder, chunk_folder)

        if not os.path.isdir(chunk_input_path):
            continue  # Skip non-folder files

        os.makedirs(chunk_output_path, exist_ok=True)

        for file in os.listdir(chunk_input_path):
            if file.endswith(".wav"):
                in_path = os.path.join(chunk_input_path, file)
                out_path = os.path.join(chunk_output_path, f"{os.path.splitext(file)[0]}.png")

                generate_parkinsons_spectrogram(in_path, out_path)

                processed_files += 1
                elapsed_time = time.time() - start_time
                time_per_image = elapsed_time / processed_files
                remaining_files = total_files - processed_files
                estimated_remaining_time = remaining_files * time_per_image

                print(
                    f"✅ Processed {processed_files}/{total_files} files | "
                    f"Time per image: {time_per_image:.2f}s | "
                    f"Remaining: {remaining_files} files (~{estimated_remaining_time:.2f}s left)", end='\r'
                )

# Final statistics
total_time = time.time() - start_time
print(f"\n✅ Success! Created {processed_files} spectrogram images in {total_time:.2f}s")

🔄 Processing 64182 files...
✅ Processed 64182/64182 files | Time per image: 0.02s | Remaining: 0 files (~0.00s left))))))))
✅ Success! Created 64182 spectrogram images in 1283.24s


In [None]:
# #ITALIAN GRAPH DATA GENERATION


# import os
# import time
# import gc
# import librosa
# import numpy as np
# import matplotlib
# matplotlib.use('Agg')
# import matplotlib.pyplot as plt

# # Configure paths
# input_root = r"C:\NPersonal\Projects\SDP\Prediction Stuff\Dataset\Italian Chunked Dataset"
# output_root = r"C:\NPersonal\Projects\SDP\Prediction Stuff\Dataset\Italian Spectrogram Dataset"
# os.makedirs(output_root, exist_ok=True)

# def generate_parkinsons_spectrogram(audio_path, save_path):
#     """Generate and save spectrogram images."""
#     y, sr = librosa.load(audio_path, sr=16000, mono=True)

#     mel_spec = librosa.feature.melspectrogram(
#         y=y, sr=sr, n_fft=512, hop_length=64, n_mels=64, fmax=4000, power=1.0
#     )

#     mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
#     norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

#     fig = plt.figure(figsize=(3.2, 1.3), dpi=200)
#     ax = fig.add_subplot(111)

#     plt.imshow(norm_spec, aspect='auto', cmap='gray', origin='lower')
#     ax.axis('off')

#     plt.savefig(save_path, bbox_inches='tight', pad_inches=0, format='png')
#     plt.close(fig)

# # Start time tracking
# start_time = time.time()

# # Get total number of files first
# total_files = sum(
#     len(files) for group in ["HC", "PD"] 
#     for _, _, files in os.walk(os.path.join(input_root, group)) if any(f.endswith('.wav') for f in files)
# )
# processed_files = 0

# print(f"🔄 Processing {total_files} files...")

# # Process both HC and PD folders
# for group in ["HC", "PD"]:
#     input_folder = os.path.join(input_root, group)
#     output_folder = os.path.join(output_root, group)

#     for root, _, files in os.walk(input_folder):
#         for file in files:
#             if file.lower().endswith(".wav"):
#                 file_start_time = time.time()

#                 # Get relative path to maintain folder structure
#                 relative_path = os.path.relpath(root, input_folder)
#                 out_dir = os.path.join(output_folder, relative_path)
#                 os.makedirs(out_dir, exist_ok=True)

#                 # Define input and output paths
#                 in_path = os.path.join(root, file)
#                 out_path = os.path.join(out_dir, f"{os.path.splitext(file)[0]}.png")

#                 generate_parkinsons_spectrogram(in_path, out_path)

#                 processed_files += 1
#                 elapsed_time = time.time() - start_time
#                 time_per_image = elapsed_time / processed_files
#                 remaining_files = total_files - processed_files
#                 estimated_remaining_time = remaining_files * time_per_image

#                 print(
#                     f"✅ Processed {processed_files}/{total_files} files | "
#                     f"Time per image: {time_per_image:.2f}s | "
#                     f"Remaining: {remaining_files} files (~{estimated_remaining_time:.2f}s left)", end='\r'
#                 )

# # Final statistics
# total_time = time.time() - start_time
# print(f"\n✅ Success! Created {processed_files} spectrogram images in {total_time:.2f}s")


In [5]:
#ITALIAN SPECTROGRAM DATA GENERATION


import os
import time
import gc
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

# Configure paths
# input_root = "Dataset/Italian Chunked Dataset"
# output_root = "Dataset/Italian Spectrogram Dataset"

input_root = r"/home/nigmu/NPersonal/Projects/SDP/nigmu-parkinsons_disease_prediction/Dataset/Italian Chunked Dataset"
output_root = r"/home/nigmu/NPersonal/Projects/SDP/nigmu-parkinsons_disease_prediction/Dataset/Italian Dataset"
checkpoint_file = "processed_files.txt"

# Load previously processed files
processed_files_set = set()
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        processed_files_set = set(line.strip() for line in f)

def save_checkpoint(file_path):
    """Append a processed file to the checkpoint file."""
    with open(checkpoint_file, "a") as f:
        f.write(file_path + "\n")

def generate_parkinsons_spectrogram(audio_path, save_path):
    """Generate and save spectrogram images."""
    y, sr = librosa.load(audio_path, sr=16000, mono=True)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=512, hop_length=64, n_mels=64, fmax=4000, power=1.0
    )

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

    fig = plt.figure(figsize=(3.2, 1.3), dpi=200)
    ax = fig.add_subplot(111)

    plt.imshow(norm_spec, aspect='auto', cmap='gray', origin='lower')
    ax.axis('off')

    plt.savefig(save_path, bbox_inches='tight', pad_inches=0, format='png')



    def force_grayscale(image_path):
        img = Image.open(image_path).convert("L")  # Convert to grayscale (1 channel)
        img.save(image_path)  # Overwrite with grayscale version

    force_grayscale(save_path)

    

    plt.close(fig)

# Start time tracking
start_time = time.time()

# Get total number of files first
total_files = sum(
    len(files) for group in ["HC", "PD"] 
    for _, _, files in os.walk(os.path.join(input_root, group)) if any(f.endswith('.wav') for f in files)
)
processed_files = len(processed_files_set)

print(f"🔄 Processing {total_files} files... (Resuming from last checkpoint)")

# Process both HC and PD folders
for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)

    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.lower().endswith(".wav"):
                in_path = os.path.join(root, file)
                out_dir = os.path.join(output_folder, os.path.relpath(root, input_folder))
                os.makedirs(out_dir, exist_ok=True)
                out_path = os.path.join(out_dir, f"{os.path.splitext(file)[0]}.png")

                # Skip if already processed
                if in_path in processed_files_set:
                    continue

                file_start_time = time.time()

                generate_parkinsons_spectrogram(in_path, out_path)

                # Save progress
                save_checkpoint(in_path)
                processed_files += 1

                elapsed_time = time.time() - start_time
                time_per_image = elapsed_time / processed_files
                remaining_files = total_files - processed_files
                estimated_remaining_time = remaining_files * time_per_image

                print(
                    f"✅ Processed {processed_files}/{total_files} files | "
                    f"Time per image: {time_per_image:.2f}s | "
                    f"Remaining: {remaining_files} files (~{estimated_remaining_time:.2f}s left)", end='\r'
                )

# Final statistics
total_time = time.time() - start_time
print(f"\n✅ Success! Created {processed_files} spectrogram images in {total_time:.2f}s")

🔄 Processing 170243 files... (Resuming from last checkpoint)

✅ Success! Created 170243 spectrogram images in 2.92s


In [29]:
#SILENCE REMOVAL TESTING



from pydub import AudioSegment
from pydub.silence import split_on_silence

def process_audio(input_file, output_file):
    sound = AudioSegment.from_file(input_file, format="wav")
    audio_chunks = split_on_silence(sound, min_silence_len=100, silence_thresh=-45, keep_silence=500)
    
    combined = sum(audio_chunks, AudioSegment.empty())
    combined.export(output_file, format="wav")
    print(f"✅ Processed: {output_file}")

# Example usage
process_audio("C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/Italian Dataset/PD/Anna B/B1ABNINSAC46F240120171753.wav", "silenced output.wav")


✅ Processed: silenced output.wav


In [22]:
#SILENCE REMOVAL TESTING



from pydub import AudioSegment
from pydub.silence import split_on_silence

def process_audio(input_file, output_file):
    sound = AudioSegment.from_file(input_file, format="wav")
    audio_chunks = split_on_silence(sound, min_silence_len=100, silence_thresh=-40, keep_silence=500)
    
    combined = sum(audio_chunks, AudioSegment.empty())
    combined.export(output_file, format="wav")
    print(f"✅ Processed: {output_file}")

# Example usage
process_audio("C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/MDVR Trimmed Downsampled Dataset/ReadText/HC/ID00_hc_0_0_0.wav", "trimmed silenced output.wav")


✅ Processed: trimmed silenced output.wav
