In [None]:
# SILENCE REMOVAL FROM ITALIAN DATA


import os
import time
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Input and output directories
# input_root = r"C:\NPersonal\Projects\SDP\Prediction Stuff\Dataset\Italian Original Dataset"
# output_root = r"C:\NPersonal\Projects\SDP\Prediction Stuff\Dataset\Italian Silenced Dataset"

input_root= os.path.join("Dataset", "Italian Original Dataset")
output_root = os.path.join("Dataset", "Italian Silenced Dataset")

def process_audio(file_path, output_path):
    start = time.time()
    sound = AudioSegment.from_file(file_path, format="wav")
    
    # Silence removal parameters
    audio_chunks = split_on_silence(
        sound, 
        min_silence_len=100,
        silence_thresh=-40,
        keep_silence=50
    )
    
    # Merge all chunks back together
    combined = AudioSegment.empty()
    for chunk in audio_chunks:
        combined += chunk
    
    # Create necessary output directories
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Export the processed file
    combined.export(output_path, format="wav")
    
    duration = time.time() - start
    print(f"Processed in {duration:.2f}s: {os.path.basename(file_path)}")

total_start = time.time()

# Process both HC and PD directories
for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)

    # Walk through all subdirectories recursively
    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.endswith(".wav"):
                # Get relative path to maintain folder structure
                relative_path = os.path.relpath(root, input_folder)
                in_path = os.path.join(root, file)
                out_path = os.path.join(output_folder, relative_path, file)
                
                process_audio(in_path, out_path)

total_duration = time.time() - total_start
print(f"\n✅ All files processed in {total_duration:.2f} seconds")


In [57]:
#CHUNKIFICATION OF ITALIAN DATA


import os
import time
from pydub import AudioSegment

input_root = os.path.join("Dataset", "Italian Silenced Dataset")
output_root = os.path.join("Dataset", "Italian Chunked Dataset 689")
CHUNK_MS = 689
total_chunks = 0

def process_chunks(file_path, output_folder):
    global total_chunks
    start = time.time()
    
    # Load the audio file
    sound = AudioSegment.from_file(file_path, format="wav")
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    
    chunk_dir = os.path.join(output_folder, file_name)
    os.makedirs(chunk_dir, exist_ok=True)
    

    file_chunks = 0
    for i in range(len(sound) // CHUNK_MS):
        chunk = sound[i * CHUNK_MS : (i + 1) * CHUNK_MS]
        
        if len(chunk) == CHUNK_MS:  # Ensure chunk is exactly 100ms
            chunk.export(os.path.join(chunk_dir, f"{file_name}_{i+1}.wav"), format="wav")
            file_chunks += 1
    
    total_chunks += file_chunks
    duration = time.time() - start
    print(f"Processed {os.path.basename(file_path)} ({file_chunks} chunks) in {duration:.2f}s")

total_start = time.time()


for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)


    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.endswith(".wav"):

                relative_path = os.path.relpath(root, input_folder)
                in_path = os.path.join(root, file)
                out_path = os.path.join(output_folder, relative_path)

                process_chunks(in_path, out_path)

total_duration = time.time() - total_start
print(f"\n✅ Total chunks created: {total_chunks}")
print(f"Total processing time: {total_duration:.2f} seconds")

Processed B1APNOTROC49M230320170926.wav (48 chunks) in 0.01s
Processed VE1APNOTROC49M230320170929.wav (13 chunks) in 0.00s
Processed VU2APNOTROC49M230320170932.wav (7 chunks) in 0.00s
Processed VA1APNOTROC49M230320170928.wav (4 chunks) in 0.00s
Processed D2APNOTROC49M230320170928.wav (4 chunks) in 0.00s
Processed B2APNOTROC49M230320170927.wav (53 chunks) in 0.01s
Processed VU1APNOTROC49M230320170932.wav (7 chunks) in 0.00s
Processed D1APNOTROC49M230320170928.wav (3 chunks) in 0.00s
Processed FB1APNOTROC49M230320170934.wav (21 chunks) in 0.00s
Processed VA2APNOTROC49M230320170929.wav (5 chunks) in 0.00s
Processed PR1APNOTROC49M230320170933.wav (22 chunks) in 0.00s
Processed VI1APNOTROC49M230320170930.wav (6 chunks) in 0.00s
Processed VI2APNOTROC49M230320170930.wav (8 chunks) in 0.00s
Processed VE2APNOTROC49M230320170930.wav (7 chunks) in 0.00s
Processed VO1APNOTROC49M230320170931.wav (8 chunks) in 0.00s
Processed VO2APNOTROC49M230320170931.wav (7 chunks) in 0.00s
Processed FB1GGIAORVG47

In [None]:
#ITALIAN RGB SPECTROGRAM DATA GENERATION 496x200


import os
import time
import gc
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

# Configure paths
# input_root = "Dataset/Italian Chunked Dataset"
# output_root = "Dataset/Italian Spectrogram Dataset"

# input_root = r"/home/nigmu/Downloads/Italian Chunked Dataset"
# output_root = r"/home/nigmu/NPersonal/Projects/SDP/nigmu-parkinsons_disease_prediction/Dataset/Italian RGB Spectrogram Dataset"

input_root = os.path.join("Dataset", "Italian Chunked Dataset")
output_root =  os.path.join("Dataset", "Italian RGB Spectrogram Dataset")


checkpoint_file = "processed_files.txt"

# Load previously processed files
processed_files_set = set()
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        processed_files_set = set(line.strip() for line in f)

def save_checkpoint(file_path):
    """Append a processed file to the checkpoint file."""
    with open(checkpoint_file, "a") as f:
        f.write(file_path + "\n")

def generate_parkinsons_spectrogram(audio_path, save_path):
    """Generate and save spectrogram images."""
    y, sr = librosa.load(audio_path, sr=16000, mono=True)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=512, hop_length=64, n_mels=64, fmax=4000, power=1.0
    )

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

    fig = plt.figure(figsize=(3.2, 1.3), dpi=200)
    ax = fig.add_subplot(111)

    # plt.imshow(norm_spec, aspect='auto', cmap='gray', origin='lower')
    plt.imshow(norm_spec, aspect='auto', cmap='inferno', origin='lower')

    ax.axis('off')

    temp_path = save_path + "_tmp.png"
    plt.savefig(temp_path, bbox_inches='tight', pad_inches=0, format='png')

    # 🔄 Convert RGBA to RGB
    img = Image.open(temp_path).convert("RGB")
    img.save(save_path)
    os.remove(temp_path)  # Clean up


    

    plt.close(fig)

# Start time tracking
start_time = time.time()

# Get total number of files first
total_files = sum(
    len(files) for group in ["HC", "PD"] 
    for _, _, files in os.walk(os.path.join(input_root, group)) if any(f.endswith('.wav') for f in files)
)
processed_files = len(processed_files_set)

print(f"🔄 Processing {total_files} files... (Resuming from last checkpoint)")

# Process both HC and PD folders
for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)

    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.lower().endswith(".wav"):
                in_path = os.path.join(root, file)
                out_dir = os.path.join(output_folder, os.path.relpath(root, input_folder))
                os.makedirs(out_dir, exist_ok=True)
                out_path = os.path.join(out_dir, f"{os.path.splitext(file)[0]}.png")

                # Skip if already processed
                if in_path in processed_files_set:
                    continue

                file_start_time = time.time()

                generate_parkinsons_spectrogram(in_path, out_path)

                # Save progress
                save_checkpoint(in_path)
                processed_files += 1

                elapsed_time = time.time() - start_time
                time_per_image = elapsed_time / processed_files
                remaining_files = total_files - processed_files
                estimated_remaining_time = remaining_files * time_per_image

                print(
                    f"✅ Processed {processed_files}/{total_files} files | "
                    f"Time per image: {time_per_image:.2f}s | "
                    f"Remaining: {remaining_files} files (~{estimated_remaining_time:.2f}s left)", end='\r'
                )

# Final statistics
total_time = time.time() - start_time
print(f"\n✅ Success! Created {processed_files} spectrogram images in {total_time:.2f}s")

In [59]:
#ITALIAN RGB SPECTROGRAM DATA GENERATION 224x224 Natural


import os
import time
import gc
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

# Configure paths
# input_root = "Dataset/Italian Chunked Dataset"
# output_root = "Dataset/Italian Spectrogram Dataset"

# input_root = r"/home/nigmu/Downloads/Italian Chunked Dataset"
# output_root = r"/home/nigmu/NPersonal/Projects/SDP/nigmu-parkinsons_disease_prediction/Dataset/Italian RGB Spectrogram Dataset"

input_root = os.path.join("Dataset", "Italian Chunked Dataset 689")
output_root =  os.path.join("Dataset", "Italian RGB Spectrogram Dataset 2")


checkpoint_file = "processed_files.txt"

# Load previously processed files
processed_files_set = set()
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        processed_files_set = set(line.strip() for line in f)

def save_checkpoint(file_path):
    """Append a processed file to the checkpoint file."""
    with open(checkpoint_file, "a") as f:
        f.write(file_path + "\n")

def generate_exact_spectrogram(audio_path, save_path):
    """Generate an exact 224x224 spectrogram from a trimmed 0.689s audio segment"""
    target_samples = 11024
    sr = 16000

    # Load audio
    y, _ = librosa.load(audio_path, sr=sr, mono=True)

    # Trim or pad audio to exact size
    if len(y) > target_samples:
        y = y[:target_samples]
    elif len(y) < target_samples:
        y = np.pad(y, (0, target_samples - len(y)))

    # Create Mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, 
        n_fft=320, 
        hop_length=48,
        n_mels=128, 
        fmax=4000, 
        power=1.4
    )

    # Normalize and convert to image
    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

    # Resize to 224x224 without padding or data loss
    from PIL import Image
    img_array = np.flipud(norm_spec) * 255
    img = Image.fromarray(img_array).convert("L").resize((224, 224), Image.BICUBIC)

    # Convert to RGB image using inferno colormap
    cmap = matplotlib.colormaps['inferno']
    # rgba_array = cmap(np.flipud(norm_spec), bytes=True)
    rgba_array = cmap(np.array(img)/255.0, bytes=True)
    Image.fromarray(rgba_array).convert("RGB").save(save_path)
    print("Spectrogram shape:", mel_spec.shape)  # → (128, ~224)

# Start time tracking
start_time = time.time()

# Get total number of files first
total_files = sum(
    len(files) for group in ["HC", "PD"] 
    for _, _, files in os.walk(os.path.join(input_root, group)) if any(f.endswith('.wav') for f in files)
)
processed_files = len(processed_files_set)

print(f"🔄 Processing {total_files} files... (Resuming from last checkpoint)")

# Process both HC and PD folders
for group in ["HC", "PD"]:
    input_folder = os.path.join(input_root, group)
    output_folder = os.path.join(output_root, group)

    for root, _, files in os.walk(input_folder):
        for file in files:
            if file.lower().endswith(".wav"):
                in_path = os.path.join(root, file)
                out_dir = os.path.join(output_folder, os.path.relpath(root, input_folder))
                os.makedirs(out_dir, exist_ok=True)
                out_path = os.path.join(out_dir, f"{os.path.splitext(file)[0]}.png")

                # Skip if already processed
                if in_path in processed_files_set:
                    continue

                file_start_time = time.time()

                generate_exact_spectrogram(in_path, out_path)

                # Save progress
                save_checkpoint(in_path)
                processed_files += 1

                elapsed_time = time.time() - start_time
                time_per_image = elapsed_time / processed_files
                remaining_files = total_files - processed_files
                estimated_remaining_time = remaining_files * time_per_image

                print(
                    f"✅ Processed {processed_files}/{total_files} files | "
                    f"Time per image: {time_per_image:.2f}s | "
                    f"Remaining: {remaining_files} files (~{estimated_remaining_time:.2f}s left)", end='\r'
                )

# Final statistics
total_time = time.time() - start_time
print(f"\n✅ Success! Created {processed_files} spectrogram images in {total_time:.2f}s")

🔄 Processing 16090 files... (Resuming from last checkpoint)
Spectrogram shape: (128, 230)
Spectrogram shape: (128, 230)ime per image: 0.04s | Remaining: 16089 files (~676.81s left)
Spectrogram shape: (128, 230)ime per image: 0.03s | Remaining: 16088 files (~482.72s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16087 files (~384.64s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16086 files (~342.52s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16085 files (~314.42s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16084 files (~298.69s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16083 files (~291.22s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16082 files (~283.25s left)
Spectrogram shape: (128, 230)ime per image: 0.02s | Remaining: 16081 files (~278.42s left)
Spectrogram shape: (128, 230)Time per image: 0.02s | Remaining: 16080 files (~271.00s left)

In [None]:
#SILENCE REMOVAL TESTING



from pydub import AudioSegment
from pydub.silence import split_on_silence

def process_audio(input_file, output_file):
    sound = AudioSegment.from_file(input_file, format="wav")
    audio_chunks = split_on_silence(sound, min_silence_len=100, silence_thresh=-45, keep_silence=500)
    
    combined = sum(audio_chunks, AudioSegment.empty())
    combined.export(output_file, format="wav")
    print(f"✅ Processed: {output_file}")

# Example usage
process_audio("C:/NPersonal/Projects/SDP/Prediction Stuff/Dataset/Italian Dataset/PD/Anna B/B1ABNINSAC46F240120171753.wav", "silenced output.wav")


In [28]:
# CHUNKIFICATION TESTING


from pydub import AudioSegment
import os

def chunkify_audio(file_path, chunk_duration_ms, output_dir="chunks"):
    audio = AudioSegment.from_file(file_path)
    total_duration_ms = len(audio)
    
    # Force trimming to avoid extra partial chunk
    num_chunks = total_duration_ms // chunk_duration_ms
    trimmed_audio = audio[:num_chunks * chunk_duration_ms]

    print(f"Original duration: {total_duration_ms} ms")
    print(f"Chunk size: {chunk_duration_ms} ms")
    print(f"Number of full chunks: {num_chunks}")

    os.makedirs(output_dir, exist_ok=True)

    for i in range(num_chunks):
        start = i * chunk_duration_ms
        end = start + chunk_duration_ms
        chunk = trimmed_audio[start:end]
        chunk_path = os.path.join(output_dir, f"chunk_{i:04d}.wav")
        chunk.export(chunk_path, format="wav")
        

# Example usage:
chunkify_audio("S_ID04_pd_2_0_1.wav", chunk_duration_ms=689, output_dir="my_audio_chunks")


Original duration: 42139 ms
Chunk size: 689 ms
Number of full chunks: 61


In [56]:
import os
import time
import librosa
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image

def generate_exact_spectrogram(audio_path, save_path):
    """Generate an exact 224x224 spectrogram from a trimmed 0.689s audio segment"""
    target_samples = 11024
    sr = 16000

    # Load audio
    y, _ = librosa.load(audio_path, sr=sr, mono=True)

    # Trim or pad audio to exact size
    if len(y) > target_samples:
        y = y[:target_samples]
    elif len(y) < target_samples:
        y = np.pad(y, (0, target_samples - len(y)))

    # Create Mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, 
        n_fft=320, 
        hop_length=48,
        n_mels=128, 
        fmax=4000, 
        power=1.4
    )

    # Normalize and convert to image
    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    norm_spec = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

    # Resize to 224x224 without padding or data loss
    from PIL import Image
    img_array = np.flipud(norm_spec) * 255
    img = Image.fromarray(img_array).convert("L").resize((224, 224), Image.BICUBIC)

    # Convert to RGB image using inferno colormap
    cmap = matplotlib.colormaps['inferno']
    # rgba_array = cmap(np.flipud(norm_spec), bytes=True)
    rgba_array = cmap(np.array(img)/255.0, bytes=True)
    Image.fromarray(rgba_array).convert("RGB").save(save_path)
    print("Spectrogram shape:", mel_spec.shape)  # → (128, ~224)


# ======== CONFIGURATION ========
input_file = "my_audio_chunks/chunk_0040.wav"       # Your input WAV file
output_dir = "op"           # Where to save output PNG
# ===============================

os.makedirs(output_dir, exist_ok=True)

output_file = os.path.join(output_dir, 
                           os.path.basename(input_file).replace(".wav", ".png"))

start_time = time.time()

if os.path.exists(input_file):
    generate_exact_spectrogram(input_file, output_file)
    processing_time = time.time() - start_time
    print(f"✅ Processed {input_file} into 224x224 spectrogram")
    print(f"⏱ Took {processing_time:.2f}s | 📁 Saved to {output_file}")
else:
    print(f"❌ File not found: {input_file}")


Spectrogram shape: (128, 230)
✅ Processed my_audio_chunks/chunk_0040.wav into 224x224 spectrogram
⏱ Took 0.03s | 📁 Saved to op/chunk_0040.png
