<a href="https://colab.research.google.com/github/gbdionne/toneclone/blob/main/segment_and_spectrogram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
import wave
import librosa
import librosa.display
import matplotlib.pyplot as plt
from scipy.io.wavfile import write

In [None]:
def split_wav(file_path, sample_length=5, overlap=1, num_segments=None):
    """
    Splits up a PCM WAV file into fixed-length, optionally overlapping segments.

    Args:
        file_path (str): The path to the WAV file.
        sample_length (int or float): The length of each segment in seconds.
          Default is 5 seconds.
        overlap (int or float): Overlap between consecutive segments in seconds.
          Must be smaller than sample_length. Default is 1 second.
        num_segments (int or None): Number of segments from the beginning of
          the file to return. If None, all segments are returned. Values <= 0
          return an empty list.

    Returns:
        list of tuples: [(segment_data, sample_rate), ...]
        segment_data is a 1-D array for mono files and a (samples, channels)
        array for multi-channel files. Segments are read-only views into the
        decoded buffer. If the last segment is shorter than sample_length it
        is not returned.

    Raises:
        ValueError: If the segment length is not positive, if the overlap
          leaves no forward step between segments, or if the file's sample
          width is not 1, 2 or 4 bytes.
    """

    print(f"Processing file: {file_path}")

    # Sample width in bytes -> numpy dtype. WAV PCM is little-endian; 8-bit is unsigned.
    pcm_dtypes = {1: np.dtype("u1"), 2: np.dtype("<i2"), 4: np.dtype("<i4")}

    with wave.open(file_path, 'rb') as wav:
        num_channels = wav.getnchannels()
        sample_width = wav.getsampwidth()
        sample_rate = wav.getframerate()
        num_frames = wav.getnframes()

        if sample_width not in pcm_dtypes:
            raise ValueError(
                f"Unsupported sample width: {sample_width} bytes "
                f"(supported: {sorted(pcm_dtypes)})"
            )

        # Convert sample_length and overlap from seconds to whole numbers of
        # samples so fractional durations still give valid slice bounds.
        samples_per_segment = int(round(sample_length * sample_rate))
        overlap_samples = int(round(overlap * sample_rate))
        step_size = samples_per_segment - overlap_samples

        if samples_per_segment <= 0:
            raise ValueError("sample_length must cover at least one sample")
        if step_size <= 0:
            # A non-positive step would never advance through the file.
            raise ValueError("overlap must be smaller than sample_length")

        # Only decode as many frames as the requested segments can use.
        if num_segments is None:
            frames_to_read = num_frames
        elif num_segments <= 0:
            frames_to_read = 0
        else:
            frames_to_read = min(
                num_frames,
                samples_per_segment + (num_segments - 1) * step_size,
            )

        raw = wav.readframes(frames_to_read)

        # Ignore a trailing partial frame (e.g. a truncated file).
        whole_frames = len(raw) // (sample_width * num_channels)
        audio_data = np.frombuffer(
            raw, dtype=pcm_dtypes[sample_width], count=whole_frames * num_channels
        )

        # De-interleave multi-channel audio into (samples, channels)
        if num_channels > 1:
            audio_data = audio_data.reshape(-1, num_channels)

        print(f"Total samples in the WAV file: {num_frames}")
        print(f"Samples rate: {sample_rate}")
        print(f"Splitting into {sample_length} second segments with {overlap} second overlap...")

        # Create the short segments
        segments = []
        available = len(audio_data)
        start = 0
        while start + samples_per_segment <= available:
            # Stop once the desired number of segments has been collected
            if num_segments is not None and len(segments) >= num_segments:
                break
            segment_data = audio_data[start:start + samples_per_segment]
            segments.append((segment_data, sample_rate))
            start += step_size

    return segments

def save_segments(segments, output_dir, base_filename="segment"):
    """
    Writes each audio segment produced by split_wav() to its own WAV file.

    Files are named "<base_filename>_<n>.wav", where n counts from 1 in the
    order the segments appear in the list.

    Args:
        segments (list of tuples): [(segment_data, sample_rate), ...]
        output_dir (str): Destination directory; created if it does not exist.
        base_filename (str): Prefix for the saved files. Default is "segment".
    """

    # Make sure the destination exists before any file is written
    os.makedirs(output_dir, exist_ok=True)

    segment_count = len(segments)
    print(f"Saving {segment_count} segments to {output_dir}")

    for number, pair in enumerate(segments, start=1):
        audio, rate = pair
        wav_name = f"{base_filename}_{number}.wav"
        output_path = os.path.join(output_dir, wav_name)
        write(output_path, rate, audio)
        print(f"Saved: {output_path}")

    print("Saved all segments.\n")

def save_spectrograms(segments, output_dir, num_segments=None,
                      spectrogram_type="mel", base_filename="spectrogram",
                      save_npy=True, save_png=False):
    """
    Saves dB-scaled spectrograms for a list of audio segments to the specified
    output directory.

    Each segment is converted to float, mixed down to mono if it has a channel
    axis, and peak-normalized to [-1, 1] before the spectrogram is computed.
    Files are named "<base_filename>_<n>.npy" / ".png", with n counting from 1.

    Args:
        segments (list of tuples): [(segment_data, sample_rate), ...]
          segment_data is 1-D (mono) or shaped (samples, channels).
        output_dir (str): The directory where the spectrograms will be saved.
        num_segments (int or None): Number of segments from the beginning of
          the segment list to save. If None, all segments are saved.
        spectrogram_type (str): Type of spectrogram to generate. Can be "mel" or "stft".
          Default is "mel".
        base_filename (str): The base filename for the saved files. Default is "spectrogram".
        save_npy (bool): Whether to save the spectrogram as a .npy file. Default is True.
        save_png (bool): Whether to save the spectrogram as a .png file. Default is False.

    Returns:
        Nothing

    Raises:
        ValueError: If spectrogram_type is neither "mel" nor "stft".
    """

    # Fail before touching the filesystem instead of silently falling back to STFT.
    if spectrogram_type not in ("mel", "stft"):
        raise ValueError(
            f"spectrogram_type must be 'mel' or 'stft', got {spectrogram_type!r}"
        )

    os.makedirs(output_dir, exist_ok=True)

    # Check how many segments to process
    if num_segments is None or num_segments > len(segments):
        num_segments = len(segments)
    # A negative count would make the slice below drop segments from the end.
    num_segments = max(num_segments, 0)

    print(f"Generating and saving {num_segments} spectrograms to {output_dir}...")

    # Analysis settings shared by the transform and the plot's time axis
    n_fft = 2048
    hop_length = 512

    for i, (segment, sample_rate) in enumerate(segments[:num_segments]):
        file_stem = os.path.join(output_dir, f"{base_filename}_{i+1}")

        # Convert to float BEFORE taking abs(): abs(-32768) wraps around in int16.
        audio_float = np.asarray(segment).astype(np.float32)

        # split_wav() lays multi-channel audio out as (samples, channels);
        # average the channels so librosa receives a 1-D mono signal.
        if audio_float.ndim > 1:
            audio_float = audio_float.mean(axis=1)

        # Normalize between -1 and 1; leave silent or empty audio untouched
        # rather than dividing by zero.
        peak = np.max(np.abs(audio_float)) if audio_float.size else 0.0
        if peak > 0:
            audio_float = audio_float / peak

        # Generate spectrogram and convert it to decibels
        if spectrogram_type == 'mel':
            # melspectrogram returns power (magnitude squared) by default,
            # so it needs the power -> dB conversion.
            sgrm = librosa.feature.melspectrogram(y=audio_float, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mels=128)
            sgrm_db = librosa.power_to_db(sgrm, ref=np.max)
        else:
            # stft returns complex values; take the magnitude explicitly.
            sgrm = np.abs(librosa.stft(y=audio_float, n_fft=n_fft, hop_length=hop_length))
            sgrm_db = librosa.amplitude_to_db(sgrm, ref=np.max)

        if save_npy:
            np.save(f"{file_stem}.npy", sgrm_db)
        if save_png:
            fig, ax = plt.subplots(figsize=(10, 4))
            img = librosa.display.specshow(sgrm_db, sr=sample_rate, hop_length=hop_length, x_axis='time', y_axis='mel' if spectrogram_type == 'mel' else 'log', ax=ax)

            fig.colorbar(img, ax=ax, format="%+2.0f dB")
            fig.savefig(f"{file_stem}.png")
            # Close explicitly so figures do not accumulate across segments
            plt.close(fig)

    print("Saved all spectrograms.\n")


In [None]:
# Input recordings live in one folder and are named small_agg_<label>.wav.
dataset_dir = "/content/drive/MyDrive/Capstone 210/Small Agg Dataset"
output_dir = '/content/drive/MyDrive/Capstone 210/Code/'

# Labels embedded in the dataset file names, in processing order.
effects = [
    "clean", "ODV", "DST", "FUZ", "TRM", "CHR", "PHZ",
    "FLG", "DLY", "HLL", "PLT", "OCT", "FLT",
]

# Segmentation settings applied to every recording.
segment_seconds = 10
overlap_seconds = 5
segments_per_file = 1

# Look up a recording or its segments by label, e.g. segments_by_effect["ODV"].
wav_files = {
    effect: os.path.join(dataset_dir, f"small_agg_{effect}.wav")
    for effect in effects
}

segments_by_effect = {
    effect: split_wav(
        wav_path,
        sample_length=segment_seconds,
        overlap=overlap_seconds,
        num_segments=segments_per_file,
    )
    for effect, wav_path in wav_files.items()
}

# Save the segments
for effect, effect_segments in segments_by_effect.items():
    save_segments(effect_segments, output_dir, base_filename=f"{effect}_segment")

# Save a PNG spectrogram (no .npy) for each saved segment
for effect, effect_segments in segments_by_effect.items():
    save_spectrograms(
        effect_segments,
        output_dir,
        num_segments=segments_per_file,
        base_filename=f"spec_{effect}",
        save_npy=False,
        save_png=True,
    )





Processing file: /content/drive/MyDrive/Capstone 210/Small Agg Dataset/small_agg_clean.wav
Total samples in the WAV file: 420604933
Samples rate: 44100
Splitting into 10 second segments with 5 second overlap...
Processing file: /content/drive/MyDrive/Capstone 210/Small Agg Dataset/small_agg_ODV.wav
Total samples in the WAV file: 420604933
Samples rate: 44100
Splitting into 10 second segments with 5 second overlap...
Processing file: /content/drive/MyDrive/Capstone 210/Small Agg Dataset/small_agg_DST.wav
Total samples in the WAV file: 420604933
Samples rate: 44100
Splitting into 10 second segments with 5 second overlap...
Processing file: /content/drive/MyDrive/Capstone 210/Small Agg Dataset/small_agg_FUZ.wav
Total samples in the WAV file: 420604933
Samples rate: 44100
Splitting into 10 second segments with 5 second overlap...
Processing file: /content/drive/MyDrive/Capstone 210/Small Agg Dataset/small_agg_TRM.wav
Total samples in the WAV file: 420604933
Samples rate: 44100
Splitting in