In [1]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd

## Zero Crossing

In [14]:
import numpy as np
import librosa
import pandas as pd
import os

def loudest_window_start(signal, window_len, hop):
    """Return the start index of the loudest hop-aligned window in `signal`.

    Candidate windows start at 0, hop, 2*hop, ... and must fit entirely
    inside the signal. Loudness is measured as the RMS of the window.

    Args:
        signal: Array of audio samples; its first axis is the sample axis.
        window_len: Window length in samples.
        hop: Spacing between candidate window starts, in samples.

    Returns:
        Start index of the window with the highest RMS. On ties the earliest
        window wins. Returns 0 when the signal is shorter than one window or
        when no window has an RMS above zero.
    """
    n_samples = np.shape(signal)[0]
    best_rms = 0
    best_start = 0

    # "+ 1" keeps the last full window (start == n_samples - window_len) in
    # the search; without it that window is silently skipped.
    for start in range(0, n_samples - window_len + 1, hop):
        rms = np.sqrt(np.mean(np.square(signal[start:start + window_len])))
        if rms > best_rms:
            best_rms = rms
            best_start = start

    return best_start


# Initialize an empty list to store the results
results = []

# Loop over all the violin files (Violin_000.mp3 ... Violin_093.mp3)
for i in range(94):
    # Construct the file name
    filename = f"../Sounds/Violin/Violin_{str(i).zfill(3)}.mp3"

    # Gaps in the numbering are expected, so missing files are skipped quietly
    if not os.path.exists(filename):
        continue

    try:
        # Load the audio file as mono, resampled to 48 kHz
        y_orig, fs = librosa.load(filename, mono=True, sr=48000)

        output_time = 1 # seconds
        output_len = output_time * fs

        # Candidate sections overlap by half their length
        jump = output_len // 2

        # Keep only the one-second section with the highest RMS; a recording
        # shorter than one second is used in full.
        rms_best_start = loudest_window_start(y_orig, output_len, jump)
        y = y_orig[rms_best_start:rms_best_start + output_len]

        # Boolean mask of sign changes; its mean is the fraction of samples
        # at which the signal crosses zero.
        zero_crossings = librosa.zero_crossings(y, pad=False)
        mean = np.mean(zero_crossings)

        # Append the result to the list
        results.append([filename, mean])

    except Exception as e:
        # Report the failure and carry on with the remaining files
        print(f"Error processing file {filename}: {e}")

# Convert the results to a DataFrame and save as a CSV file
df = pd.DataFrame(results, columns=['filename', 'mean_zero_crossing_rate'])
df.to_csv('zero_crossing_rates.csv', index=False)


## Spectral Rolloff

In [56]:
import numpy as np
import librosa
import pandas as pd
import os

# One [filename, mean rolloff] row is collected per violin recording
results = []

# Recordings are numbered Violin_000.mp3 ... Violin_093.mp3; gaps are allowed
for i in range(94):
    filename = f"../Sounds/Violin/Violin_{str(i).zfill(3)}.mp3"

    if os.path.exists(filename):
        try:
            # Decode as mono, resampled to 48 kHz
            y_orig, fs = librosa.load(filename, mono=True, sr=48000)

            # Per-frame rolloff frequency (roll_percent=0.85), then averaged
            # into a single number for the whole file
            rolloff = librosa.feature.spectral_rolloff(
                y=y_orig,
                sr=48000,
                roll_percent=0.85,
            )
            mean_rolloff = np.mean(rolloff)

            results.append([filename, mean_rolloff])

        except Exception as e:
            # A file that fails to load or analyse is reported and skipped
            print(f"Error processing file {filename}: {e}")

# Tabulate the collected rows and write them out as CSV
df = pd.DataFrame(results, columns=['filename', 'mean_spectral_rolloff'])
df.to_csv('spectral_rolloffs.csv', index=False)


## Mel-Frequency Cepstral Coefficients (MFCCs)

In [23]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os

# Sound types available; each one is a sub-directory of both the source and
# the output directory trees.
sound_types = ("Snare", "Trumpet", "Violin")

# File name suffixes that are treated as audio input.
_AUDIO_EXTENSIONS = (".wav", ".mp3", ".flac")


def _loudest_window_start(signal, window_len, hop):
    """Return the start of the hop-aligned window of `signal` with the highest RMS.

    Windows start at 0, hop, 2*hop, ... and must fit entirely inside the
    signal. Returns 0 when the signal is shorter than one window or when no
    window has an RMS above zero; on ties the earliest window wins.
    """
    n_samples = np.shape(signal)[0]
    best_rms = 0
    best_start = 0

    # "+ 1" keeps the last full window (start == n_samples - window_len)
    # among the candidates.
    for start in range(0, n_samples - window_len + 1, hop):
        rms = np.sqrt(np.mean(np.square(signal[start:start + window_len])))
        if rms > best_rms:
            best_rms = rms
            best_start = start

    return best_start


def generateMFCC(source_dir, out_dir):
    """Render an MFCC image for every audio file of every sound type.

    For each entry of `sound_types`, audio files (.wav, .mp3, .flac) are read
    from `<source_dir>/<sound_type>`. The loudest one-second section of each
    file is converted to MFCCs with librosa's default settings and saved as
    `<out_dir>/<sound_type>/<basename>.png`, with no axes, labels or padding.

    Existing ``.png`` files in each output sub-directory are deleted first, so
    the output always reflects the current contents of the source directory.

    Args:
        source_dir: Directory containing one sub-directory per sound type.
        out_dir: Directory that receives one sub-directory per sound type;
            missing sub-directories are created.

    Raises:
        FileNotFoundError: If `<source_dir>/<sound_type>` does not exist.
    """
    # iterate over each of the sound types
    for sound_type in sound_types:
        sound_dir = os.path.join(source_dir, sound_type)
        spec_dir = os.path.join(out_dir, sound_type)

        # create the output directory on first use
        os.makedirs(spec_dir, exist_ok=True)

        # remove all existing spectrograms
        for spec_file in os.listdir(spec_dir):
            if spec_file.endswith(".png"):
                os.remove(os.path.join(spec_dir, spec_file))

        # iterate over each file in the source directory
        for filename in os.listdir(sound_dir):
            # skip anything that is not an audio file
            if not filename.endswith(_AUDIO_EXTENSIONS):
                continue

            # Load the audio file as mono, resampled to 48 kHz
            y_orig, fs = librosa.load(os.path.join(sound_dir, filename), mono=True, sr=48000)

            output_time = 1 # seconds
            output_len = output_time * fs

            # Keep the one-second section with the highest RMS, searching in
            # half-window steps; shorter recordings are used in full.
            start = _loudest_window_start(y_orig, output_len, output_len // 2)
            y = y_orig[start:start + output_len]

            # Compute the MFCCs
            mfccs = librosa.feature.mfcc(y=y, sr=fs)

            # Plot the MFCCs without title, axis labels, or colorbar
            plt.figure(figsize=(10, 4))
            try:
                librosa.display.specshow(mfccs, sr=fs, x_axis=None, y_axis=None)
                plt.axis('off')  # no axis
                plt.tight_layout(pad=0)  # no padding

                # Save the plot as a PNG file named after the audio file
                output_file = os.path.join(spec_dir, os.path.splitext(filename)[0] + ".png")
                plt.savefig(output_file, bbox_inches='tight', pad_inches=0)
            finally:
                # Always release the figure, even if plotting or saving fails
                plt.close()


In [24]:
# Regenerate the MFCC images for the test split and then the training split
for sounds_dir, mfcc_dir in (
    ("../Test-data/Sounds", "../Test-data/MFCC"),
    ("../Training-data/Sounds", "../Training-data/MFCC"),
):
    generateMFCC(sounds_dir, mfcc_dir)

[src/libmpg123/id3.c:process_extra():681] error: No extra frame text / valid description?
[src/libmpg123/id3.c:process_extra():681] error: No extra frame text / valid description?
[src/libmpg123/id3.c:process_extra():681] error: No extra frame text / valid description?
[src/libmpg123/id3.c:process_extra():681] error: No extra frame text / valid description?


## Spectral Centroid

In [1]:
import numpy as np
import librosa
import pandas as pd
import os

# One [filename, mean centroid] row is collected per violin recording
results = []

# Recordings are numbered Violin_000.mp3 ... Violin_093.mp3; gaps are allowed
for i in range(94):
    filename = f"../Sounds/Violin/Violin_{str(i).zfill(3)}.mp3"

    if os.path.exists(filename):
        try:
            # Decode as mono, resampled to 48 kHz
            y_orig, fs = librosa.load(filename, mono=True, sr=48000)

            # Per-frame spectral centroid, averaged into one number per file
            centroid = librosa.feature.spectral_centroid(y=y_orig, sr=48000)
            mean_centroid = np.mean(centroid)

            results.append([filename, mean_centroid])

        except Exception as e:
            # A file that fails to load or analyse is reported and skipped
            print(f"Error processing file {filename}: {e}")

# Tabulate the collected rows and write them out as CSV
df = pd.DataFrame(results, columns=['filename', 'mean_spectral_centroid'])
df.to_csv('spectral_centroids.csv', index=False)


## Chroma Feature

In [112]:
snare_file = "../Sounds/Snare/Snare_007.wav"
# snare_file = "../Sounds/Trumpet/Trumpet_006.mp3"

# Load the audio file as mono, resampled to 48 kHz
y_orig, fs = librosa.load(snare_file, mono=True, sr=48000)

output_time = 1 # seconds
output_len = output_time * fs

# number of samples of loaded file
input_len = np.shape(y_orig)[0]

# find a section with a high rms value
jump = output_len//2 # spacing between sections
rms_best = 0
rms_best_start = 0

# loop over sections of the sample to find the one with the best rms value;
# "+ 1" keeps the last full section (start == input_len - output_len) in the search
for start in range(0, input_len - output_len + 1, jump):
    end = start + output_len
    rms = np.sqrt(np.mean(np.square(y_orig[start:end])))
    if rms > rms_best:
        rms_best = rms
        rms_best_start = start

# one-second excerpt with the highest rms (the whole file if it is shorter)
y = y_orig[rms_best_start:rms_best_start + output_len]

# Split the signal into harmonic and percussive components.
# NOTE(review): the separation runs on the full recording (y_orig), not on the
# excerpt `y` selected above, which is otherwise unused in this cell. Confirm
# which input is intended.
y_harmonic, y_percussive = librosa.effects.hpss(y_orig)
print(y_harmonic)
print(y_percussive)

[ 2.9940209e-03  3.1773450e-03  9.3155326e-03 ... -1.2209699e-10
  2.6585531e-10  1.7678882e-10]
[-1.3262873e-04  1.5768720e-02  4.8297260e-02 ...  1.2209690e-10
 -2.6585867e-10 -1.7678591e-10]
