# MFCC Notebook

In [6]:
import numpy as np
import json
# Relative path of the sample MFCC JSON file that the notebook loads below.
json_test_path = './data/test.json'

In [12]:
# Parse the sample MFCC JSON file. The context manager guarantees the file
# handle is closed even if parsing fails.
with open(json_test_path, 'r', encoding='utf-8') as f:
    mfcc_obj = json.load(f)
# The bare string below documents the expected layout of the JSON file; as the
# last expression of the cell it is echoed back by the notebook.
'''
{
  "sample_rate": 32000,
  "n_mfcc_coefficients": 13,
  "n_mel_filters": 40,
  "min_freq": 300,
  "max_freq": 8000,
  "mfcc_wav_frame_size": 1024,
  "window_overlap": 0.5,
  "seconds_per_frame": 0.032,
  "length_in_seconds": 12.832,
  "mfcc": [
    [-1339.66, -248.085, -14.3959, -197.458, -15.4143, -154.424, 16.0205, -110.371, -6.28836, -98.6062, -7.08902, -51.944, 17.5438],
    [-1188.31, -231.712, -44.0087, -187.237, -17.2241, -167.245, -17.5622, -134.19, -36.5111, -96.2681, -2.68158, -44.4747, -1.01415]
    ... ]
}
'''

'\n{\n  "sample_rate": 32000,\n  "n_mfcc_coefficients": 13,\n  "n_mel_filters": 40,\n  "min_freq": 300,\n  "max_freq": 8000,\n  "mfcc_wav_frame_size": 1024,\n  "window_overlap": 0.5,\n  "seconds_per_frame": 0.032,\n  "length_in_seconds": 12.832,\n  "mfcc": [\n    [-1339.66, -248.085, -14.3959, -197.458, -15.4143, -154.424, 16.0205, -110.371, -6.28836, -98.6062, -7.08902, -51.944, 17.5438],\n    [-1188.31, -231.712, -44.0087, -187.237, -17.2241, -167.245, -17.5622, -134.19, -36.5111, -96.2681, -2.68158, -44.4747, -1.01415]\n    ... ]\n}\n'

In [None]:
class Wav2Mfcc:
    """
    In-memory representation of one MFCC JSON object.

    Holds the extraction parameters copied from the parsed JSON mapping, the
    MFCC matrix (one row per frame) and its first and second time derivatives
    (delta and delta-delta), all stored as float32 arrays.
    """

    def __init__(self, mfcc_obj):
        """
        Build the object from a parsed JSON mapping.

        Required keys: 'sample_rate', 'n_mfcc_coefficients', 'n_mel_filters',
        'min_freq', 'max_freq', 'mfcc_wav_frame_size', 'window_overlap',
        'seconds_per_frame', 'length_in_seconds' and 'mfcc'.
        Optional keys: 'mfcc_delta' and 'mfcc_delta_delta'. Each one that is
        absent (or None) is derived numerically; each one that is present is
        kept as given.

        Raises:
            KeyError: if a required key is missing.
        """
        self.sample_rate = mfcc_obj['sample_rate']
        self.n_mfcc_coefficients = mfcc_obj['n_mfcc_coefficients']
        self.n_mel_filters = mfcc_obj['n_mel_filters']
        self.min_freq = mfcc_obj['min_freq']
        self.max_freq = mfcc_obj['max_freq']
        self.mfcc_wav_frame_size = mfcc_obj['mfcc_wav_frame_size']
        self.window_overlap = mfcc_obj['window_overlap']
        self.seconds_per_frame = mfcc_obj['seconds_per_frame']
        self.length_in_seconds = mfcc_obj['length_in_seconds']
        self.mfcc = np.array(mfcc_obj['mfcc'], dtype=np.float32)

        delta = mfcc_obj.get('mfcc_delta')
        delta_delta = mfcc_obj.get('mfcc_delta_delta')

        # Resolve the two derivatives independently so that a missing delta
        # is never left as all zeros and a provided delta is never discarded.
        if delta is not None:
            self.delta_mfcc = np.array(delta, dtype=np.float32)
        else:
            self.delta_mfcc = self._frame_gradient(self.mfcc)

        if delta_delta is not None:
            self.delta_delta_mfcc = np.array(delta_delta, dtype=np.float32)
        else:
            self.delta_delta_mfcc = self._frame_gradient(self.delta_mfcc)

    def get_mfcc(self):
        """Return the MFCC matrix."""
        return self.mfcc

    def get_sample_rate(self):
        """Return the stored 'sample_rate' value."""
        return self.sample_rate

    def get_n_mfcc_coefficients(self):
        """Return the stored 'n_mfcc_coefficients' value."""
        return self.n_mfcc_coefficients

    def get_n_mel_filters(self):
        """Return the stored 'n_mel_filters' value."""
        return self.n_mel_filters

    def get_min_freq(self):
        """Return the stored 'min_freq' value."""
        return self.min_freq

    def get_max_freq(self):
        """Return the stored 'max_freq' value."""
        return self.max_freq

    def get_mfcc_wav_frame_size(self):
        """Return the stored 'mfcc_wav_frame_size' value."""
        return self.mfcc_wav_frame_size

    def get_window_overlap(self):
        """Return the stored 'window_overlap' value."""
        return self.window_overlap

    def get_seconds_per_frame(self):
        """Return the stored 'seconds_per_frame' value."""
        return self.seconds_per_frame

    def get_length_in_seconds(self):
        """Return the stored 'length_in_seconds' value."""
        return self.length_in_seconds

    def get_mfcc_shape(self):
        """Return the shape of the MFCC matrix."""
        return self.mfcc.shape

    def get_delta_mfcc(self):
        """Return the first time derivative of the MFCC matrix."""
        return self.delta_mfcc

    def get_delta_delta_mfcc(self):
        """Return the second time derivative of the MFCC matrix."""
        return self.delta_delta_mfcc

    def _frame_gradient(self, features):
        """
        Differentiate `features` along the frame axis (axis 0).

        The spacing between consecutive frames is taken as
        seconds_per_frame * (1 - window_overlap).
        """
        # np.gradient needs at least two samples along the axis; with fewer
        # frames there is no slope to estimate, so report zeros instead.
        if features.shape[0] < 2:
            return np.zeros_like(features)
        delta_time = self.seconds_per_frame * (1 - self.window_overlap)
        return np.gradient(features, delta_time, axis=0)

    def calculate_mfcc_deltas(self):
        """
        Recompute both derivatives from the current MFCC matrix.

        Overwrites self.delta_mfcc with the gradient of self.mfcc and
        self.delta_delta_mfcc with the gradient of that result.
        """
        self.delta_mfcc = self._frame_gradient(self.mfcc)
        self.delta_delta_mfcc = self._frame_gradient(self.delta_mfcc)

    def batch_mfcc(self, seconds_per_batch=1.0, overlap=0.5, verbose=True):
        """
        Split the MFCC features into overlapping batches.

        The MFCC, delta and delta-delta matrices are joined column-wise and
        cut into windows of `batch_size` consecutive frames.

        Args:
            seconds_per_batch: window length; converted to a frame count as
                int(seconds_per_batch / seconds_per_frame).
            overlap: fraction of each window shared with the next one.
            verbose: when True (default) print the combined and raw shapes.

        Returns:
            Array of shape (n_batches, batch_size, n_columns).

        Raises:
            ValueError: if the deltas are unset, the window holds no frame,
                the window is longer than the data, or the overlap leaves no
                forward step between windows.
        """
        if self.delta_mfcc is None or self.delta_delta_mfcc is None:
            raise ValueError("Delta MFCC not calculated. Please calculate delta MFCC before batching.")
        ### Join all mfcc features with delta and delta_delta
        mfcc_combined = np.concatenate((self.mfcc, self.delta_mfcc, self.delta_delta_mfcc), axis=1)
        if verbose:
            print("mfcc_combined shape:", mfcc_combined.shape)
            print("mfcc shape:", self.mfcc.shape)
        # NOTE(review): the frame count uses seconds_per_frame, while the
        # derivative code treats consecutive frames as
        # seconds_per_frame * (1 - window_overlap) apart, so a batch may span
        # less time than seconds_per_batch. Left unchanged because callers may
        # depend on the resulting shape - confirm the intended meaning.
        batch_size = int(seconds_per_batch / self.seconds_per_frame)
        if batch_size <= 0:
            raise ValueError("seconds_per_batch must cover at least one frame.")
        if batch_size > len(self.mfcc):
            raise ValueError("Batch size is larger than the MFCC length.")
        overlap_size = int(batch_size * overlap)
        step = batch_size - overlap_size
        if step <= 0:
            raise ValueError("overlap must leave a positive step between batches.")
        # "+ 1" keeps the last window that still fits entirely in the data
        # (start index len - batch_size), which was previously dropped.
        last_start = len(self.mfcc) - batch_size
        mfcc_batches = [
            mfcc_combined[start:start + batch_size]
            for start in range(0, last_start + 1, step)
        ]
        return np.array(mfcc_batches)

    def dump_to_json(self, filename):
        """
        Save the MFCC object to a JSON file.

        Writes the extraction parameters plus the MFCC, delta and delta-delta
        matrices (as nested lists) under the same keys __init__ reads.
        """
        mfcc_obj = {
            'sample_rate': self.sample_rate,
            'n_mfcc_coefficients': self.n_mfcc_coefficients,
            'n_mel_filters': self.n_mel_filters,
            'min_freq': self.min_freq,
            'max_freq': self.max_freq,
            'mfcc_wav_frame_size': self.mfcc_wav_frame_size,
            'window_overlap': self.window_overlap,
            'seconds_per_frame': self.seconds_per_frame,
            'length_in_seconds': self.length_in_seconds,
            'mfcc': self.mfcc.tolist(),
            'mfcc_delta': self.delta_mfcc.tolist(),
            'mfcc_delta_delta': self.delta_delta_mfcc.tolist()
        }
        with open(filename, 'w') as f:
            json.dump(mfcc_obj, f, indent=4)

In [49]:
# Wrap the parsed JSON object loaded earlier in a Wav2Mfcc instance.
wav2mfcc_test = Wav2Mfcc(mfcc_obj)


In [50]:
# Batch with the default arguments (seconds_per_batch=1.0, overlap=0.5) and
# show the shape of the resulting array.
wav2mfcc_test.batch_mfcc().shape

mfcc_combined shape: (775, 39)
mfcc shape: (775, 13)


(47, 31, 39)

### Batch-load all JSON MFCC objects