# Read-Write Data

Provides cells that read label and feature data from CSV files and save them as `.npy` files for ease of use in other scripts.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def load_mfcc_data(labelFilename, dataFilename, numCoeffs):
    """Load class labels and MFCC features from two CSV files.

    Parameters
    ----------
    labelFilename : str or path-like
        CSV file with one row per sample. Only the first column is used:
        rows whose first column equals ``'ad'`` get label 0, every other
        row gets label 1.
    dataFilename : str or path-like
        CSV file with one row per sample. Each row is interpreted as
        ``numCoeffs`` consecutive runs of ``numFrames`` values, where
        ``numFrames = number_of_columns // numCoeffs``.
    numCoeffs : int
        Number of MFCC coefficients per frame.

    Returns
    -------
    labels : np.ndarray
        Shape ``(numSamples,)``, dtype ``int64``.
    mfcc_spectrograms : np.ndarray
        Shape ``(numSamples, numFrames, numCoeffs)``, dtype ``float32``.

    Raises
    ------
    ValueError
        If the label and data files have different row counts, or if the
        number of data columns is not a multiple of ``numCoeffs``.
    """
    # Labels come from the label file (not the data file).
    labels_raw = pd.read_csv(labelFilename)
    numSamples = labels_raw.shape[0]
    print(f"Number of samples: {numSamples}")
    is_ad = (labels_raw.iloc[:, 0] == 'ad').to_numpy(dtype=bool)
    labels = np.where(is_ad, 0, 1).astype(np.int64)
    print("finished labels")

    print("start data")
    # Parse the (potentially large) data CSV exactly once.
    data_raw = pd.read_csv(dataFilename)
    if data_raw.shape[0] != numSamples:
        raise ValueError(
            f"{dataFilename} has {data_raw.shape[0]} rows but "
            f"{labelFilename} has {numSamples} labels"
        )
    numFrames, leftover = divmod(data_raw.shape[1], numCoeffs)
    if leftover:
        raise ValueError(
            f"{dataFilename} has {data_raw.shape[1]} columns, which is not "
            f"a multiple of numCoeffs={numCoeffs}"
        )
    # Each row is stored as (numCoeffs, numFrames); swap the last two axes so
    # every sample ends up as (numFrames, numCoeffs).
    flat = data_raw.to_numpy(dtype=np.float32)
    mfcc_spectrograms = np.ascontiguousarray(
        flat.reshape(numSamples, numCoeffs, numFrames).transpose(0, 2, 1)
    )
    print("finished data")
    return labels, mfcc_spectrograms

In [3]:
def load_raw_data(labelFilename, dataFilename, chunkSize=100, sampleRate=44100, dataLength=5):
    """Load class labels and raw signals from two CSV files.

    The data CSV is read in chunks of ``chunkSize`` rows so the whole file
    never has to be parsed into a single DataFrame.

    Parameters
    ----------
    labelFilename : str or path-like
        CSV file with one row per sample. Only the first column is used:
        rows whose first column equals ``'ad'`` get label 0, every other
        row gets label 1.
    dataFilename : str or path-like
        CSV file with one row per sample and ``dataLength * sampleRate``
        float columns.
    chunkSize : int, optional
        Number of CSV rows parsed per chunk (default 100).
    sampleRate : int, optional
        Values per unit of ``dataLength`` (default 44100).
    dataLength : int, optional
        Signal length multiplier (default 5; presumably seconds of audio --
        confirm against whatever produced the CSV).

    Returns
    -------
    labels : np.ndarray
        Shape ``(numSamples,)``, dtype ``int64``.
    data_signals : np.ndarray
        Shape ``(numSamples, dataLength * sampleRate)``, dtype ``float32``.

    Raises
    ------
    ValueError
        If a chunk's column count differs from ``dataLength * sampleRate``
        or the data file's row count differs from the number of labels.
    """
    labels_raw = pd.read_csv(labelFilename)
    numSamples = labels_raw.shape[0]
    print(f"Number of samples: {numSamples}")
    signalLength = dataLength * sampleRate

    is_ad = (labels_raw.iloc[:, 0] == 'ad').to_numpy(dtype=bool)
    labels = np.where(is_ad, 0, 1).astype(np.int64)
    print("finished labels")

    print("start data")
    # Preallocate; every row is filled below and the total is checked afterwards.
    data_signals = np.empty((numSamples, signalLength), dtype=np.float32)
    # Running offset across ALL chunks, so each chunk lands after the previous
    # one instead of overwriting the first rows of the output.
    rowsRead = 0
    for chunk in pd.read_csv(dataFilename, chunksize=chunkSize, dtype=np.float32):
        print(chunk.shape)
        numRows, numCols = chunk.shape
        if numCols != signalLength:
            raise ValueError(
                f"{dataFilename} has {numCols} columns, expected {signalLength}"
            )
        if rowsRead + numRows > numSamples:
            raise ValueError(
                f"{dataFilename} has more rows than the {numSamples} labels "
                f"in {labelFilename}"
            )
        data_signals[rowsRead:rowsRead + numRows] = chunk.to_numpy()
        rowsRead += numRows
    if rowsRead != numSamples:
        raise ValueError(
            f"{dataFilename} has {rowsRead} rows but {labelFilename} has "
            f"{numSamples} labels"
        )
    print("finished data")
    return labels, data_signals

In [4]:
# Read MFCCs: load the labels and MFCC features from CSV via load_mfcc_data,
# then save both arrays as .npy files in the current working directory.
labelFilename = "labels.csv"
dataFilename = "mfcc_data.csv"
# Coefficients per frame; load_mfcc_data divides the CSV column count by this
# value to get the number of frames.
numCoeffs = 12
mfcc_labels, mfcc_data = load_mfcc_data(labelFilename, dataFilename, numCoeffs)
# The array is laid out as (samples, frames, coefficients).
print(f"Data Shape: {mfcc_data.shape}")
np.save("mfcc_labels.npy", mfcc_labels)
np.save("mfcc_data.npy", mfcc_data)

Number of samples: 967
finished labels
start data
finished data
 Data Shape: (967, 498, 12)


In [None]:
# Read raw signals: load the labels and raw data from CSV via load_raw_data
# (which parses the data CSV in chunks), then save both arrays as .npy files
# in the current working directory.
labelFilename = "labels.csv"
dataFilename  = "raw_data.csv"
raw_labels, raw_data = load_raw_data(labelFilename, dataFilename)
# The array is laid out as (samples, values per signal).
print(f"Data Shape: {raw_data.shape}")
np.save("raw_labels.npy", raw_labels)
np.save("raw_data.npy", raw_data)

Number of samples: 967
finished labels
start data
(100, 220500)
