# Read-Write Data (Outdated, replaced by generate_data_py)

Provides cells that read data from CSV files and write it to .npy files for ease of use in other scripts. Needs the CSV files produced by the following MATLAB files:
* `generate_data.m`

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def load_mfcc_data(labelFilename, dataFilename, numCoeffs):
    """Load class labels and MFCC features from two csv files.

    Both files are parsed with ``pd.read_csv`` defaults, so their first line
    is consumed as a header row.

    Args:
        labelFilename: Path to the label csv. The first column is used; the
            value ``"ad"`` maps to class 0 and every other value to class 1.
        dataFilename: Path to the MFCC csv. Each row is one sample flattened
            coefficient-major, i.e. all frames of coefficient 0, then all
            frames of coefficient 1, and so on.
        numCoeffs: Number of MFCC coefficients per frame.

    Returns:
        A tuple ``(labels, mfcc_data)`` where ``labels`` is an ``int64``
        array of shape ``(numSamples,)`` and ``mfcc_data`` is a ``float32``
        array of shape ``(numSamples, numFrames, numCoeffs)``.

    Raises:
        ValueError: If the data file does not contain exactly one row per
            label, or if its column count is not a multiple of ``numCoeffs``.
    """
    label_column = pd.read_csv(labelFilename).iloc[:, 0]
    labels = np.array(
        [0 if label == "ad" else 1 for label in label_column], dtype=np.int64
    )
    numSamples = labels.shape[0]
    print(f"Number of samples: {numSamples}")

    print("Start reading in MFCC's")
    flat = pd.read_csv(dataFilename).to_numpy(dtype=np.float32)

    # Refuse mismatched files up front: pairing labels with the wrong rows
    # (or with unfilled memory) would silently corrupt the saved dataset.
    if flat.shape[0] != numSamples:
        raise ValueError(
            f"{dataFilename} has {flat.shape[0]} rows but {labelFilename} "
            f"has {numSamples} labels"
        )
    numFrames, leftover = divmod(flat.shape[1], numCoeffs)
    if leftover:
        raise ValueError(
            f"{dataFilename} has {flat.shape[1]} columns, which is not a "
            f"multiple of numCoeffs={numCoeffs}"
        )

    # Each row is (numCoeffs, numFrames); swap the last two axes so frames
    # come first, and copy so the result is a plain C-contiguous array.
    mfcc_data = np.ascontiguousarray(
        flat.reshape(numSamples, numCoeffs, numFrames).transpose(0, 2, 1)
    )
    print("finished")

    return labels, mfcc_data

In [3]:
# Convert the MFCC csv export into .npy files (labels and features).
numCoeffs = 12
labelFilename, dataFilename = "labels.csv", "mfcc_data.csv"
mfcc_labels, mfcc_data = load_mfcc_data(
    labelFilename=labelFilename,
    dataFilename=dataFilename,
    numCoeffs=numCoeffs,
)
print(f"Data Shape: {mfcc_data.shape}")
np.save("mfcc_data.npy", mfcc_data)
np.save("mfcc_labels.npy", mfcc_labels)

Number of samples: 967
Start reading in MFCC's
finished
Data Shape: (967, 498, 12)


In [4]:
def load_raw_data(labelFilename, dataFilename):
    """Load class labels and raw signals from two csv files.

    Both files are parsed with ``pd.read_csv`` defaults, so their first line
    is consumed as a header row.

    Args:
        labelFilename: Path to the label csv. The first column is used; the
            value ``"ad"`` maps to class 0 and every other value to class 1.
        dataFilename: Path to the signal csv, one sample per row. All
            columns are parsed as ``float32``.

    Returns:
        A tuple ``(labels, data_signals)`` where ``labels`` is an ``int64``
        array of shape ``(numSamples,)`` and ``data_signals`` is a
        ``float32`` array with one row per sample.

    Raises:
        ValueError: If the data file does not contain exactly one row per
            label.
    """
    label_column = pd.read_csv(labelFilename).iloc[:, 0]
    labels = np.array(
        [0 if label == "ad" else 1 for label in label_column], dtype=np.int64
    )
    numSamples = labels.shape[0]
    print(f"Number of samples: {numSamples}")

    print("Start reading raw data")
    data_signals = pd.read_csv(dataFilename, dtype=np.float32).to_numpy()
    print("finished")

    # Labels and signals are paired by row position, so a count mismatch
    # means the two files do not belong together.
    if data_signals.shape[0] != numSamples:
        raise ValueError(
            f"{dataFilename} has {data_signals.shape[0]} rows but "
            f"{labelFilename} has {numSamples} labels"
        )

    return labels, data_signals

In [5]:
# Convert the full raw-signal csv export into .npy files.
labelFilename, dataFilename = "labels.csv", "raw_data.csv"
raw_labels, raw_data = load_raw_data(
    labelFilename=labelFilename,
    dataFilename=dataFilename,
)
print(f"Data Shape: {raw_data.shape}")
np.save("raw_data.npy", raw_data)
np.save("raw_labels.npy", raw_labels)

Number of samples: 967
Start reading raw data
finished
Data Shape: (967, 220500)


In [6]:
# Convert the reduced ("small") raw-signal csv export into .npy files.
labelFilename, dataFilename = "labels_small.csv", "raw_data_small.csv"
raw_labels_small, raw_data_small = load_raw_data(
    labelFilename=labelFilename,
    dataFilename=dataFilename,
)
print(f"Data Shape: {raw_data_small.shape}")
np.save("raw_data_small.npy", raw_data_small)
np.save("raw_labels_small.npy", raw_labels_small)

Number of samples: 100
Start reading raw data
finished
Data Shape: (100, 220500)
