# DSP Assignment
Name: Nipun Haldar

Roll Number: ECE16U014


**Note: The following code was run on *Google Colab* to get fast runtimes**

In [15]:
# Mount Google Drive at /content/drive; the feature CSV and the wav folders
# used further down are read from '/content/drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# libraries to handle data
import os
import numpy as np
import csv
import pandas as pd

# library to extract audio features
import librosa

# libraries for preprocessing data 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# library to build neural networks
import tensorflow as tf

### Extracting all features from audio and writing to csv file

The following features are extracted
* Mel-frequency cepstral coefficients (MFCC)(20 in number)
* Spectral Centroid
* Spectral Bandwidth
* Zero Crossing Rate
* Chroma Frequencies
* Spectral Roll-off
* Root-Mean Square Energy

The corresponding **data.csv** has 2000 rows and 28 columns: the filename, 26 feature columns and the label

In [0]:
# Build the per-file feature table once and cache it as a CSV on Drive.
# Whether the cache already existed or was just created, the table is then
# loaded into `data`, which every cell below relies on.
csv_path = '/content/drive/My Drive/data.csv'

if not os.path.exists(csv_path):
    cols = ['filename','chroma_stft','rms','spectral_centroid','spectral_bandwidth','rolloff','zero_crossing_rate']
    cols += [f'mfcc_{i}' for i in range(20)]
    cols.append('label')

    # Write to a temporary file and rename it at the very end, so an
    # interrupted extraction never leaves a truncated CSV at the cache path.
    tmp_path = csv_path + '.tmp'
    with open(tmp_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(cols)

        # One sub-folder per class (wavs/0 ... wavs/9); the folder index is
        # written as the label of every file inside it.
        for i in range(10):
            path = f'/content/drive/My Drive/wavs/{i}'
            # sorted() keeps the row order independent of the listing order
            for j in sorted(os.listdir(path)):
                signal, sr = librosa.load(f'{path}/{j}', mono=True)

                # Audio is always passed as `y=`: recent librosa releases
                # reject it as a positional argument for some features.
                chroma_stft = librosa.feature.chroma_stft(y=signal, sr=sr)
                rms = librosa.feature.rms(y=signal)
                spec_cent = librosa.feature.spectral_centroid(y=signal, sr=sr)
                spec_bw = librosa.feature.spectral_bandwidth(y=signal, sr=sr)
                rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sr)
                zcr = librosa.feature.zero_crossing_rate(y=signal)
                mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=20)

                # Every feature is reduced to a single number per file: its mean.
                row = [f'{j}',np.mean(chroma_stft),np.mean(rms),np.mean(spec_cent),np.mean(spec_bw),np.mean(rolloff),np.mean(zcr)]
                row.extend(np.mean(mfcc[k]) for k in range(20))
                row.append(i)
                writer.writerow(row)

    os.replace(tmp_path, csv_path)

data = pd.read_csv(csv_path)

### Analysing the data using pandas

In [18]:
# (rows, columns) of the loaded feature table
data.shape

(2000, 28)

In [19]:
# Preview the table: head() returns the first five rows by default
print(data.head())

       filename  chroma_stft       rms  ...   mfcc_18   mfcc_19  label
0  zero_110.wav     0.419840  0.045476  ...  4.510585 -2.492918      0
1  zero_100.wav     0.456254  0.041470  ...  3.379130 -0.600034      0
2  zero_090.wav     0.455132  0.041122  ...  3.169573 -0.427649      0
3  zero_080.wav     0.417892  0.039619  ...  0.475260 -3.052964      0
4  zero_010.wav     0.355394  0.045962  ...  6.168610 -3.894580      0

[5 rows x 28 columns]


In [0]:
# Drop the unnecessary 'filename' column (an identifier, not a feature).
# errors='ignore' keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
data = data.drop(columns=['filename'], errors='ignore')

### Feature Scaling
Standardize features by removing the mean and scaling to unit variance

The standard score of a sample ***x*** is calculated as:

*z = (x - u) / s*

where ***u*** is the mean of the training samples and ***s*** is the standard deviation of the training samples.

In [0]:
# Standardize every column except the last one (the label) to zero mean and
# unit variance.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split, so rows that later land in the test set contribute to the mean and
# standard deviation used here.
scaler = StandardScaler().fit(data.iloc[:, :-1])
X = scaler.transform(data.iloc[:, :-1])

### Type conversion
Converting the pandas `DataFrame` to numpy array

In [0]:
# The last column is the label; take it as a plain numpy array
y = data.iloc[:,-1].to_numpy()

### Splitting the data into Testing and Training set

In [0]:
# Split the *unscaled* feature columns first, then fit the scaler on the
# training rows only. The X computed earlier was standardized with statistics
# from every row, so using it here would leak test-set information into the
# training features. random_state is unchanged, so the same rows end up in
# the same split as before.
features_raw = data.iloc[:, :-1].to_numpy()
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features_raw, y, test_size=0.2, random_state=42)

scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

### Building and Training the Neural Network
A simple Artificial Neural Network (ANN) is used with 4 layers.

In [0]:
# Reset Keras' global state so that re-running this cell builds the network
# from a clean slate
tf.keras.backend.clear_session()

# The input width equals the number of feature columns
n_features = X_train.shape[1]

# Three ReLU hidden layers of decreasing width, then a 10-way softmax output
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(256, activation='relu', input_shape=(n_features,)))
for units in (128, 64):
    model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

In [0]:
# The labels are integer class ids rather than one-hot vectors, hence the
# sparse variant of categorical cross-entropy. Accuracy is tracked as the metric.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [26]:
# Train on the training split for 10 epochs with mini-batches of 100 samples
model.fit(X_train, y_train, epochs=10, batch_size=100)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f3b1248ba90>

### Evaluating the ANN
The network reaches an accuracy of almost **100%** on the test set (99.75% in the run below). This is because the dataset is very small (only 2000 rows). For a larger dataset the network should converge to around 98%.

In [27]:
# Evaluate on the held-out test split; the result is unpacked as the loss
# followed by the accuracy metric requested when compiling the model
test_loss, test_acc = model.evaluate(X_test,y_test)
print('accuracy: ',test_acc,'\nloss: ',test_loss)

accuracy:  0.9975000023841858 
loss:  0.017465932294726372


### Predicting...
The model predicts the two test samples shown below correctly, for the reason mentioned above.

In [28]:
# Class scores for every test sample: one row per sample, one column per class
predictions = model.predict(X_test)

# Spot-check a couple of test samples: the predicted class is the index of
# the highest score, compared against the true label at the same position.
for idx in (4, 100):
    pred = np.argmax(predictions[idx])
    actual = y_test[idx]
    print('predicted value: ',pred,'\nactual value: ',actual)

predicted value:  6 
actial value:  6
predicted value:  4 
actial value:  4
