In [None]:
# Make Google Drive available inside the Colab filesystem under /content/drive.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
pip install python_speech_features

In [None]:
import pandas as pd
import os
from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav

In [None]:
# Process features: locate the data and split the recordings into train / val / test.

# NOTE: both paths are relative, so they resolve against the current working
# directory (presumably /content on Colab, where Drive gets mounted -- confirm).
codes_path = "./drive/MyDrive/606-project-data/Codes"
chunks_path = "./drive/MyDrive/606-project-data/Chunks"

# os.listdir returns entries in arbitrary order; sort them so the split (and the
# order in which training samples are loaded) is the same on every run.
all_name = sorted(os.listdir(chunks_path))
#val_name = ['Lily_ Pete the cat Day 5', 'Lily_ Stop and go Day 4 Part 2', 'Lily_ Cowboy Mouse Day 4B', 'Huang_ The princess and the pea']
#test_name = ["Liu_ Pete the Cat", 'Lily_ Stop and go Day 4 Part 1', 'Lily_ Cowboy Mouse Day 4A', 'Huang_ Maisy Goes Camping Day 3']

val_name = ['Lily_ Pete the cat Day 5']
test_name = ["Liu_ Pete the Cat"]

# Everything that is not held out for validation or testing is used for training.
held_out = set(val_name) | set(test_name)
train_name = sorted(set(all_name) - held_out)

print(len(train_name)); print(train_name); print(len(val_name)); print(val_name); print(len(test_name)); print(test_name)

# Leakage check: both lines must print False.
# (`val_name in train_name` asked whether the list itself is an element of
# train_name, which is always False; test the individual names instead.)
print(any(name in train_name for name in val_name))
print(any(name in train_name for name in test_name))

In [None]:
# Build the training set: one {'feature', 'label'} record per audio chunk.
train_data = []
for recording in train_name:
  print(recording)
  audios_path = chunks_path + '/' + recording

  # Read this recording's coding sheet. The context manager closes the workbook
  # handle, which a bare pd.ExcelFile(...) would leave open.
  single_code_path = codes_path + '/' + recording + '.xlsx'
  with pd.ExcelFile(single_code_path) as workbook:
    single_code_file = workbook.parse('Sheet1', skiprows=5, index_col=None, usecols=range(1,10))

  # Give the two language-of-instruction columns explicit names ('Unnamed: 9' is
  # presumably the second column under a merged header cell -- confirm).
  # rename() returns a new frame, so no in-place mutation is needed.
  single_code_file = single_code_file.rename(
      columns={'Lang of Instruction Teacher/Student': 'Lang of Instruction Teacher',
               'Unnamed: 9': 'Lang of Instruction Student'})

  predict_target = single_code_file['Lang of Instruction Teacher'] # change the prediction target here

  # Chunks 0..59 of each recording are read; chunk j is paired with row j of the sheet.
  for j in range(60):

    single_20seconds_path = audios_path + '/' + recording + '[' + str(j) + '].wav'

    (rate, sig) = wav.read(single_20seconds_path)

    mfcc_feat = mfcc(sig, rate, nfft = 1103, winstep = 0.1) # change the feature type here
    #mfcc_feat = mfcc(sig, rate)
    #d_mfcc_feat = delta(mfcc_feat, 2)
    #fbank_feat = logfbank(sig,rate)

    # Add a leading axis of length 1 so samples can later be joined along axis 0.
    mfcc_feat = mfcc_feat.reshape(1, mfcc_feat.shape[0], mfcc_feat.shape[1])

    # Shift the coded value down by one (presumably 1-based codes -> 0-based class ids).
    label = predict_target[j] - 1

    train_data.append({'feature': mfcc_feat, 'label': label})

In [None]:
# Build the validation set: one {'feature', 'label'} record per audio chunk.
val_data = []
for recording in val_name:
  print(recording)
  audios_path = chunks_path + '/' + recording

  # Read this recording's coding sheet. The context manager closes the workbook
  # handle, which a bare pd.ExcelFile(...) would leave open.
  single_code_path = codes_path + '/' + recording + '.xlsx'
  with pd.ExcelFile(single_code_path) as workbook:
    single_code_file = workbook.parse('Sheet1', skiprows=5, index_col=None, usecols=range(1,10))

  # Give the two language-of-instruction columns explicit names ('Unnamed: 9' is
  # presumably the second column under a merged header cell -- confirm).
  # rename() returns a new frame, so no in-place mutation is needed.
  single_code_file = single_code_file.rename(
      columns={'Lang of Instruction Teacher/Student': 'Lang of Instruction Teacher',
               'Unnamed: 9': 'Lang of Instruction Student'})

  predict_target = single_code_file['Lang of Instruction Teacher'] # change the prediction target here

  # Chunks 0..59 of each recording are read; chunk j is paired with row j of the sheet.
  for j in range(60):

    single_20seconds_path = audios_path + '/' + recording + '[' + str(j) + '].wav'

    (rate, sig) = wav.read(single_20seconds_path)

    mfcc_feat = mfcc(sig, rate, nfft = 1103, winstep = 0.1) # change the feature type here
    #mfcc_feat = mfcc(sig, rate)
    #d_mfcc_feat = delta(mfcc_feat, 2)
    #fbank_feat = logfbank(sig,rate)

    # Add a leading axis of length 1 so samples can later be joined along axis 0.
    mfcc_feat = mfcc_feat.reshape(1, mfcc_feat.shape[0], mfcc_feat.shape[1])

    # Shift the coded value down by one (presumably 1-based codes -> 0-based class ids).
    label = predict_target[j] - 1

    val_data.append({'feature': mfcc_feat, 'label': label})

In [None]:
# Build the test set: one {'feature', 'label'} record per audio chunk.
test_data = []
for recording in test_name:
  print(recording)
  audios_path = chunks_path + '/' + recording

  # Read this recording's coding sheet. The context manager closes the workbook
  # handle, which a bare pd.ExcelFile(...) would leave open.
  single_code_path = codes_path + '/' + recording + '.xlsx'
  with pd.ExcelFile(single_code_path) as workbook:
    single_code_file = workbook.parse('Sheet1', skiprows=5, index_col=None, usecols=range(1,10))

  # Give the two language-of-instruction columns explicit names ('Unnamed: 9' is
  # presumably the second column under a merged header cell -- confirm).
  # rename() returns a new frame, so no in-place mutation is needed.
  single_code_file = single_code_file.rename(
      columns={'Lang of Instruction Teacher/Student': 'Lang of Instruction Teacher',
               'Unnamed: 9': 'Lang of Instruction Student'})

  predict_target = single_code_file['Lang of Instruction Teacher'] # change the prediction target here

  # Chunks 0..59 of each recording are read; chunk j is paired with row j of the sheet.
  for j in range(60):

    single_20seconds_path = audios_path + '/' + recording + '[' + str(j) + '].wav'

    (rate, sig) = wav.read(single_20seconds_path)

    mfcc_feat = mfcc(sig, rate, nfft = 1103, winstep = 0.1) # change the feature type here
    #mfcc_feat = mfcc(sig, rate)
    #d_mfcc_feat = delta(mfcc_feat, 2)
    #fbank_feat = logfbank(sig,rate)

    # Add a leading axis of length 1 so samples can later be joined along axis 0.
    mfcc_feat = mfcc_feat.reshape(1, mfcc_feat.shape[0], mfcc_feat.shape[1])

    # Shift the coded value down by one (presumably 1-based codes -> 0-based class ids).
    label = predict_target[j] - 1

    test_data.append({'feature': mfcc_feat, 'label': label})

In [None]:
# Report the number of samples per split, in the order: train, test, val.
for split_samples in (train_data, test_data, val_data):
  print(len(split_samples))

In [None]:
import numpy as np

def stack_split(samples):
  """Stack one split's per-chunk records into a feature array and a label array.

  Args:
    samples: list of {'feature': array with a leading axis of length 1,
      'label': value} records, as appended by the data-loading cells.

  Returns:
    (X, Y): X is every 'feature' array joined along axis 0; Y is a 1-D array
    holding the 'label' values in the same order.

  Raises:
    ValueError: if `samples` is empty (there is nothing to stack).
  """
  if not samples:
    raise ValueError("cannot stack an empty split")
  # A single concatenate call: growing the array inside a loop re-copied
  # everything accumulated so far on every iteration.
  X = np.concatenate([sample['feature'] for sample in samples], axis = 0)
  Y = np.array([sample['label'] for sample in samples])
  return X, Y


X_train, Y_train = stack_split(train_data)
X_val, Y_val = stack_split(val_data)
X_test, Y_test = stack_split(test_data)

print(X_train.shape); print(X_val.shape); print(X_test.shape); print(len(Y_train)); print(len(Y_val)); print(len(Y_test))

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Flatten
import keras

In [None]:
# Per-sample input shape, taken from the training array (everything after the
# sample axis) so it cannot drift from the feature-extraction settings.
# It was previously hard-coded as (401, 13).
input_shape = X_train.shape[1:]
num_classes = 5 # 5 is the number of classes

# Two stacked LSTMs that keep the full output sequence, flattened into a small
# dense classifier with a softmax over the classes.
model = keras.Sequential()
model.add(LSTM(256, return_sequences=True, input_shape = input_shape))
model.add(LSTM(128, return_sequences=True))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation = 'softmax'))
model.summary()

In [None]:
# Checkpoint to "best_model.h5", monitoring validation accuracy and keeping
# only the best-scoring model seen so far.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    "best_model.h5", monitor="val_accuracy", save_best_only=True
)
callbacks = [best_checkpoint]

#callbacks = [
#    keras.callbacks.ModelCheckpoint(
#        "best_model.h5", save_best_only=True, monitor="val_loss"
#    ),
#    keras.callbacks.ReduceLROnPlateau(
#        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
#    ),
#    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
#]

In [None]:
# Adam optimiser with sparse categorical cross-entropy (labels are passed as
# integer class ids rather than one-hot vectors); accuracy is reported per epoch.
model.compile(
    loss='SparseCategoricalCrossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the training split and score the validation split after every epoch;
# the returned History object holds the per-epoch metrics.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs = 20,
    batch_size = 60,
    shuffle = True,
    callbacks=callbacks,
)

In [None]:
# Plot the per-epoch training and validation loss / accuracy side by side.
history_dict = history.history
loss_values = history_dict['loss']
acc_values = history_dict['accuracy']
val_loss_values = history_dict['val_loss']
val_acc_values = history_dict['val_accuracy']

# Take the x axis from the number of epochs actually recorded instead of a
# hard-coded range(1, 21): the plot then still works if the epoch count in
# model.fit changes or training stops early.
epochs = range(1, len(loss_values) + 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: loss.
ax1.plot(epochs, loss_values, 'co', label='Training Loss')
ax1.plot(epochs, val_loss_values, 'm', label='Validation Loss')
ax1.set_title('Training and validation loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.legend()

# Right panel: accuracy.
ax2.plot(epochs, acc_values, 'co', label='Training accuracy')
ax2.plot(epochs, val_acc_values, 'm', label='Validation accuracy')
ax2.set_title('Training and validation accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.legend()

plt.show()

In [None]:
# Replace the in-memory model with the checkpoint saved in "best_model.h5".
model = keras.models.load_model("best_model.h5")

# Evaluate on train, val and test (in that order); each call yields the loss
# followed by the accuracy metric the model was compiled with.
(TrainLoss, Trainacc), (ValLoss, Valacc), (TestLoss, Testacc) = (
    model.evaluate(features, labels)
    for features, labels in ((X_train, Y_train), (X_val, Y_val), (X_test, Y_test))
)

print("Accuracy on training set", Trainacc)
print("Accuracy on val set", Valacc)
print("Accuracy on test set", Testacc)

In [None]:
# Class scores for the test split; the predicted class is the arg-max over axis 1.
Y_pred = model.predict(X_test)
predicted_classes = np.argmax(Y_pred, axis=1)

# Show predictions next to the true labels, then the confusion matrix
# (called with the true labels first, predictions second).
print(predicted_classes)
print(Y_test)
print('Confusion_matrix: ', tf.math.confusion_matrix(Y_test, predicted_classes))