<a href="https://colab.research.google.com/github/mhuckvale/pals0039/blob/master/Exercise_6_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![PALS0039 Logo](https://www.phon.ucl.ac.uk/courses/pals0039/images/pals0039logo.png)](https://www.phon.ucl.ac.uk/courses/pals0039/)

# Exercise 6.3

In this exercise we build a phonetic recogniser that delivers for each 10ms frame of signal a probability distribution over phones.


---
(a) Set up the standard modules. Run the code and add comments.

In [None]:
# Data handling and plotting: pandas reads the CSV tables, NumPy holds the arrays,
# matplotlib draws the figures
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output
%matplotlib inline

# Ask the runtime for TensorFlow 2.x, then import the Keras model and layer classes.
# NOTE(review): %tensorflow_version looks like a Colab-only magic - confirm it is
# available (or still needed) in the runtime this notebook is executed on.
%tensorflow_version 2.x
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Embedding, Flatten, SimpleRNN, LSTM, GRU, Bidirectional, Dropout, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences

---
(b) Read in the speech data and phone labels for training. Run the code and add comments.

In [None]:
# Phone symbol -> integer class id. The ids are simply the positions of the symbols
# in the tuple below (0 for "sil" up to 40 for "@U"), so the order of this tuple
# defines the network's output classes and must not be changed.
PHONES = {
    symbol: index
    for index, symbol in enumerate((
        "sil",
        "b", "d", "g", "p", "t", "k", "dZ", "tS",
        "m", "n", "N",
        "v", "D", "z", "Z", "f", "T", "s", "S", "h",
        "r", "w", "l", "j",
        "i:", "I", "e", "{", "V", "A:", "O:", "U", "u:", "3:", "@",
        "aI", "eI", "OI", "aU", "@U",
    ))
}

# 
def prepare_data_single(filename, seed=None):
  """Load a CSV of labelled feature vectors as shuffled (features, labels) arrays.

  Every row is treated as an independent example, so the rows are shuffled
  freely and no context between neighbouring rows is kept.

  Args:
    filename: path, URL or file-like object accepted by ``pandas.read_csv``.
      The table must have a ``LABEL`` column of phone symbols; every column
      from the third one onwards is used as a feature.
    seed: optional seed for the shuffle. With ``None`` (the default) the
      permutation is drawn from NumPy's global random state; with an integer
      the row order is reproducible.

  Returns:
    ``(feats, labels)`` where ``feats`` has shape ``(nvec, nfeat)`` and
    ``labels`` has shape ``(nvec,)`` and holds the integer ids from ``PHONES``.
    Both arrays are permuted with the same random order.

  Raises:
    ValueError: if a label in the file is not a key of ``PHONES``.
  """
  # read the whole table into memory
  df=pd.read_csv(filename)
  # number of feature vectors (rows)
  nvec=len(df)
  # the first two columns are skipped; everything after them is a feature
  feats=df.iloc[:,2:].to_numpy()
  # reject unknown symbols here: mapping them to None would silently produce an
  # object-dtype label array that only fails much later, inside training
  unknown=sorted({str(lab) for lab in df.LABEL if lab not in PHONES})
  if unknown:
    raise ValueError("%s: labels not found in PHONES: %s" % (filename, ", ".join(unknown)))
  # translate phone symbols to integer class ids
  labels=np.array([PHONES[lab] for lab in df.LABEL], dtype=int)
  # shuffle features and labels with one shared permutation so they stay aligned
  rng=np.random if seed is None else np.random.RandomState(seed)
  p = rng.permutation(nvec)
  return feats[p,:],labels[p]

# Fetch the training, validation and test CSVs and convert each one into a shuffled
# feature matrix plus a vector of integer phone ids (one entry per row of the file)
Xtrain, ytrain = prepare_data_single("https://www.phon.ucl.ac.uk/courses/pals0039/data/phone-train.csv")
Xval, yval = prepare_data_single("https://www.phon.ucl.ac.uk/courses/pals0039/data/phone-valid.csv")
Xtest, ytest = prepare_data_single("https://www.phon.ucl.ac.uk/courses/pals0039/data/phone-test.csv")

# Report the array shapes of each split: (rows, features) for X and (rows,) for y
print(Xtrain.shape,ytrain.shape)
print(Xval.shape,yval.shape)
print(Xtest.shape,ytest.shape)

# Sanity check: show the first ten (already shuffled) feature vectors and their phone ids
print(Xtrain[:10,:])
print(ytrain[:10])

---
(c) Build a simple context-free model. Run the code and add comments.

In [None]:
# Network dimensions: one input per feature column, one output per phone class
isize=Xtrain.shape[1]
osize=len(PHONES)

# Context-free classifier: each feature vector is classified on its own by two
# tanh hidden layers of 40 units followed by a softmax over the phone classes
model = Sequential()
model.add(Dense(40,activation='tanh',input_shape=(isize,)))
model.add(Dense(40,activation='tanh'))
model.add(Dense(osize, activation='softmax'))

# The targets are integer class ids rather than one-hot vectors, hence the
# "sparse" form of categorical cross-entropy
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would add a stray "None" line to the output)
model.summary()

---
(d) Train the simple model. Run the code and add comments.

In [None]:
# Train for 25 epochs in mini-batches of 256 examples, passing the validation split
# to fit() so it is evaluated alongside training; the return value is kept in
# `history` for later inspection
history=model.fit(Xtrain,ytrain, batch_size=256, validation_data=(Xval,yval), epochs=25)
# Final score on the test split: evaluate() returns the loss followed by the
# accuracy metric requested when the model was compiled
loss,accuracy=model.evaluate(Xtest,ytest)
print("Loss=%.3f Accuracy=%.1f%%" % (loss,100*accuracy))

---
(e) Read in the data again and prepare it for the sequence model. Run the code and add comments.

In [None]:
# 
def prepare_data_sequence(filename,seqlen,seed=None):
  """Load a CSV of labelled feature vectors as shuffled fixed-length sequences.

  Consecutive rows of the file are grouped into sequences of ``seqlen`` rows.
  Rows are chunked purely by position, so trailing rows that do not fill a
  whole sequence are dropped and a sequence may straddle whatever boundaries
  exist inside the file. Whole sequences are shuffled; the order of rows
  within each sequence is kept.

  Args:
    filename: path, URL or file-like object accepted by ``pandas.read_csv``.
      The table must have a ``LABEL`` column of phone symbols; every column
      from the third one onwards is used as a feature.
    seqlen: number of consecutive rows per sequence (positive integer).
    seed: optional seed for the shuffle. With ``None`` (the default) the
      permutation is drawn from NumPy's global random state; with an integer
      the sequence order is reproducible.

  Returns:
    ``(feats, labels)`` where ``feats`` has shape ``(nseq, seqlen, nfeat)`` and
    ``labels`` has shape ``(nseq, seqlen)`` and holds the integer ids from
    ``PHONES``. Both arrays are permuted with the same random order.

  Raises:
    ValueError: if ``seqlen`` is not positive, or if a label in the rows that
      are kept is not a key of ``PHONES``.
  """
  if seqlen < 1:
    raise ValueError("seqlen must be a positive integer, got %r" % (seqlen,))
  # read the whole table into memory
  df=pd.read_csv(filename)
  # number of rows available and number of complete sequences they provide
  nvec=len(df)
  nseq=nvec//seqlen
  # the first two columns are skipped; everything after them is a feature
  nfeat=len(df.columns)-2
  # keep only the rows that fill complete sequences
  used=df.iloc[:(nseq*seqlen)]
  # fold the flat (rows, features) table into (sequences, steps, features)
  feats=np.reshape(used.iloc[:,2:].to_numpy(),(nseq,seqlen,nfeat))
  # reject unknown symbols here: mapping them to None would silently produce an
  # object-dtype label array that only fails much later, inside training
  unknown=sorted({str(lab) for lab in used.LABEL if lab not in PHONES})
  if unknown:
    raise ValueError("%s: labels not found in PHONES: %s" % (filename, ", ".join(unknown)))
  # translate phone symbols to integer class ids and fold them the same way
  labels=np.array([PHONES[lab] for lab in used.LABEL], dtype=int)
  labels=np.reshape(labels,(nseq,seqlen))
  # shuffle whole sequences with one shared permutation so X and y stay aligned
  rng=np.random if seed is None else np.random.RandomState(seed)
  p = rng.permutation(nseq)
  return feats[p,:,:],labels[p,:]

# Reload the same three CSVs, this time cut into sequences of 200 consecutive rows.
# These assignments replace the frame-level arrays loaded earlier in the notebook.
Xtrain, ytrain = prepare_data_sequence("https://www.phon.ucl.ac.uk/courses/pals0039/data/phone-train.csv",200)
Xval, yval = prepare_data_sequence("https://www.phon.ucl.ac.uk/courses/pals0039/data/phone-valid.csv",200)
Xtest, ytest = prepare_data_sequence("https://www.phon.ucl.ac.uk/courses/pals0039/data/phone-test.csv",200)

# Report the array shapes of each split: (sequences, steps, features) for X and
# (sequences, steps) for y
print(Xtrain.shape,ytrain.shape)
print(Xval.shape,yval.shape)
print(Xtest.shape,ytest.shape)


---
(f) Build a sequence model with LSTM nodes. Run the code and add comments.

In [None]:
# Network dimensions taken from the sequence data: steps per sequence, features
# per step, and one output per phone class
seqlen=Xtrain.shape[1]
isize=Xtrain.shape[2]
osize=len(PHONES)

# Sequence model: two stacked bidirectional LSTM layers of 32 units. Both return
# the full output sequence (return_sequences=True) so there is one output per
# step, and the forward and backward outputs are averaged (merge_mode='ave').
# TimeDistributed applies the same softmax layer to every step.
model = Sequential()
model.add(Bidirectional(LSTM(32,return_sequences=True),input_shape=(seqlen,isize),merge_mode='ave'))
model.add(Bidirectional(LSTM(32,return_sequences=True),merge_mode='ave'))
model.add(TimeDistributed(Dense(osize, activation='softmax')))

# The targets are integer class ids rather than one-hot vectors, hence the
# "sparse" form of categorical cross-entropy
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would add a stray "None" line to the output)
model.summary()

---
(g) Train sequence model. Run the code and add comments.

In [None]:
# Train for 25 epochs in mini-batches of 16 sequences, passing the validation split
# to fit() so it is evaluated alongside training; the return value is kept in
# `history` for later inspection
history=model.fit(Xtrain,ytrain, batch_size=16, validation_data=(Xval,yval), epochs=25)
# Final score on the test split: evaluate() returns the loss followed by the
# accuracy metric requested when the model was compiled
loss,accuracy=model.evaluate(Xtest,ytest)
print("Loss=%.3f Accuracy=%.1f%%" % (loss,100*accuracy))

---
(h) Analyse network performance by phone. Run the code and add comments.

In [None]:
# Predict a distribution over phones for every step of every test sequence, merge
# the (sequence, step) axes so each row is one step, and keep the most probable
# class per step. The reference labels are flattened the same way.
ypred=model.predict(Xtest)
ypred=np.reshape(ypred,(-1,ypred.shape[-1]))
ypred=np.argmax(ypred,axis=1)
ytrue=np.reshape(ytest,(-1,))

# Phone symbols as a list ordered by class id, so that position i names class i.
# Sorting on the id makes this independent of the order in which PHONES was written.
labellist=sorted(PHONES,key=PHONES.get)

# Turn the integer codes into labelled categoricals and cross-tabulate them.
# normalize='index' scales each row to sum to 1, i.e. each row shows how the
# frames of one actual phone were distributed over the predicted phones;
# dropna=False keeps phones that never occur so the table stays square.
y_actu = pd.Categorical.from_codes(ytrue, categories=labellist)
y_pred = pd.Categorical.from_codes(ypred, categories=labellist)
df_confusion = pd.crosstab(y_actu, y_pred, rownames=['actual'], colnames=['predicted'],
                           margins=False, normalize='index', dropna=False)
df_confusion


---
(i) Plot some phone posteriors. Run the code and add comments.

In [None]:
# Inverse lookup: integer class id -> phone symbol
id2phn={v:k for k,v in PHONES.items()}

# Posteriors for the test set; take the first sequence and its reference labels,
# then collapse runs of identical labels into the phone string used as the title
ypred=model.predict(Xtest)
seq=ypred[0]
lab=ytest[0]
phn=[id2phn[l] for i,l in enumerate(lab) if i==0 or l!=lab[i-1]]

# One tick label per output class, in class-id order. Built from id2phn so this
# cell does not depend on variables left behind by other cells.
phone_ticks=[id2phn[i] for i in range(seq.shape[1])]

# Grey-scale image of the posteriors: one column per step, one row per phone,
# darker meaning more probable; phone labels are shown on both sides
plt.figure(figsize=(15,10))
plt.imshow(seq.T,origin='lower',cmap='binary',aspect='auto')
ax = plt.gca()
ax.set_yticks(range(len(phone_ticks)))
ax.set_yticklabels(phone_ticks)
# tick_params expects booleans here, not 'on'/'off' strings
ax.tick_params(axis='y', which='both', labelleft=True, labelright=True)
ax.set_xlabel("Frame")
ax.set_ylabel("Phone")
plt.title(" ".join(phn))
plt.show()

---
(j) Plot the loss and accuracy curves after training the RNN. Plot the confusion matrix as a grey-scale image (see Exercise 5.3). Experiment with the network architecture and training to find the best performing network for this problem.