In [None]:
import numpy as np
from sklearn import preprocessing

In [None]:
# Necessary files
# The feature directory is defined once so that pointing the notebook at a
# different feature set only requires editing a single line.
featDir = "/home/lab/Kevin/PhonemeClassification/Features/Non-Causal Anechoic"
trainFile = featDir + "/mfcc13_train.txt"
devFile = featDir + "/mfcc13_dev.txt"
testFile = featDir + "/mfcc13_test.txt"
# Alternative feature files kept from the original notebook (uncomment to use):
#trainFile = "mfcc13_train_dr1.txt"
#testFile = "mfcc13_test_dr1.txt"

In [None]:
# Read in files
def readFeatFile(filename, featStart=78, featStop=104):
    """ This function reads features and labels from a text file.

    Each non-blank line describes one frame: whitespace-separated feature
    values followed by the phone label as the last token. Blank lines are
    skipped.

    Args:
        filename (str): name of text file containing features and labels
        featStart (int): first feature column to keep (inclusive)
        featStop (int): last feature column to keep (exclusive). The default
            window 78:104 keeps just the MFCCs and deltas of the current frame.

    Returns:
        X (np.array): float32 matrix of features, one row per frame
        y (list): list of phoneme labels (str), one per frame

    Raises:
        ValueError: if the file contains no frames, or if a feature token
            cannot be parsed as a number
    """

    # Initialize lists to hold features and phone labels
    X = []
    y = []

    # The context manager closes the file even if parsing fails part-way
    with open(filename, "r") as file_obj:
        for line in file_obj:
            # Single line in text file representing a single frame
            singleFrame = line.split()

            # Blank lines (e.g. a trailing newline) carry no frame
            if not singleFrame:
                continue

            # Phone labels expressed as strings
            y.append(singleFrame[-1])

            # Convert features from strings to numbers
            X.append([float(value) for value in singleFrame[:-1]])

    # Fail with a clear message instead of an opaque indexing error below
    if not X:
        raise ValueError("No feature frames found in " + str(filename))

    X = np.array(X, dtype='float32')
    X = X[:, featStart:featStop]

    return X, y


In [None]:
# Load the (features, labels) pair of every split: training, development, testing
(xTrain, yTrain), (xDev, yDev), (xTest, yTest) = (
    readFeatFile(featFile) for featFile in (trainFile, devFile, testFile)
)

In [None]:
# Fold the dev set into the test set (test frames first, dev frames after)
xTest = np.concatenate((xTest, xDev), axis=0)
yTest += yDev

In [None]:
# Split the training set into training and validation
from sklearn.model_selection import train_test_split

# Fixed seed so that the same frames end up in the validation split on every
# run; without it the split is drawn differently each time the cell executes
splitSeed = 0

xTrain, xValid, yTrain, yValid = train_test_split(
    xTrain, yTrain, test_size=0.1, random_state=splitSeed)

In [None]:
# Standardize every split using statistics estimated on the training split only
scaler = preprocessing.StandardScaler().fit(xTrain)
xTrain, xValid, xTest = (
    scaler.transform(split) for split in (xTrain, xValid, xTest)
)

In [None]:
# Quick check: display the Python type of one test label before the labels
# are encoded as integers
type(yTest[1])

In [None]:
# Transform phone labels to integers since this is required by PyTorch.
# The dtype is spelled out as int64 because 'long' follows the platform's
# C long (32-bit on Windows), while PyTorch expects 64-bit class targets.
# The encoder is fitted on the training labels only; a label that appears
# only in the validation or test split makes transform() raise.
le = preprocessing.LabelEncoder()
yTrain = le.fit_transform(yTrain).astype('int64')
yValid = le.transform(yValid).astype('int64')
yTest = le.transform(yTest).astype('int64')

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU
gpuAvailable = torch.cuda.is_available()
device = torch.device("cuda:0" if gpuAvailable else "cpu")
print("Running on the GPU" if gpuAvailable else "Running on the CPU")

In [None]:
# Architecture
class Net(nn.Module):
    """Feed-forward classifier with a single sigmoid hidden layer.

    Args:
        num_features (int): size of each input feature vector
        num_classes (int): number of output classes
        hidden_size (int): number of hidden units (default 250)
    """

    def __init__(self, num_features, num_classes, hidden_size=250):
        super().__init__()

        self.fc1 = nn.Linear(num_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs.

        Args:
            x (torch.Tensor): inputs of shape (batch, num_features)

        Returns:
            torch.Tensor: log-softmax over dim 1, shape (batch, num_classes)
        """
        x = torch.sigmoid(self.fc1(x))
        x = self.fc2(x)

        # Log-probabilities, not raw logits
        return F.log_softmax(x, dim=1)

In [None]:
# Initialize weights
def initializeWeights(m):
    """ Initialize weights from Uniform(-0.1,0.1) distribution
    as was done in Graves and Schmidhuber, 2005

    Meant to be passed to ``Module.apply``, which calls it once per
    submodule. Only ``nn.Linear`` layers are modified; every other module
    is left untouched.

    Args:
        m (nn.Module): module to (possibly) re-initialize in place

    Returns:
        none
    """
    # Check the module that was passed in; comparing type(nn) with the string
    # 'Linear' is never true, so the initialization would silently be skipped
    if isinstance(m, nn.Linear):
        torch.nn.init.uniform_(m.weight, a=-0.1, b=0.1)
        # Linear layers built with bias=False have no bias to initialize
        if m.bias is not None:
            torch.nn.init.uniform_(m.bias, a=-0.1, b=0.1)

In [None]:
# Size the network from the data: one input per feature column and one
# output per distinct training label
numFeatures = xTrain.shape[1]
numClasses = np.unique(yTrain).size
net = Net(numFeatures, numClasses)
net.apply(initializeWeights)
net.to(device)

In [None]:
# Convert numpy arrays into tensors.
# torch.as_tensor also accepts values that are already tensors, so re-running
# this cell is harmless; torch.from_numpy raises TypeError on a tensor.
xTrain = torch.as_tensor(xTrain)
xTest = torch.as_tensor(xTest)
xValid = torch.as_tensor(xValid)
yValid = torch.as_tensor(yValid)
yTrain = torch.as_tensor(yTrain)
yTest = torch.as_tensor(yTest)

In [None]:
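# TODO: mini-batch training is not implemented yet. The earlier draft below
# wrapped the *test* tensors in a plain list, which is not a dataset of samples;
# a working version would wrap the training tensors in a TensorDataset.
#trainSet = torch.utils.data.DataLoader([xTest,yTest],batch_size=10,shuffle=True)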

In [None]:
# Training
num_epochs = 1000
learn_rate = 1e-5
log_every = 100  # print the losses once every this many epochs

optimizer = optim.Adam(net.parameters(), lr=learn_rate)

# Put data onto device
xTrain, yTrain = xTrain.to(device), yTrain.to(device)
xValid, yValid = xValid.to(device), yValid.to(device)

# Full-batch training: every epoch is a single optimizer step computed on the
# whole training set
for epoch in range(num_epochs):
    report = epoch % log_every == log_every - 1

    # The network returns log-probabilities, so negative log-likelihood is
    # the matching loss (cross_entropy would apply log-softmax a second time)
    train_outputs = net(xTrain)
    train_loss = F.nll_loss(train_outputs, yTrain)

    # The validation loss is only used for reporting. It is evaluated before
    # the update so that it is measured with the same weights as train_loss,
    # and without autograd since it is never backpropagated.
    if report:
        with torch.no_grad():
            valid_outputs = net(xValid)
            valid_loss = F.nll_loss(valid_outputs, yValid)

    # Backpropagate wrt training loss and optimize
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    if report:
        print("Epoch: {}, Training Loss: {}, Validation Loss: {}".
                format(epoch+1, round(float(train_loss),3), round(float(valid_loss),3)))
            

In [None]:
# Testing
# Move testing data to device
xTest = xTest.to(device)

# Inference only: no gradients are needed, so skip autograd bookkeeping for
# the forward pass over the whole test set
with torch.no_grad():
    outputs = net(xTest)
    yPred = torch.argmax(outputs, dim=1)

# Move outputs and predictions back to CPU (yTest is compared there)
outputs = outputs.to('cpu')
yPred = yPred.to('cpu')

# Accuracy: fraction of frames whose predicted class equals the label
accuracy = float(torch.sum(yTest==yPred))/float(len(yTest))
print("Accuracy: ", round(accuracy,3))