In [55]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
# from skimage import io, transform
import numpy as np
from numpy import newaxis
# import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
train_features = './RawTrainingFeatures1.csv'
import sys
np.set_printoptions(threshold=sys.maxsize)

In [56]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 64 output channels, 1x6  convolution
        # kernel
        self.local_conv = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(1,6), padding=(0, 2))
        
        # 64 input channels (check this?), feature maps from local convolution,
        # 128 output channels, 20x2 kernel (check this?)
        self.global_conv=nn.Conv2d(64,128,(88,2))
        
        #LSTM layer,48 cells each
        self.dec=nn.LSTM(128,48,2,dropout=0.25)
        
        # Size of output of LSTM, for now use # of hiden state features
        self.denseFF=nn.Linear(48,7)
        self.sm=nn.LogSoftmax()
        
    def forward(self, x):
        # Apply ReLu units to the results of convolution, local convoltion layer
        x=x.float()
        x=F.relu(self.local_conv(x))
        x = nn.ZeroPad2d((0,1,0,0))(x)
        x=nn.MaxPool2d(kernel_size=(1,4))(x)
        #Global convolution layer
        x=F.relu(self.global_conv(x))
        x = nn.ZeroPad2d((0,1,0,0))(x)
        x=nn.MaxPool2d(kernel_size=(1,2))(x)
        # remove second dimension
        # x=torch.squeeze(input=x, dim=0)
        x = x.permute(3, 0, 2, 1)
        x = torch.squeeze(x, dim=2)
        out,hidden=self.dec(x)
        # Feed output through dense dense/feedforward layer with softmax activation units to
        # classify the input onto one of the 7 emotion categories.
        out = out[-1, :, :]
        out=self.sm(self.denseFF(out))
        return out
        

In [102]:
class EmotionDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, csv_file_path, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.emotions_frame = pd.read_csv(csv_file_path,header=None)
        num_speakers = 250 ##self.emotions_frame.shape[0]
        self.transform = transform
        self.speaker_map={}#{"03":,"08":,"09":,"10":,"11":,"12":,"13":,"14":,"15":,"16":}
        features = self.emotions_frame.iloc[:, 1:-1].as_matrix()
        labels =self.emotions_frame.iloc[:,-1]
        speakers=self.emotions_frame.iloc[:,0]
        speaker_array = [""]*num_speakers
        
        j=0
        num_features = len(features)
        data_array=np.zeros((num_speakers,88,512),dtype='double')
        label2index = {
        "anger":0,
        "boredom":1,
        "disgust":2,
        "fear":3,
        "happiness":4,
        "sadness":5,
        "neutral":6
        }
        
        label_array=['']*num_speakers
        for i in range(num_speakers):
            initialID= speakers[j]
            #print(type(initialID))
            speaker=initialID[1:3]
            #print(initialID,speaker)
            speaker_array[i] = initialID
            temp_array= features[j, :]
            temp_array=np.reshape(temp_array,(88,1))
            j+=1
            # new_label = np.zeroes(7)
            idx = label2index[labels[j]]
            # new_label[idx] = 1
            label_array[i]= idx
            while j < num_features and speakers[j]==initialID:
                temp_array = np.hstack((temp_array,np.reshape(features[j, :],(88,1))))
                j+=1
            if temp_array.shape[1]<512:
                pad_length = 512-temp_array.shape[1]
                temp_array = np.pad(temp_array,((0, 0), (0, pad_length)),'constant')
            elif temp_array.shape[1]>512:
                temp_array=temp_array[:,:512]
            data_array[i]=temp_array
            
            if speaker in self.speaker_map:
                self.speaker_map[speaker]=np.append(self.speaker_map[speaker],temp_array[newaxis,::],axis=0)
            else:
                self.speaker_map[speaker]=np.empty((1,88,512))
                self.speaker_map[speaker][0,:,:]=temp_array
        self.features = data_array
        self.labels = label_array
        self.speakers = speaker_array
        self.std_map={}
        self.mean_map={}
        for ID in self.speaker_map.keys():
            std=np.std(self.speaker_map[ID],axis=(0,2))
            std=std[:,newaxis]
            std_zeros= std==0
            std[std_zeros]=1
            mean=(np.mean(test[ID],axis=(0,2)).transpose()*512).transpose()
            mean=mean[:,newaxis]
            for j in range(511):
                std=np.insert(std,1,std[:,0],axis=1)
                mean=np.insert(mean,1,mean[:,0],axis=1)
            self.std_map[ID]=std
            self.mean_map[ID]=mean
        #print(self.std_map['03'])
    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        speaker = self.speakers[idx]
        
        features = self.features[idx, :, :].astype("double")
        #print(speaker[1:3])
       # print(features)
        features=(features-self.mean_map[speaker[1:3]])/self.std_map[speaker[1:3]]
#         print(test_features)
        features = transforms.ToTensor()(features)
        
        label = self.labels[idx]
        sample = {'speaker': speaker, 'label': label,'features':features}
        if self.transform:
            sample = self.transform(sample)
        return sample
    
    

In [103]:
data=EmotionDataset(train_features)
std=data.std_map
print(std["03"].shape)

  app.launch_new_instance()


(88, 512)


In [97]:
print(std["03"])

[[1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.47127144e+01 1.47127144e+01 1.47127144e+01 1.47127144e+01
  1.4712

  1.57598674e+01 1.57598674e+01 1.57598674e+01 1.57598674e+01]]


In [104]:
## Code to train 
data=EmotionDataset(train_features)

data_loader = torch.utils.data.DataLoader(dataset=data, batch_size=10, shuffle=False)
model=Net()
loss_fn = torch.nn.NLLLoss()
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
running_loss = 0
for epoch in range(1):
    for sample in data_loader:
        features = sample["features"]
        label = torch.tensor(sample["label"])
        # features=torch.from_numpy(sample)
        # features=torch.unsqueeze(features,0)
        # features=torch.unsqueeze(features,0)
        y_pred = model(features)
        print(y_pred)
        loss=loss_fn(y_pred,label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
        # print statistics
        # running_loss += loss.item()
        # if i % 2000 == 1999:    # print every 2000 mini-batches
        #     print('[%d, %5d] loss: %.3f' %
        #           (epoch + 1, i + 1, running_loss / 2000))
        #     running_loss = 0.0

print('Finished Training')

  app.launch_new_instance()
  del sys.path[0]


tensor([[-2.0318, -2.1573, -1.9632, -1.8049, -1.7312, -1.9544, -2.0431],
        [-2.0367, -2.1080, -1.9095, -1.8864, -1.7916, -1.9218, -2.0006],
        [-2.0129, -2.1638, -1.9084, -1.8431, -1.7692, -1.9288, -2.0472],
        [-1.9763, -2.1300, -1.9077, -1.8709, -1.8061, -1.9258, -2.0392],
        [-1.9940, -2.1058, -1.8992, -1.8807, -1.8001, -1.9416, -2.0312],
        [-1.9704, -2.0845, -1.9476, -1.9066, -1.7994, -1.9102, -2.0281],
        [-2.0102, -2.1280, -1.9263, -1.8561, -1.7585, -1.9362, -2.0523],
        [-1.9814, -2.0914, -1.9133, -1.8941, -1.8152, -1.9123, -2.0400],
        [-2.0172, -2.1280, -1.9028, -1.8849, -1.7839, -1.9139, -2.0294],
        [-1.9819, -2.0853, -1.9145, -1.8942, -1.8215, -1.9104, -2.0382]],
       grad_fn=<LogSoftmaxBackward>)
tensor([[-1.9909, -2.0428, -2.0208, -1.8484, -1.7870, -1.9661, -1.9931],
        [-2.0265, -2.0459, -2.0302, -1.8653, -1.7474, -1.9500, -1.9931],
        [-2.0043, -2.0470, -2.0336, -1.8734, -1.7656, -1.9645, -1.9636],
        [-2.0

       grad_fn=<LogSoftmaxBackward>)
tensor([[-2.0055, -2.0405, -1.9711, -1.9293, -1.8249, -1.8709, -1.9977],
        [-2.0278, -2.0691, -1.9689, -1.8513, -1.8347, -1.8966, -1.9974],
        [-2.0262, -2.0579, -1.9991, -1.8698, -1.8125, -1.8822, -2.0000],
        [-1.9918, -2.0836, -1.9719, -1.8795, -1.8474, -1.8846, -1.9830],
        [-2.0143, -2.0820, -1.9553, -1.8807, -1.8378, -1.8857, -1.9878],
        [-1.9971, -2.0983, -1.9646, -1.8531, -1.8398, -1.9094, -1.9832],
        [-2.0178, -2.0867, -1.9490, -1.8836, -1.8204, -1.8971, -1.9911],
        [-2.0134, -2.1002, -1.9633, -1.8536, -1.8273, -1.9065, -1.9839],
        [-2.0368, -2.0990, -1.9426, -1.8585, -1.8234, -1.9061, -1.9834],
        [-2.0221, -2.0712, -1.9888, -1.8648, -1.8134, -1.8879, -2.0001]],
       grad_fn=<LogSoftmaxBackward>)
tensor([[-1.9927, -2.0869, -1.9531, -1.8796, -1.8572, -1.8800, -1.9919],
        [-1.9998, -2.1094, -1.9782, -1.8309, -1.8642, -1.8959, -1.9694],
        [-1.9852, -2.0863, -1.9777, -1.8511, -1.8

       grad_fn=<LogSoftmaxBackward>)
tensor([[-1.9373, -2.0680, -1.9990, -1.8796, -1.8954, -1.8425, -2.0198],
        [-1.9464, -2.1291, -1.9974, -1.8811, -1.8777, -1.8361, -1.9822],
        [-1.9445, -2.1147, -1.9890, -1.8336, -1.8712, -1.8630, -2.0367],
        [-1.9275, -2.1293, -2.0303, -1.8606, -1.8808, -1.8237, -2.0040],
        [-1.9159, -2.1416, -2.0220, -1.8357, -1.8697, -1.8625, -2.0100],
        [-1.9552, -2.1071, -1.9832, -1.8303, -1.8820, -1.8790, -2.0110],
        [-1.9312, -2.0767, -2.0394, -1.8887, -1.8938, -1.8167, -2.0006],
        [-1.9168, -2.1275, -2.0077, -1.8141, -1.9140, -1.8521, -2.0240],
        [-1.9432, -2.1312, -2.0040, -1.8343, -1.8597, -1.8572, -2.0271],
        [-1.9406, -2.1058, -1.9787, -1.8626, -1.8748, -1.8529, -2.0328]],
       grad_fn=<LogSoftmaxBackward>)
tensor([[-1.9387, -2.1144, -2.0657, -1.8395, -1.8440, -1.8406, -2.0185],
        [-1.9242, -2.1217, -1.9873, -1.8210, -1.8944, -1.8769, -2.0267],
        [-1.9241, -2.1397, -1.9935, -1.8100, -1.8