 # Emotion Recognition with a CNN

## Load Data

Pick two different emotion classes for your model to predict. Load/filter your
dataset to include only the related class data. Create another dataset and change only one of the
classes this time.

In [1]:
import pandas as pd

In [2]:
test_text = pd.read_csv('test_text.txt', header=None, names=['text'], sep='\r\n', engine='python')
test_labels = pd.read_csv('test_labels.txt', header=None, names=['label'], sep='\r\n', engine='python')
test_data = pd.concat([test_text, test_labels], axis=1)

print("Preview of testing data: ")
test_data[:5]

Preview of testing data: 


Unnamed: 0,text,label
0,#Deppression is real. Partners w/ #depressed p...,3
1,@user Interesting choice of words... Are you c...,0
2,My visit to hospital for care triggered #traum...,3
3,@user Welcome to #MPSVT! We are delighted to h...,1
4,What makes you feel #joyful?,1


In [3]:
train_text = pd.read_csv('train_text.txt', header=None, names=['text'], sep='\r\n', engine='python')
train_labels = pd.read_csv('train_labels.txt', header=None, names=['label'], sep='\r\n', engine='python')
train_data = pd.concat([train_text, train_labels], axis=1)

print("Preview of training data: ")
train_data[:5]

Preview of training data: 


Unnamed: 0,text,label
0,“Worry is a down payment on a problem you may ...,2
1,My roommate: it's okay that we can't spell bec...,0
2,No but that's so cute. Atsu was probably shy a...,1
3,Rooneys fucking untouchable isn't he? Been fuc...,0
4,it's pretty depressing when u hit pan on ur fa...,3


In [4]:
val_text = pd.read_csv('val_text.txt', header=None, names=['text'], sep='\r\n', engine='python')
val_labels = pd.read_csv('val_labels.txt', header=None, names=['label'], sep='\r\n', engine='python')
val_data = pd.concat([val_text, val_labels],  axis=1)

print("Preview of validation data: ")
val_data[:5]

Preview of validation data: 


Unnamed: 0,text,label
0,"@user @user Oh, hidden revenge and anger...I r...",0
1,if not then #teamchristine bc all tana has don...,0
2,Hey @user #Fields in #skibbereen give your onl...,0
3,Why have #Emmerdale had to rob #robron of havi...,0
4,@user I would like to hear a podcast of you go...,0


In [5]:
with open(f'mapping.txt') as f:
    mapping = f.read().replace('\t', ' ').split('\n')
mapping

['0 anger', '1 joy', '2 optimism', '3 sadness']

Classes we choose to explore:

First two classes: optimism, sadness (2, 3)


Second two classes: anger, sadness (0, 3)

In [7]:
# Filter the data
def filter_data_by_classes(dataset : pd.DataFrame, two_classes=[2,3]):
    filtered_data = dataset[dataset['label'].isin(two_classes)]
    return filtered_data

train_23 = filter_data_by_classes(train_data)
test_23 = filter_data_by_classes(test_data)
val_23 = filter_data_by_classes(val_data)

train_03 = filter_data_by_classes(train_data, [0, 3])
test_03 = filter_data_by_classes(test_data, [0, 3])
val_03 = filter_data_by_classes(val_data, [0, 3])

In [8]:
print("Preview of training data filtered: ")
train_23[:3], train_03[:3]

Preview of training data filtered: 


(                                                text  label
 0  “Worry is a down payment on a problem you may ...      2
 4  it's pretty depressing when u hit pan on ur fa...      3
 6  Making that yearly transition from excited and...      3,
                                                 text  label
 1  My roommate: it's okay that we can't spell bec...      0
 3  Rooneys fucking untouchable isn't he? Been fuc...      0
 4  it's pretty depressing when u hit pan on ur fa...      3)

## Data Preprocessing

1. Special Characters Cleaning
2. Character Casing
3. Stop Word Removal

In [9]:
import nltk

In [10]:
input = "It's a text to test pre-processing functions. "

### Special Characters Cleaning

In [11]:
# import library: Regular Expression
import re

"""
Clean the data by removing special characters (punctuation)
"""
def sp_chara_cleaning(text):
    clean_text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    return clean_text

In [12]:
# Special characters like ' . - are removed.
input = sp_chara_cleaning(input)
input

'It s a text to test pre processing functions  '

### Character Casing

In [13]:
"""
Lowercase all words.
"""
def character_casing(text):
    lower_text = text.lower()
    return lower_text

In [14]:
# All cases become lowercases.
input = character_casing(input)
input

'it s a text to test pre processing functions  '

### Stop Word Removal

In [15]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [16]:
from nltk.corpus import stopwords

"""
Here we remove words that in English stop words list.
"""
def stop_word_removal(text):
    words = text.split()
    stop_words = stopwords.words("english")
    clean_words = [w for w in words if w not in stop_words]
    clean_text = " ".join(clean_words)
    return clean_text



In [17]:
# Stop words like 'it', 's', 'a', 'this' are removed.
input = stop_word_removal(input)
print(input)

text test pre processing functions


### Apply Preprocessing Functions

In [18]:
data_list = [train_23, test_23, val_23, train_03, test_03, val_03]
funcs = [sp_chara_cleaning, character_casing, stop_word_removal]
preview = train_23["text"][:1].copy()
for func in funcs:
    print("Function in process: ", func.__name__)
    for data in data_list:
        data["text"] = data["text"].apply(func, )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["text"] = data["text"].apply(func, )


Function in process:  sp_chara_cleaning
Function in process:  character_casing
Function in process:  stop_word_removal


In [19]:
print("Sample before pre-processing: \n", preview[0])
print("Sample after pre-processing: \n", train_23["text"][0])

Sample before pre-processing: 
 “Worry is a down payment on a problem you may never have'.  Joyce Meyer.  #motivation #leadership #worry
Sample after pre-processing: 
 worry payment problem may never joyce meyer motivation leadership worry


### Convert Labels to Classes Space

Since the cnn only classify two classes in this task, we need to map the labels to classes space [0, 1].

Given that
0	anger
1	joy
2	optimism
3	sadness

In this code, we choose [2, 3] --> [0, 1], and [0, 3] --> [0, 1].

In [20]:
for data in data_list:
  data["label"] = data["label"].map({0:0, 1:1, 2:0, 3:1})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["label"] = data["label"].map({0:0, 1:1, 2:0, 3:1})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["label"] = data["label"].map({0:0, 1:1, 2:0, 3:1})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["label"] = data["label"].map({0:0, 1:1, 2:0, 3:1})
A value is trying to be set on a copy

In [21]:
train_23[:5]

Unnamed: 0,text,label
0,worry payment problem may never joyce meyer mo...,0
4,pretty depressing u hit pan ur favourite highl...,1
6,making yearly transition excited hopeful colle...,1
11,newyork several baloch amp indian activists ho...,1
17,saved ordering risk life panic stayed calm res...,0


### Create Dataset for Training

#### Tokenize Sentences

In [22]:
%pip install transformers



In [23]:
from transformers import AutoTokenizer, AutoConfig

# Load bert-base-uncased, a pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
config = AutoConfig.from_pretrained("bert-base-uncased")


In [24]:
# get the embedding dimension
embedding_dim = config.hidden_size
embedding_dim

768

In [25]:
# get the vocabulary size
vocab_size = tokenizer.vocab_size
vocab_size

30522

In [26]:
input_ids_list = []
# data_list = [train_23, test_23, val_23, train_03, test_03, val_03]
for data in data_list:
    input_ids_list.append(tokenizer(list(data["text"]), padding=True, truncation=True, return_tensors="pt"))

In [27]:
import torch
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# device = torch.device("mps")
device

'cuda:0'

In [28]:

from torch.utils.data import TensorDataset, DataLoader

def createDataLoader(X, y, batch_size=64):
  torch.manual_seed(1)
  data_set = TensorDataset(X.to(device), y.to(device))
  data_loader = DataLoader(data_set, batch_size=batch_size)
  return data_loader


In [29]:
# create data loaders with batch size = 64
data_loaders = []
for i, input_ids in enumerate(input_ids_list):
  data_loaders.append(createDataLoader(input_ids.input_ids, torch.tensor(list(data_list[i]["label"]))))

In [30]:
# create data loaders with batch size = 128
data_loaders_128 = []
for i, input_ids in enumerate(input_ids_list):
  data_loaders_128.append(createDataLoader(input_ids.input_ids, torch.tensor(list(data_list[i]["label"])), batch_size = 128))

## Training

### Model

In [31]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import time
import logging

In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class EmotionCNN(nn.Module):
    def __init__(self,
                 vocab_size = vocab_size,
                 embedding_dim = embedding_dim,
                 num_filters = [50, 100, 150],
                 filter_sizes = [3, 4, 5],
                 num_classes = 2,
                 dropout = 0.2,
                 stride = 1,
                 pool_func = 'max'):
        super(EmotionCNN, self).__init__()

        assert len(num_filters) == len(filter_sizes)

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, max_norm=5.0)

        # Convolutional layers
        self.convs = nn.ModuleList([
            nn.Conv1d(
                in_channels=embedding_dim,
                out_channels=num_filters[i],
                kernel_size=filter_sizes[i],
                stride=stride,
                ) for i in range(len(filter_sizes))
        ])

        # Fully connected network
        self.fc = nn.Linear(sum(num_filters), num_classes)

        # Dropout
        self.dropout = nn.Dropout(dropout)

        self.pool_func = pool_func

    def forward(self, input):
        # input: [batch_size, seq_length]

        # Calculate embeddings
        # embedded: [batch_size, seq_length, embedding_dim]
        embedded = self.embedding(input)

        # Switch the last two dimensions to match input for convs
        # permuted: [batch_size, embedding_dim, seq_length]
        permuted = embedded.permute(0, 2, 1)

        # Apply convolution and ReLU activation
        # conved: [batch_size, ...]
        conved = [F.relu(conv(permuted)) for conv in self.convs]

        # Max pooling or average pooling over the time dimension
        # pooled: [batch_size, num_filters]
        if self.pool_func == 'max':
            pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        else:
            pooled = [F.avg_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]

        # Concatenate pooled features
        # cat: [batch_size, sum(num_filters)]
        cat = torch.cat(pooled, dim=1)

        # Apply dropout
        # dropped: [batch_size, sum(num_filters)]
        dropped = self.dropout(cat)

        # Apply all full connected layers
        # output: [batch_size, num_classes]
        output = self.fc(dropped)


        # for data in [input, embedded, permuted, cat, dropped, output]:
        #     print(data.shape)

        return output


### GridSearchCV

In [33]:
%pip install skorch



In [51]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from skorch.callbacks import EarlyStopping
from sklearn.metrics import make_scorer, f1_score, accuracy_score

from skorch import NeuralNetClassifier


EmoCNN = NeuralNetClassifier(
    module=EmotionCNN(
            vocab_size=vocab_size,
            embedding_dim=embedding_dim),
    callbacks = [EarlyStopping()],
    max_epochs=200,
    criterion=nn.CrossEntropyLoss(),
    verbose = 0,
    device='cuda',
)


# Here are the parameters we want to find best.
# These include the requirements in Exercise 1 document.
param_grid = {  # optimizer
                'optimizer': [torch.optim.Adam,
                              torch.optim.RMSprop],

                # learning rate
                'optimizer__lr': [0.01, 0.001],

                # layer sizes
                'module__dropout': [0.5, 0.7],

                # number of filters
                'module__num_filters': [[50, 100, 150], [150, 150, 150],
                 [200, 200, 200]],

                # stride
                'module__stride': [1, 2],

                # kernel size
                'module__filter_sizes': [[3, 4, 5], [2, 3, 4]],

                # pooling
                'module__pool_func': ['avg', 'max'],

                # # early stopping
                # 'callbacks__EarlyStopping__patience': [5, 10],

    }

scoring = {
  'F1 Score': make_scorer(f1_score),
  'Accuracy': make_scorer(accuracy_score)
}


In [52]:
gs_CNN = GridSearchCV(EmoCNN, param_grid=param_grid, cv=3, scoring=scoring,
                      refit=False)

In [53]:
X_gs, y_gs = input_ids_list[0].input_ids[:600], torch.tensor(list(data_list[0]["label"])[:600])
gs_CNN.fit(X_gs, y_gs)

In [65]:
# Choose columns to show in table
cols = []

cols.extend(['param_{}'.format(k) for k in param_grid.keys()])
cols.extend(['rank_test_Accuracy', 'mean_test_Accuracy', 'rank_test_F1 Score', 'mean_test_F1 Score',])

In [66]:
CNN_df = pd.DataFrame.from_dict(gs_CNN.cv_results_)
CNN_df.sort_values(by=["rank_test_Accuracy"]).iloc[:5][cols]

Unnamed: 0,param_optimizer,param_optimizer__lr,param_module__dropout,param_module__num_filters,param_module__stride,param_module__filter_sizes,param_module__pool_func,rank_test_Accuracy,mean_test_Accuracy,rank_test_F1 Score,mean_test_F1 Score
80,<class 'torch.optim.adam.Adam'>,0.01,0.5,"[200, 200, 200]",1,"[2, 3, 4]",avg,1,0.796667,20,0.866847
179,<class 'torch.optim.rmsprop.RMSprop'>,0.001,0.7,"[200, 200, 200]",1,"[2, 3, 4]",avg,2,0.793333,1,0.873205
65,<class 'torch.optim.adam.Adam'>,0.001,0.5,"[150, 150, 150]",1,"[2, 3, 4]",avg,3,0.791667,2,0.872327
0,<class 'torch.optim.adam.Adam'>,0.01,0.5,"[50, 100, 150]",1,"[3, 4, 5]",avg,4,0.79,48,0.861566
33,<class 'torch.optim.adam.Adam'>,0.001,0.5,"[200, 200, 200]",1,"[3, 4, 5]",avg,4,0.79,3,0.87194


In [61]:
CNN_df.sort_values(by=["rank_test_F1 Score"]).iloc[:5][cols]

Unnamed: 0,param_optimizer,param_optimizer__lr,param_module__dropout,param_module__num_filters,param_module__stride,param_module__filter_sizes,param_module__pool_func,rank_test_Accuracy,mean_test_Accuracy,rank_test_F1 Score,mean_test_F1 Score
179,<class 'torch.optim.rmsprop.RMSprop'>,0.001,0.7,"[200, 200, 200]",1,"[2, 3, 4]",avg,2,0.793333,1,0.873205
65,<class 'torch.optim.adam.Adam'>,0.001,0.5,"[150, 150, 150]",1,"[2, 3, 4]",avg,3,0.791667,2,0.872327
33,<class 'torch.optim.adam.Adam'>,0.001,0.5,"[200, 200, 200]",1,"[3, 4, 5]",avg,4,0.79,3,0.87194
129,<class 'torch.optim.adam.Adam'>,0.001,0.7,"[200, 200, 200]",1,"[3, 4, 5]",avg,4,0.79,4,0.871746
19,<class 'torch.optim.rmsprop.RMSprop'>,0.001,0.5,"[150, 150, 150]",1,"[3, 4, 5]",avg,4,0.79,5,0.871415


In [62]:
# show best params
CNN_df.sort_values(by=["rank_test_F1 Score"]).iloc[0]['params']

{'module__dropout': 0.7,
 'module__filter_sizes': [2, 3, 4],
 'module__num_filters': [200, 200, 200],
 'module__pool_func': 'avg',
 'module__stride': 1,
 'optimizer': torch.optim.rmsprop.RMSprop,
 'optimizer__lr': 0.001}

### Train with First Dataset

Classes here are: optimism, sadness

In [71]:
import torch.optim as optim

# Instantiate CNN model
model = EmotionCNN(
            dropout=0.7,
            filter_sizes = [2, 3, 4],
            num_filters = [200, 200, 200],
            pool_func = 'avg',
            stride = 1
            )

# Send model to `device` (GPU/CPU)
model.to(device)

# Instantiate Adadelta optimizer
optimizer = optim.RMSprop(model.parameters(), lr=0.001)

# Specify loss function
criterion = nn.CrossEntropyLoss()
logging.basicConfig(filename='EmotionCNN.log', level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

In [72]:

# Start training loop
print("Start training...\n")
print("-"*60, "\n")

# specify max epochs and early stopping patience
epochs = 200
lowest_loss = float('inf')
max_patience = 5

# data order: train_23, test_23, val_23, train_03, test_03, val_0

for epoch in range(epochs):
  total_loss = 0
  start_time = time.time()
  correct = 0
  total = 0
  # Put the model into the training mode
  model.train()
  for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders[0]):

    # Torch accumulates gradients. Before passing in a
    # new instance, zero out the gradients from the old instance
    model.zero_grad()

    # Perform a forward pass. This will return logits.
    logits = model(b_input_ids)

    # Compute loss and accumulate the loss values
    loss = criterion(logits, b_labels)

    total_loss += loss.item()

    # Perform a backward pass to calculate gradients
    loss.backward()

    # Update parameters
    optimizer.step()

    _, predicted = logits.max(dim=1)
    total += len(b_labels)
    correct += predicted.eq(b_labels).sum().item()

    # Calculate the average loss over the entire training data
  avg_train_loss = total_loss / len(data_loaders[0])
  train_accuracy = correct / total


  # Validation
  model.eval()
  val_loss = 0.0
  correct = 0
  total = 0

  with torch.no_grad():
      for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders[2]):

          val_logits = model(b_input_ids)

          # Compute the validation loss
          val_loss += criterion(val_logits, b_labels).item()

          # Calculate validation accuracy
          _, predicted = val_logits.max(dim=1)
          total += len(b_labels)
          correct += predicted.eq(b_labels).sum().item()

  avg_val_loss = val_loss / len(data_loaders[2])
  val_accuracy = correct / total

  info = f"Epoch: {epoch + 1} / {epochs} Time: {time.time() - start_time:.2f}s \
\nTrain Loss: {avg_train_loss:.4f} Train Acc: {train_accuracy:.4f} \
\nVal Loss: {avg_val_loss:.4f} Val Acc: {val_accuracy:.4f}\n"
  print(info)
  print("-"*60, "\n")
  logging.info(info)

  if val_loss < lowest_loss:
      lowest_loss = val_loss
      patience = 0  # Reset patience counter
  else:
      patience += 1  # Increment patience counter

  if patience >= max_patience:
      print(f'Early stopping after {epoch} epochs.')
      break

torch.save(model.state_dict(), f'EmotionCNN_{epochs}.pth')


Start training...

------------------------------------------------------------ 

Epoch: 1 / 200 Time: 0.45s 
Train Loss: 0.5706 Train Acc: 0.7372 
Val Loss: 0.4784 Val Acc: 0.7778

------------------------------------------------------------ 

Epoch: 2 / 200 Time: 0.40s 
Train Loss: 0.2555 Train Acc: 0.8947 
Val Loss: 0.5071 Val Acc: 0.7949

------------------------------------------------------------ 

Epoch: 3 / 200 Time: 0.39s 
Train Loss: 0.0844 Train Acc: 0.9748 
Val Loss: 0.6068 Val Acc: 0.7607

------------------------------------------------------------ 

Epoch: 4 / 200 Time: 0.36s 
Train Loss: 0.0330 Train Acc: 0.9948 
Val Loss: 0.6758 Val Acc: 0.7778

------------------------------------------------------------ 

Epoch: 5 / 200 Time: 0.36s 
Train Loss: 0.0184 Train Acc: 0.9965 
Val Loss: 0.7265 Val Acc: 0.7778

------------------------------------------------------------ 

Epoch: 6 / 200 Time: 0.34s 
Train Loss: 0.0134 Train Acc: 0.9991 
Val Loss: 0.7771 Val Acc: 0.7692

---

In [73]:
model.eval()
test_loss = 0.0
correct = 0
total = 0
predictions = []
with torch.no_grad():
    for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders[1]):

        test_logits = model(b_input_ids)

        # Compute the validation loss
        test_loss += criterion(test_logits, b_labels).item()

        # Calculate validation accuracy
        _, predicted = test_logits.max(dim=1)
        predictions.extend(predicted.tolist())
        total += len(b_labels)
        correct += predicted.eq(b_labels).sum().item()

avg_test_loss = test_loss / len(data_loaders[1])
test_accuracy = correct / total
info = f"Test Loss: {avg_test_loss:.4f} Test Acc: {test_accuracy:.4f}"
print(info)
logging.info(info)


Test Loss: 0.5445 Test Acc: 0.8178


In [74]:
from sklearn.metrics import f1_score
# calculate f1 score for test set
round(f1_score(y_true = data_list[1]['label'], y_pred = predictions), 4)

0.8844

Test if batch size will influence a lot

In [75]:
# Start training loop
print("Start training...\n")
print("-"*60, "\n")

# specify max epochs and early stopping patience
epochs = 200
lowest_loss = float('inf')
max_patience = 5

# data order: train_23, test_23, val_23, train_03, test_03, val_0

for epoch in range(epochs):
  total_loss = 0
  start_time = time.time()
  correct = 0
  total = 0
  # Put the model into the training mode
  model.train()
  for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders_128[0]):

    # Torch accumulates gradients. Before passing in a
    # new instance, zero out the gradients from the old instance
    model.zero_grad()

    # Perform a forward pass. This will return logits.
    logits = model(b_input_ids)

    # Compute loss and accumulate the loss values
    loss = criterion(logits, b_labels)

    total_loss += loss.item()

    # Perform a backward pass to calculate gradients
    loss.backward()

    # Update parameters
    optimizer.step()

    _, predicted = logits.max(dim=1)
    total += len(b_labels)
    correct += predicted.eq(b_labels).sum().item()

    # Calculate the average loss over the entire training data
  avg_train_loss = total_loss / len(data_loaders_128[0])
  train_accuracy = correct / total


  # Validation
  model.eval()
  val_loss = 0.0
  correct = 0
  total = 0

  with torch.no_grad():
      for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders_128[2]):

          val_logits = model(b_input_ids)

          # Compute the validation loss
          val_loss += criterion(val_logits, b_labels).item()

          # Calculate validation accuracy
          _, predicted = val_logits.max(dim=1)
          total += len(b_labels)
          correct += predicted.eq(b_labels).sum().item()

  avg_val_loss = val_loss / len(data_loaders_128[2])
  val_accuracy = correct / total

  info = f"Epoch: {epoch + 1} / {epochs} Time: {time.time() - start_time:.2f}s \
\nTrain Loss: {avg_train_loss:.4f} Train Acc: {train_accuracy:.4f} \
\nVal Loss: {avg_val_loss:.4f} Val Acc: {val_accuracy:.4f}\n"
  print(info)
  print("-"*60, "\n")
  logging.info(info)

  if val_loss < lowest_loss:
      lowest_loss = val_loss
      patience = 0  # Reset patience counter
  else:
      patience += 1  # Increment patience counter

  if patience >= max_patience:
      print(f'Early stopping after {epoch} epochs.')
      break

torch.save(model.state_dict(), f'EmotionCNN_{epochs}.pth')


Start training...

------------------------------------------------------------ 

Epoch: 1 / 200 Time: 0.44s 
Train Loss: 0.0082 Train Acc: 0.9983 
Val Loss: 0.8067 Val Acc: 0.7607

------------------------------------------------------------ 

Epoch: 2 / 200 Time: 0.34s 
Train Loss: 0.0084 Train Acc: 0.9983 
Val Loss: 0.8310 Val Acc: 0.7607

------------------------------------------------------------ 

Epoch: 3 / 200 Time: 0.34s 
Train Loss: 0.0063 Train Acc: 0.9991 
Val Loss: 0.8594 Val Acc: 0.7521

------------------------------------------------------------ 

Epoch: 4 / 200 Time: 0.34s 
Train Loss: 0.0048 Train Acc: 1.0000 
Val Loss: 0.8872 Val Acc: 0.7521

------------------------------------------------------------ 

Epoch: 5 / 200 Time: 0.34s 
Train Loss: 0.0051 Train Acc: 1.0000 
Val Loss: 0.9105 Val Acc: 0.7521

------------------------------------------------------------ 

Epoch: 6 / 200 Time: 0.35s 
Train Loss: 0.0049 Train Acc: 0.9991 
Val Loss: 0.9184 Val Acc: 0.7692

---

In [76]:
model.eval()
test_loss = 0.0
correct = 0
total = 0
predictions = []
with torch.no_grad():
    for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders_128[1]):

        test_logits = model(b_input_ids)

        # Compute the validation loss
        test_loss += criterion(test_logits, b_labels).item()

        # Calculate validation accuracy
        _, predicted = test_logits.max(dim=1)
        predictions.extend(predicted.tolist())
        total += len(b_labels)
        correct += predicted.eq(b_labels).sum().item()

avg_test_loss = test_loss / len(data_loaders_128[1])
test_accuracy = correct / total
info = f"Test Loss: {avg_test_loss:.4f} Test Acc: {test_accuracy:.4f}"
print(info)
logging.info(info)


Test Loss: 0.6405 Test Acc: 0.8119


In [77]:
# calculate f1 score for test set
round(f1_score(y_true = data_list[1]['label'], y_pred = predictions), 4)

0.8823

Compared to the results from batch size 64, results from batch size 128 have few differences and are a litte low on both metrics. So we keep using the batch size 64 in second dataset.

### Train with Second Dataset

Classes here are: anger and sadness

In [89]:
import torch.optim as optim

# Instantiate CNN model
model = EmotionCNN(
            dropout=0.7,
            filter_sizes = [2, 3, 4],
            num_filters = [200, 200, 200],
            pool_func = 'avg',
            stride = 1
            )

# Send model to `device` (GPU/CPU)
model.to(device)

# Instantiate Adadelta optimizer
optimizer = optim.RMSprop(model.parameters(), lr=0.001)

# Specify loss function
criterion = nn.CrossEntropyLoss()
logging.basicConfig(filename='EmotionCNN.log', level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

In [90]:

# Start training loop
print("Start training...\n")
print("-"*60, "\n")

# specify max epochs and early stopping patience
epochs = 200
lowest_loss = float('inf')
max_patience = 5

# data order: train_23, test_23, val_23, train_03, test_03, val_0

for epoch in range(epochs):
  total_loss = 0
  start_time = time.time()
  correct = 0
  total = 0
  # Put the model into the training mode
  model.train()
  for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders[3]):

    # Torch accumulates gradients. Before passing in a
    # new instance, zero out the gradients from the old instance
    model.zero_grad()

    # Perform a forward pass. This will return logits.
    logits = model(b_input_ids)

    # Compute loss and accumulate the loss values
    loss = criterion(logits, b_labels)

    total_loss += loss.item()

    # Perform a backward pass to calculate gradients
    loss.backward()

    # Update parameters
    optimizer.step()

    _, predicted = logits.max(dim=1)
    total += len(b_labels)
    correct += predicted.eq(b_labels).sum().item()

    # Calculate the average loss over the entire training data
  avg_train_loss = total_loss / len(data_loaders[3])
  train_accuracy = correct / total


  # Validation
  model.eval()
  val_loss = 0.0
  correct = 0
  total = 0

  with torch.no_grad():
      for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders[5]):

          val_logits = model(b_input_ids)

          # Compute the validation loss
          val_loss += criterion(val_logits, b_labels).item()

          # Calculate validation accuracy
          _, predicted = val_logits.max(dim=1)
          total += len(b_labels)
          correct += predicted.eq(b_labels).sum().item()

  avg_val_loss = val_loss / len(data_loaders[5])
  val_accuracy = correct / total

  info = f"Epoch: {epoch + 1} / {epochs} Time: {time.time() - start_time:.2f}s \
\nTrain Loss: {avg_train_loss:.4f} Train Acc: {train_accuracy:.4f} \
\nVal Loss: {avg_val_loss:.4f} Val Acc: {val_accuracy:.4f}\n"
  print(info)
  print("-"*60, "\n")
  logging.info(info)

  if val_loss < lowest_loss:
      lowest_loss = val_loss
      patience = 0  # Reset patience counter
  else:
      patience += 1  # Increment patience counter

  if patience >= max_patience:
      print(f'Early stopping after {epoch} epochs.')
      break

torch.save(model.state_dict(), f'EmotionCNN_{epochs}.pth')


Start training...

------------------------------------------------------------ 

Epoch: 1 / 200 Time: 0.64s 
Train Loss: 0.5379 Train Acc: 0.7348 
Val Loss: 0.4135 Val Acc: 0.8072

------------------------------------------------------------ 

Epoch: 2 / 200 Time: 0.53s 
Train Loss: 0.1783 Train Acc: 0.9273 
Val Loss: 0.4299 Val Acc: 0.8072

------------------------------------------------------------ 

Epoch: 3 / 200 Time: 0.53s 
Train Loss: 0.0773 Train Acc: 0.9752 
Val Loss: 0.4769 Val Acc: 0.8313

------------------------------------------------------------ 

Epoch: 4 / 200 Time: 0.53s 
Train Loss: 0.0505 Train Acc: 0.9809 
Val Loss: 0.5252 Val Acc: 0.8233

------------------------------------------------------------ 

Epoch: 5 / 200 Time: 0.53s 
Train Loss: 0.0374 Train Acc: 0.9845 
Val Loss: 0.5602 Val Acc: 0.8112

------------------------------------------------------------ 

Epoch: 6 / 200 Time: 0.53s 
Train Loss: 0.0254 Train Acc: 0.9902 
Val Loss: 0.5992 Val Acc: 0.8153

---

In [91]:
model.eval()
test_loss = 0.0
correct = 0
total = 0
predictions = []
with torch.no_grad():
    for batch_num, (b_input_ids, b_labels) in enumerate(data_loaders[4]):

        test_logits = model(b_input_ids)

        # Compute the validation loss
        test_loss += criterion(test_logits, b_labels).item()

        # Calculate validation accuracy
        _, predicted = test_logits.max(dim=1)
        predictions.extend(predicted.tolist())
        total += len(b_labels)
        correct += predicted.eq(b_labels).sum().item()

avg_test_loss = test_loss / len(data_loaders[4])
test_accuracy = correct / total
info = f"Test Loss: {avg_test_loss:.4f} Test Acc: {test_accuracy:.4f}"
print(info)
logging.info(info)


Test Loss: 0.5883 Test Acc: 0.8160


In [92]:
# calculate f1 score for test set
round(f1_score(y_true = data_list[4]['label'], y_pred = predictions), 4)

0.7672