In [6]:
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime
import os
import csv
import argparse
import torch
import numpy as np

from models.gru import SentimentGRU_A
from models.transformer import SentimentTransformerEncoder_C

from dataset import SentimentAnalysisDataset, LABEL_MAP
from test import run_test

from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score

# Name of the output directory (not referenced by the cells visible here).
RESULTS_DIR = 'results'

# Directory the dataset CSV files are resolved against.
data_path = 'data'

# Registry mapping a variant letter to its model class; no class is
# registered for variant 'b'.
MODELS = {
    'a': SentimentGRU_A,
    'b': None,
    'c': SentimentTransformerEncoder_C,
}

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
# Rebuild the GRU model (100 is passed to the constructor; the printed module
# shows a GRU input size of 100) and restore its trained weights.
model_a = SentimentGRU_A(100)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU run still loads when this machine falls back to the CPU.
model_a.load_state_dict(torch.load('models/A_06_05_13_26.state', map_location=device))
model_a.to(device)
# Switch to inference mode (dropout off); as the cell's last expression the
# returned module is also displayed.
model_a.eval()

SentimentGRU_A(
  (gru): GRU(100, 128, num_layers=4, batch_first=True, dropout=0.1, bidirectional=True)
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=128, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.1, inplace=False)
    (9): Linear(in_features=64, out_features=64, bias=True)
    (10): ReLU()
    (11): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [3]:
# NOTE: despite the variable name, `model_b` holds the "C" transformer-encoder
# variant, restored from the C_* checkpoint. It is the second ensemble member.
model_b = SentimentTransformerEncoder_C(100)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU run still loads when this machine falls back to the CPU.
model_b.load_state_dict(torch.load('models/C_06_05_13_25.state', map_location=device))
model_b.to(device)
# Switch to inference mode (dropout off); as the cell's last expression the
# returned module is also displayed.
model_b.eval()

SentimentTransformerEncoder_C(
  (tranformer): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=100, bias=True)
        (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_features=2048, bias=True)
        (dropou

In [4]:
# Training split, read from the CSV file inside `data_path`.
train_csv = os.path.join(data_path, 'trainEmotions.csv')
train_dataset = SentimentAnalysisDataset(train_csv)

-> Loading word embeddings


In [5]:
# Held-out test split, read from the CSV file inside `data_path`.
test_csv = os.path.join(data_path, 'testEmotions.csv')
test_dataset = SentimentAnalysisDataset(test_csv)

-> Loading word embeddings


In [18]:
def model_ensemble_predict(models, dataset, device):
    """Run every model in ``models`` on every sample of ``dataset``.

    No aggregation is done here: the raw per-model outputs are returned so
    that different ensembling strategies can be tried on the same results.

    Args:
        models: Re-iterable collection (e.g. a list) of callables; each one is
            invoked as ``model(tokens)``.
        dataset: Iterable yielding ``(tokens, label)`` tensor pairs. The true
            class of a sample is taken as ``label.argmax()``.
        device: Device the tokens are moved to before the forward pass.

    Returns:
        A ``(y_true, y_predict)`` tuple of equally long lists.
        ``y_true[i]`` is the integer class index of sample ``i``.
        ``y_predict[i]`` is one of two things:

        * a list with one output per model, in the order of ``models``, or
        * the plain ``int`` ``LABEL_MAP['neutral']`` when the sample has no
          tokens left (e.g. the text contained only stopwords).

        Callers must handle both element types.
    """
    y_true = []
    y_predict = []

    with torch.no_grad():
        for tokens, label in dataset:
            # The label is only converted to a Python int, so it does not need
            # to be copied to the compute device.
            y_true.append(int(label.argmax()))

            tokens = tokens.to(device).float()

            # Squeeze once and reuse the result for both emptiness checks.
            squeezed = tokens.squeeze()
            if squeezed.dim() == 0 or len(squeezed) == 0:
                # Predict neutral if no token is left after processing,
                # e.g. only stopwords in the original text.
                y_predict.append(int(LABEL_MAP['neutral']))
                continue

            # Forward pass through every ensemble member.
            y_predict.append([model(tokens) for model in models])

    return y_true, y_predict

In [39]:
# Collect the true labels and the raw per-model outputs for the test split.
y_true, y_predict = model_ensemble_predict(
    models=[model_a, model_b],
    dataset=test_dataset,
    device=device,
)

### Approach A

Average the raw outputs of the models, giving each model equal weight, then apply softmax and predict the class with the highest probability.

In [42]:
# Equal-weight ensemble: average the raw outputs of all models, then predict
# the class with the highest softmax probability.
y = []
for outputs in y_predict:
    if isinstance(outputs, int):
        # Samples without tokens already carry a final class index (the
        # neutral fallback chosen by model_ensemble_predict); use it as-is
        # instead of relying on an exception to detect this case.
        y.append(outputs)
        continue

    # Divide by the actual number of models rather than a hard-coded 2.
    mean_output = sum(outputs) / len(outputs)
    y.append(int(torch.softmax(mean_output, dim=0).argmax(dim=0)))

print('Accuracy:', accuracy_score(y_true, y))

Accuracy: 0.5189166838949762


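### Approach B

For each sample, keep the prediction of the most confident model, i.e. the model whose softmax output has the largest maximum probability.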

In [51]:
# Most-confident ensemble: for every sample, trust the model whose softmax
# distribution has the highest peak probability.
y = []
for outputs in y_predict:
    if isinstance(outputs, int):
        # Samples without tokens already carry a final class index (the
        # neutral fallback chosen by model_ensemble_predict); use it as-is
        # rather than fabricating one-hot outputs for a fixed number of models.
        y.append(outputs)
        continue

    probabilities = [torch.softmax(x, dim=0) for x in outputs]
    # max() with a key returns the first maximal element, so ties go to the
    # earlier model, matching list.index(max(...)).
    most_confident = max(probabilities, key=lambda p: float(p.max()))
    y.append(int(most_confident.argmax(dim=0)))

print('Accuracy:', accuracy_score(y_true, y))

Accuracy: 0.5375232582179037
