In [2]:
from google.colab import drive

# Attach Google Drive inside the Colab VM; every dataset, checkpoint and output
# path used further down lives under this mount point.
MOUNT_POINT = '/content/gdrive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import torch

# Choose the compute device once; later cells move tensors to `device`.
if not torch.cuda.is_available():
    # No CUDA device visible: fall back to the CPU.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    # At least one CUDA device is visible: use it and report what was found.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [4]:
!pip install transformers



In [5]:
from transformers import BertTokenizer

# Announce, then fetch the pretrained 'bert-base-uncased' tokenizer with
# lower-casing enabled; `tokenizer` is used later to encode each utterance.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

Loading BERT tokenizer...


In [6]:
import pickle

# Directory on the mounted Drive that holds the pickled MELD splits.
MELD_PICKLE_DIR = '/content/gdrive/My Drive/Multi_Model_Emotion_Detection/MELD/csv'


def _load_pickle(path):
    """Unpickle and return the object stored at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    loading finishes (the previous bare `open(...)` calls were never closed).
    """
    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # point this at files you created or otherwise trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One object per split; later cells read their 'Utterance', 'Emotion_Label',
# 'Dialogue_ID' and 'Utterance_ID' fields.
train_df = _load_pickle(MELD_PICKLE_DIR + '/train.pkl')
val_df = _load_pickle(MELD_PICKLE_DIR + '/dev.pkl')
test_df = _load_pickle(MELD_PICKLE_DIR + '/test.pkl')

In [106]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Load a BertForSequenceClassification checkpoint (BERT encoder plus a single
# linear classification layer) from a directory on the mounted Drive.
# NOTE(review): the path suggests epoch 3 of a DailyDialog+MELD+IEMOCAP
# fine-tuning run -- confirm this is the intended checkpoint.
model = BertForSequenceClassification.from_pretrained(
    "/content/gdrive/My Drive/Multi_Model_Emotion_Detection/BERT Models/DailyDialog+MELD+IEMOCAP/4/epoch_3",
    num_labels = 5, # Size of the classification head (five emotion classes).
    output_attentions = False, # Attention weights are not needed here.
    output_hidden_states = True, # Required: embeddings are read from the hidden states.
)

# Move the model to the device selected earlier. Using `device` instead of
# `model.cuda()` keeps the CPU fallback working when no GPU is available.
model.to(device)

# Inference only: make sure dropout is disabled so the extracted embeddings
# are deterministic.
model.eval()

print('Model Ready')

Model Ready


In [107]:
def get_embeddings(utterance):
    """Return a fixed-size embedding of `utterance` as a NumPy array.

    The utterance is encoded with the module-level `tokenizer`, run through the
    module-level `model` as a batch of one, and the last hidden-state layer's
    vector at the first token position is returned.

    Args:
        utterance: Text to embed (passed straight to `tokenizer.encode`).

    Returns:
        numpy.ndarray holding the first-token vector of the final hidden layer,
        copied to host memory.
    """
    input_ids = tokenizer.encode(utterance, add_special_tokens=True)
    # Shape the token ids as a batch of one and place them on the model's device.
    input_ids = torch.LongTensor(input_ids).unsqueeze(0).to(device)

    # Pure inference: without no_grad() every call would build and retain an
    # autograd graph, wasting memory across thousands of utterances.
    with torch.no_grad():
        outputs = model(input_ids)

    # No labels are passed, so the outputs are (logits, hidden_states). Index
    # positionally instead of tuple-unpacking: this works both when the model
    # returns a plain tuple and when it returns a ModelOutput (unpacking a
    # ModelOutput would yield its keys, not its values).
    hidden_states = outputs[1]

    # hidden_states[-1] -> last layer; [0] -> the only batch item;
    # [0] -> first token position.
    embedding = hidden_states[-1][0][0]
    return embedding.cpu().detach().numpy()

In [108]:
# Add a 'Text Embeddings' column to each split (train, then test, then dev),
# holding the vector get_embeddings returns for every utterance.
for split_df in (train_df, test_df, val_df):
    utterances = split_df['Utterance']
    split_df['Text Embeddings'] = utterances.apply(get_embeddings)

In [109]:
# Emotion name for each integer label, in label order (0..4); used when
# building the per-utterance ID strings.
EMOTION_IDS = dict(enumerate((
    'anger/disgust',
    'fear/surprise',
    'happiness',
    'neutral',
    'sadness',
)))

In [110]:
import pandas as pd

In [111]:
def get_id(self, dataset):
    """Build the identifier string for one utterance row.

    Args:
        self: Row-like object exposing `Emotion_Label`, `Dialogue_ID` and
            `Utterance_ID` attributes (called with DataFrame rows below).
        dataset: Name of the split the row belongs to (e.g. 'train').

    Returns:
        "<emotion name>-<dataset>-<dialogue id>-<utterance id>", where the
        emotion name is looked up in EMOTION_IDS.
    """
    parts = (
        EMOTION_IDS[self.Emotion_Label],
        dataset,
        self.Dialogue_ID,
        self.Utterance_ID,
    )
    # str() on each part mirrors what "%s" formatting does.
    return "-".join(str(part) for part in parts)

In [112]:
# Tag every row of each split with its identifier; the second element of each
# pair is the split name embedded in the ID.
for frame, split_name in ((train_df, 'train'), (test_df, 'test'), (val_df, 'dev')):
    frame['ID'] = frame.apply(lambda row: get_id(row, split_name), axis=1)

In [113]:
# Stack the three splits row-wise in train, test, dev order.
all_splits = [train_df, test_df, val_df]
dataset_df = pd.concat(all_splits)

In [114]:
# Persist only the ID and embedding columns. The `with` block guarantees the
# file is flushed and closed once the dump finishes; the previous bare
# `open(...)` handle was never closed.
EMBEDDINGS_PATH = '/content/gdrive/My Drive/Multi_Model_Emotion_Detection/Embeddings/MELD Embeddings/DailyDialog+MELD+IEMOCAP/4/text_embeddings.pkl'
with open(EMBEDDINGS_PATH, 'wb') as handle:
    pickle.dump(dataset_df[['ID', 'Text Embeddings']], handle)