In [4]:
import pandas as pd
import os
import torch

In [24]:
# Central experiment settings, grouped by stage: data, model, training.
config = dict(
    # ---- data ----
    # Dataset CSVs are looked up under <current working directory>/data.
    data_dir=os.path.join(os.getcwd(), 'data'),
    dataset="switchboard",
    # Column names holding the utterance text and the dialogue-act tag.
    text_field="Text",
    label_field="DamslActTag",
    max_len=256,
    batch_size=16,  # change it to 64
    # NOTE(review): 48 loader workers is a lot of processes; confirm it suits the machine.
    num_workers=48,

    # ---- model ----
    model_name="roberta-base",
    hidden_size=768,
    num_classes=43,  # there are 43 classes in switchboard corpus

    # ---- training ----
    # Outputs are written under <current working directory>/output.
    save_dir=os.path.join(os.getcwd(), 'output'),
    project="dialogue-act-classification",
    run_name="context-aware-attention-dac",
    lr=1e-5,
    monitor="val_accuracy",
    min_delta=0.001,
    # NOTE(review): `:4f` is a width-4 format spec with default precision;
    # `.4f` may have been intended -- confirm before changing checkpoint names.
    filepath="./checkpoints/{epoch}-{val_accuracy:4f}",
    precision=32,
    average="micro",
    epochs=100,
    # Use the GPU when one is visible to torch, otherwise the CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    restart=False,
    restart_checkpoint="./checkpoints/epoch=10-val_accuracy=0.720291.ckpt",
)

In [14]:
from torch.utils.data import Dataset, DataLoader
import torch

# One row per dialogue-act class: (human-readable name, act tag, example utterance).
# Keeping the three fields side by side guarantees the derived lists stay aligned.
# NOTE(review): two rows share the tag '%'; and the training frame displayed in this
# notebook shows a tag spelled `fo_o_fw_"_by_bc` -- confirm which spelling lookups need.
_ACT_LABEL_ROWS = [
    ('Statement-non-opinion', 'sd', "Me, I'm in the legal department."),
    ('Acknowledge (Backchannel)', 'b', "Uh-huh."),
    ('Statement-opinion', 'sv', "I think it's great"),
    ('Agree/Accept', 'aa', "That's exactly it."),
    ('Abandoned or Turn-Exit', '%', "So, -"),
    ('Appreciation', 'ba', "I can imagine."),
    ('Yes-No-Question', 'qy', "Do you have to have any special training?"),
    ('Non-verbal', 'x', "[Laughter], [Throat_clearing]"),
    ('Yes answers', 'ny', "Yes."),
    ('Conventional-closing', 'fc', "Well, it's been nice talking to you."),
    ('Uninterpretable', '%', "But, uh, yeah"),
    ('Wh-Question', 'qw', "Well, how old are you?"),
    ('No answers', 'nn', "No."),
    ('Response Acknowledgement', 'bk', "Oh, okay."),
    ('Hedge', 'h', "I don't know if I'm making any sense or not."),
    ('Declarative Yes-No-Question', 'qy^d', "So you can afford to get a house?"),
    ('Other', 'fo_o_fw_by_bc', "Well give me a break, you know."),
    ('Backchannel in question form', 'bh', "Is that right?"),
    ('Quotation', '^q', "You can't be pregnant and have cats"),
    ('Summarize/reformulate', 'bf', "Oh, you mean you switched schools for the kids."),
    ('Affirmative non-yes answers', 'na', "It is."),
    ('Action-directive', 'ad', "Why don't you go first"),
    ('Collaborative Completion', '^2', "Who aren't contributing."),
    ('Repeat-phrase', 'b^m', "Oh, fajitas"),
    ('Open-Question', 'qo', "How about you?"),
    ('Rhetorical-Questions', 'qh', "Who would steal a newspaper?"),
    ('Hold before answer/agreement', '^h', "I'm drawing a blank."),
    ('Reject', 'ar', "Well, no"),
    ('Negative non-no answers', 'ng', "Uh, not a whole lot."),
    ('Signal-non-understanding', 'br', "Excuse me?"),
    ('Other answers', 'no', "I don't know"),
    ('Conventional-opening', 'fp', "How are you?"),
    ('Or-Clause', 'qrr', "or is it more of a company?"),
    ('Dispreferred answers', 'arp_nd', "Well, not so much that."),
    ('3rd-party-talk', 't3', "My goodness, Diane, get down from there."),
    ('Offers, Options, Commits', 'oo_co_cc', "I'll have to check that out"),
    ('Self-talk', 't1', "What's the word I'm looking for"),
    ('Downplayer', 'bd', "That's all right."),
    ('Maybe/Accept-part', 'aap_am', "Something like that"),
    ('Tag-Question', '^g', "Right?"),
    ('Declarative Wh-Question', 'qw^d', "You are what kind of buff?"),
    ('Apology', 'fa', "I'm sorry."),
    ('Thanking', 'ft', "Hey thanks a lot"),
]

# Column-oriented view: three parallel lists keyed by 'name', 'act_tag', 'example'.
act_label_names = {
    field: [row[position] for row in _ACT_LABEL_ROWS]
    for position, field in enumerate(('name', 'act_tag', 'example'))
}

class DADataset(Dataset):
    """Map-style dataset that pairs each utterance with its dialogue-act label index.

    Label indices come from a class-level registry that is shared by all
    instances: datasets built without ``label_dict`` extend it in sorted order
    of their unseen tags, and passing ``label_dict`` replaces it.

    Each instance keeps a reference to the mapping that was in force when it
    was constructed, so replacing the registry for a later dataset does not
    relabel samples served by an earlier one.
    """

    # Shared tag -> index registry (see class docstring).
    __label_dict = dict()

    def __init__(self, tokenizer, data, text_field = "clean_text", label_field="act_label_1", max_len=512, label_dict=None, device='cpu'):
        """
        Args:
            tokenizer: object providing ``encode_plus`` and ``tokenize``
                (presumably a Hugging Face tokenizer -- confirm with caller).
            data: mapping-like object indexable by ``text_field`` and
                ``label_field`` (e.g. a DataFrame or a dict of lists).
            text_field: key/column holding the utterance text.
            label_field: key/column holding the dialogue-act tag.
            max_len: length every encoded sequence is truncated/padded to.
            label_dict: optional tag -> index mapping; when given it replaces
                the shared registry (the object is used as-is, not copied).
            device: device the encoded tensors are moved to in ``__getitem__``.
        """
        self.text = list(data[text_field])
        self.acts = list(data[label_field])
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.device = device

        if label_dict is None:
            # Extend the shared registry with tags it has not seen yet;
            # sorting keeps the assigned indices deterministic.
            for cls in sorted(set(self.acts)):
                if cls not in DADataset.__label_dict:
                    DADataset.__label_dict[cls] = len(DADataset.__label_dict)
        else:
            DADataset.__label_dict = label_dict

        # Pin the mapping used by this instance. In the incremental case this
        # is the shared registry object itself, so later additions stay visible.
        self._label_map = DADataset.__label_dict

    def __len__(self):
        """Number of utterances in the dataset."""
        return len(self.text)

    def label_dict(self):
        """Return the tag -> index mapping this dataset uses."""
        return self._label_map

    def __getitem__(self, index):
        """Encode one utterance.

        Returns:
            dict with the raw ``text``, ``input_ids`` and ``attention_mask``
            (batch dimension squeezed away), ``seq_len``, the raw ``act`` tag
            and ``label`` as a 1-element ``torch.long`` tensor.
        """
        text = self.text[index]
        act = self.acts[index]
        label = self._label_map[act]

        # NOTE(review): tensors are moved to `self.device` per sample; with a
        # CUDA device and DataLoader workers that happens inside the worker
        # processes -- confirm this is intended.
        input_encoding = self.tokenizer.encode_plus(
            text=text,
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
            return_attention_mask=True,
            padding="max_length",
        ).to(self.device)

        # Token count from a separate, untruncated tokenize() call, so it can
        # exceed `max_len` (and presumably excludes special tokens).
        seq_len = len(self.tokenizer.tokenize(text))

        return {
            "text":text,
            "input_ids":input_encoding['input_ids'].squeeze(),
            "attention_mask":input_encoding['attention_mask'].squeeze(),
            "seq_len":seq_len,
            "act":act,
            "label":torch.tensor([label], dtype=torch.long),
        }


In [20]:
import torch.nn as nn
import torch
# from .UtteranceRNN import UtteranceRNN
# from .ConversationRNN import ConversationRNN
# from .ContextAwareAttention import ContextAwareAttention



class ContextAwareDAC(nn.Module):
    """Dialogue-act classifier that carries a recurrent state across utterances.

    Each utterance in a batch is encoded by ``UtteranceRNN``, summarised by
    ``ContextAwareAttention`` (conditioned on the running conversation state),
    and fed through ``ConversationRNN``. The resulting hidden state of every
    utterance is classified by a small MLP. The conversation state ``hx`` is
    kept between calls to ``forward``.
    """

    def __init__(self, model_name="roberta-base", hidden_size=768, num_classes=18, device=torch.device("cpu")):
        """
        Args:
            model_name: identifier forwarded to ``UtteranceRNN``.
            hidden_size: hidden width of the recurrent layers.
            num_classes: number of output logits.
            device: device forwarded to ``UtteranceRNN`` and used for the
                initial conversation state.
        """
        super(ContextAwareDAC, self).__init__()

        # Both RNN directions are flattened together, hence twice the hidden width.
        self.in_features = 2*hidden_size

        self.device = device

        # utterance encoder model
        self.utterance_rnn = UtteranceRNN(model_name=model_name, hidden_size=hidden_size, device=device)

        # context aware self attention module
        self.context_aware_attention = ContextAwareAttention(hidden_size=2*hidden_size, output_size=hidden_size, seq_len=128)

        # conversation level rnn
        self.conversation_rnn = ConversationRNN(input_size=1, hidden_size=hidden_size)

        # classifier on top of feature extractor
        self.classifier = nn.Sequential(
            nn.Linear(in_features=self.in_features, out_features=256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=num_classes),
        )

        # Initial conversation state. Registered as a buffer so that
        # `.to()` / `.cuda()` / `.double()` keep it in sync with the weights;
        # non-persistent so the state_dict keys stay exactly as before.
        self.register_buffer(
            "hx",
            torch.randn((2, 1, hidden_size), device=self.device),
            persistent=False,
        )

    def forward(self, batch):
        """Classify every utterance of ``batch`` in order.

        Args:
            batch: mapping with ``input_ids`` and ``attention_mask`` tensors and
                a ``seq_len`` tensor (converted with ``.tolist()``).

        Returns:
            Logits with one row per utterance and ``num_classes`` columns.
        """
        outputs = self.utterance_rnn(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            seq_len=batch['seq_len'].tolist(),
        )

        # Running conversation state, carried over from the previous call.
        hx = self.hx

        per_utterance = []
        for x in outputs:
            # Re-add a batch dimension of 1: utterances are processed one at a time.
            x = x.unsqueeze(0)

            # get sentence representation as 2d-matrix and project it linearly
            m = self.context_aware_attention(hidden_states=x, h_forward=hx[0].detach())

            # apply rnn on linearly projected vector; the incoming state is
            # detached so gradients do not flow into earlier utterances.
            hx = self.conversation_rnn(input_=m, hx=hx.detach())

            # keep the current utterance's last hidden state as its feature row
            per_utterance.append(hx.view(1, -1))

        # Remember the state for the next batch without keeping the graph alive.
        self.hx = hx.detach()

        if per_utterance:
            features = torch.cat(per_utterance, dim=0)
        else:
            # Empty batch: keep returning a (0, num_classes) result.
            features = torch.empty((0, self.in_features), device=self.device)

        return self.classifier(features)

import torch.nn as nn

class ConversationRNN(nn.Module):
    """Thin wrapper around ``nn.RNN`` that exposes only the final hidden state."""

    def __init__(self, input_size=1, hidden_size=768, bidirectional=True, num_layers=1):
        super().__init__()

        # batch_first=True: inputs are laid out as (batch, steps, input_size).
        rnn_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            batch_first=True,
        )
        self.rnn = nn.RNN(**rnn_options)

    def forward(self, input_, hx=None):
        """Run the RNN over ``input_`` and return its final hidden state.

        Args:
            input_: batch-first input sequence for the wrapped ``nn.RNN``.
            hx: optional initial hidden state, passed through unchanged
                (presumably ``[num_layers * directions, batch, hidden_size]``
                per the ``nn.RNN`` convention -- verify against caller).

        Returns:
            The second element of the ``nn.RNN`` output, i.e. the last hidden
            state; the per-step outputs are discarded.
        """
        return self.rnn(input_, hx)[1]


import torch
import  torch.nn as nn
from transformers import AutoConfig, AutoModel, AutoTokenizer


class UtteranceRNN(nn.Module):
    """Encode token ids with a pretrained transformer followed by an RNN."""

    def __init__(self, model_name="roberta-base", hidden_size=768, bidirectional=True, num_layers=1, device=torch.device("cpu")):
        """
        Args:
            model_name: checkpoint identifier given to ``AutoModel.from_pretrained``.
            hidden_size: input and hidden width of the RNN; it must match the
                width of the pretrained model's hidden states.
            bidirectional: whether the RNN runs in both directions.
            num_layers: number of stacked RNN layers.
            device: device the pretrained model is moved to. Note that only the
                pretrained model is moved here, not the RNN.
        """
        super(UtteranceRNN, self).__init__()
        self.device=device

        # embedding layer is replaced by the pretrained model's contextual embeddings
        self.base = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name, return_dict=False)
        self.base.to(device)

        self.rnn = nn.RNN(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            batch_first=True,
        )

    def forward(self, input_ids, attention_mask, seq_len):
        """Return the RNN outputs for every token position.

        Args:
            input_ids: token ids, ``[batch_size, max_len]``.
            attention_mask: mask passed to the pretrained model.
            seq_len: accepted for interface compatibility; currently unused
                because the sequences are not packed before the RNN.

        Returns:
            Per-position RNN outputs (first element of the ``nn.RNN`` result).
        """
        # Keyword arguments keep this correct for architectures whose second
        # positional parameter is not the attention mask, and indexing [0]
        # works whether or not the model also returns a pooled output.
        hidden_states = self.base(input_ids=input_ids, attention_mask=attention_mask)[0]
        # hidden_states.shape = [batch, max_len, hidden_size]

        outputs, _ = self.rnn(hidden_states)

        return outputs

import torch.nn as nn
import torch

class ContextAwareAttention(nn.Module):
    """Attention over an utterance's hidden states, conditioned on a context vector.

    NOTE(review): ``seq_len`` and ``device`` are accepted but the visible code
    only stores ``device`` and never reads ``seq_len``; the energy width is
    fixed at 128.
    """

    def __init__(self, hidden_size=1536, output_size=768, seq_len=128, device=torch.device("cpu")):
        super().__init__()
        self.device = device

        # Energy terms: fc_1 projects the hidden states, fc_3 projects the
        # context vector (half the width of `hidden_size`), fc_2 maps their
        # combination to 128 scores per position.
        self.fc_1 = nn.Linear(hidden_size, output_size, bias=False)
        self.fc_3 = nn.Linear(hidden_size // 2, output_size, bias=True)
        self.fc_2 = nn.Linear(output_size, 128, bias=False)

        # Collapses each row of the sentence matrix to a single value.
        self.linear_projection = nn.Linear(hidden_size, 1, bias=True)

    def forward(self, hidden_states, h_forward):
        """
        Args:
            hidden_states: ``[batch, seq_len, hidden_size]``.
            h_forward: ``[1, hidden_size // 2]`` context vector; it is
                broadcast over the sequence positions.

        Returns:
            ``[batch, 128, 1]`` tensor: the linearly projected sentence matrix.
        """
        token_term = self.fc_1(hidden_states)
        context_term = self.fc_3(h_forward.unsqueeze(1))

        # energy.shape = [batch, seq_len, 128]
        energy = self.fc_2(torch.tanh(token_term + context_term))

        # NOTE(review): normalisation runs over the last (128-wide) axis, not
        # over sequence positions -- confirm this is the intended axis.
        weights = torch.softmax(energy, dim=-1)

        # [batch, 128, seq_len] @ [batch, seq_len, hidden_size]
        sentence_matrix = weights.permute(0, 2, 1) @ hidden_states

        return self.linear_projection(sentence_matrix)

In [21]:
from transformers import AutoConfig, AutoTokenizer, AutoModel
# Build the classifier from the shared config. UtteranceRNN only moves its
# pretrained encoder to `device`; the remaining submodules are created on the
# default device, so the whole model is moved explicitly to keep weights and
# the conversation state on the same device.
model = ContextAwareDAC(
    model_name=config['model_name'],
    hidden_size=config['hidden_size'],
    num_classes=config['num_classes'],
    device=config['device']
).to(config['device'])

# Tokenizer matching the pretrained encoder checkpoint.
tokenizer = AutoTokenizer.from_pretrained(config['model_name'])

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Training split as a pandas DataFrame: <data_dir>/<dataset>/<dataset>_train.csv
train_csv = os.path.join(config['data_dir'], config['dataset'], f"{config['dataset']}_train.csv")
train_data = pd.read_csv(train_csv)

In [28]:
from datasets import load_dataset
# Same training CSV, loaded through the `datasets` CSV builder instead of pandas.
hf_train_csv = os.path.join(config['data_dir'], config['dataset'], f"{config['dataset']}_train.csv")
train_data1 = load_dataset("csv", data_files=hf_train_csv)

Using custom data configuration default-54a591ab78508fe4


Downloading and preparing dataset csv/default to C:\Users\Surya\.cache\huggingface\datasets\csv\default-54a591ab78508fe4\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519...


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1000.07it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 166.59it/s]


Dataset csv downloaded and prepared to C:\Users\Surya\.cache\huggingface\datasets\csv\default-54a591ab78508fe4\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519. Subsequent calls will reuse this data.


100%|██████████| 1/1 [00:00<00:00, 71.41it/s]


In [29]:
# Display the loaded dataset object (its splits, feature names and row counts).
train_data1

DatasetDict({
    train: Dataset({
        features: ['DamslActTag', 'Text'],
        num_rows: 193320
    })
})

In [22]:
# Wrap the training frame; column names and sequence length come from the config.
# `device` is left at the DADataset default.
train_dataset = DADataset(
    data=train_data,
    tokenizer=tokenizer,
    text_field=config['text_field'],
    label_field=config['label_field'],
    max_len=config['max_len'],
)

In [25]:
# Batches are served in dataset order (no shuffling).
train_loader = DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    num_workers=config['num_workers'],
    shuffle=False,
)

In [27]:
# Display the training DataFrame (pandas shows a truncated preview).
train_data

Unnamed: 0,DamslActTag,Text
0,"fo_o_fw_""_by_bc",Okay <laughter>.
1,x,<Talking> <laughter>. *slash error
2,qh,Where to start.
3,sd,"I haven't had that much, of course"
4,%,"I just heard,"
...,...,...
193315,%,<<Very faint>> Huh.
193316,sv,"Uh, my job is government tooling specialist."
193317,b,"Oh, okay,"
193318,bf,"so, you know about some of these things."


In [13]:
# Validation split as a pandas DataFrame: <data_dir>/<dataset>/<dataset>_valid.csv
valid_csv = os.path.join(config['data_dir'], config['dataset'], f"{config['dataset']}_valid.csv")
valid_data = pd.read_csv(valid_csv)