In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader

# Preparing data

In [None]:
# Slot dictionary for the Facebook Multilingual dataset
slot_dict = {
 'O': 0,
 'B-datetime': 1,
 'I-datetime': 2,
 'B-alarm/alarm_modifier': 3,
 'I-alarm/alarm_modifier': 4,
 'B-reminder/noun': 5,
 'I-reminder/noun': 6,
 'B-reminder/todo': 7,
 'I-reminder/todo': 8,
 'B-reminder/reference': 9,
 'I-reminder/reference': 10,
 'B-reminder/reminder_modifier': 11,
 'I-reminder/reminder_modifier': 12,
 'B-reminder/recurring_period': 13,
 'I-reminder/recurring_period': 14,
 'B-location': 15,
 'I-location': 16,
 'B-weather/temperatureUnit': 17,
 'I-weather/temperatureUnit': 18,
 'B-weather/noun': 19,
 'I-weather/noun': 20,
 'B-weather/attribute': 21,
 'I-weather/attribute': 22,
 }

In [None]:
### Slot dictionary for the Persian-ATIS dataset
# slot_dict = {
#  'O': 0,
#  'B-aircraft_code': 1,
#  'I-aircraft_code': 2,
#  'B-airline_code': 3,
#  'I-airline_code': 4,
#  'B-airline_name': 5,
#  'I-airline_name': 6,
#  'B-airport_code': 7,
#  'I-airport_code': 8,
#  'B-airport_name': 9,
#  'I-airport_name': 10,
#  'B-arrive_date.date_relative': 11,
#  'I-arrive_date.date_relative': 12,
#  'B-arrive_date.day_name': 13,
#  'I-arrive_date.day_name': 14,
#  'B-arrive_date.day_number': 15,
#  'I-arrive_date.day_number': 16,
#  'B-arrive_date.month_name': 17,
#  'I-arrive_date.month_name': 18,
#  'B-arrive_date.today_relative': 19,
#  'I-arrive_date.today_relative': 20,
#  'B-arrive_time.end_time': 21,
#  'I-arrive_time.end_time': 22,
#  'B-arrive_time.period_mod': 23,
#  'I-arrive_time.period_mod': 24,
#  'B-arrive_time.period_of_day': 25,
#  'I-arrive_time.period_of_day': 26,
#  'B-arrive_time.start_time': 27,
#  'I-arrive_time.start_time': 28,
#  'B-arrive_time.time': 29,
#  'I-arrive_time.time': 30,
#  'B-arrive_time.time_relative': 31,
#  'I-arrive_time.time_relative': 32,
#  'B-city_name': 33,
#  'I-city_name': 34,
#  'B-class_type': 35,
#  'I-class_type': 36,
#  'B-compartment': 37,
#  'I-compartment': 38,
#  'B-connect': 39,
#  'I-connect': 40,
#  'B-cost_relative': 41,
#  'I-cost_relative': 42,
#  'B-day_name': 43,
#  'I-day_name': 44,
#  'B-day_name.depart_date': 45,
#  'I-day_name.depart_date': 46,
#  'B-day_number': 47,
#  'I-day_number': 48,
#  'B-days_code': 49,
#  'I-days_code': 50,
#  'B-depart_date.date_relative': 51,
#  'I-depart_date.date_relative': 52,
#  'B-depart_date.day_name': 53,
#  'I-depart_date.day_name': 54,
#  'B-depart_date.day_number': 55,
#  'I-depart_date.day_number': 56,
#  'B-depart_date.month_name': 57,
#  'I-depart_date.month_name': 58,
#  'B-depart_date.time': 59,
#  'I-depart_date.time': 60,
#  'B-depart_date.today_relative': 61,
#  'I-depart_date.today_relative': 62,
#  'B-depart_date.year': 63,
#  'I-depart_date.year': 64,
#  'B-depart_time.end_time': 65,
#  'I-depart_time.end_time': 66,
#  'B-depart_time.period_mod': 67,
#  'I-depart_time.period_mod': 68,
#  'B-depart_time.period_of_day': 69,
#  'I-depart_time.period_of_day': 70,
#  'B-depart_time.start_time': 71,
#  'I-depart_time.start_time': 72,
#  'B-depart_time.time': 73,
#  'I-depart_time.time': 74,
#  'B-depart_time.time_relative': 75,
#  'I-depart_time.time_relative': 76,
#  'B-economy': 77,
#  'I-economy': 78,
#  'B-fare_amount': 79,
#  'I-fare_amount': 80,
#  'B-fare_basis_code': 81,
#  'I-fare_basis_code': 82,
#  'B-flight_days': 83,
#  'I-flight_days': 84,
#  'B-flight_mod': 85,
#  'I-flight_mod': 86,
#  'B-flight_number': 87,
#  'I-flight_number': 88,
#  'B-flight_stop': 89,
#  'I-flight_stop': 90,
#  'B-flight_time': 91,
#  'I-flight_time': 92,
#  'B-fromloc.airport_code': 93,
#  'I-fromloc.airport_code': 94,
#  'B-fromloc.airport_name': 95,
#  'I-fromloc.airport_name': 96,
#  'B-fromloc.city_name': 97,
#  'I-fromloc.city_name': 98,
#  'B-fromloc.state_code': 99,
#  'I-fromloc.state_code': 100,
#  'B-fromloc.state_name': 101,
#  'I-fromloc.state_name': 102,
#  'B-meal': 103,
#  'I-meal': 104,
#  'B-meal_code': 105,
#  'I-meal_code': 106,
#  'B-meal_description': 107,
#  'I-meal_description': 108,
#  'B-mod': 109,
#  'I-mod': 110,
#  'B-month_name': 111,
#  'I-month_name': 112,
#  'B-or': 113,
#  'I-or': 114,
#  'B-period_of_day': 115,
#  'I-period_of_day': 116,
#  'B-restriction_code': 117,
#  'I-restriction_code': 118,
#  'B-return_date.date_relative': 119,
#  'I-return_date.date_relative': 120,
#  'B-return_date.day_name': 121,
#  'I-return_date.day_name': 122,
#  'B-return_date.day_number': 123,
#  'I-return_date.day_number': 124,
#  'B-return_date.month_name': 125,
#  'I-return_date.month_name': 126,
#  'B-return_date.today_relative': 127,
#  'I-return_date.today_relative': 128,
#  'B-return_time.period_mod': 129,
#  'I-return_time.period_mod': 130,
#  'B-return_time.period_of_day': 131,
#  'I-return_time.period_of_day': 132,
#  'B-return_time.time': 133,
#  'I-return_time.time': 134,
#  'B-round_trip': 135,
#  'I-round_trip': 136,
#  'B-state_code': 137,
#  'I-state_code': 138,
#  'B-state_name': 139,
#  'I-state_name': 140,
#  'B-stoploc.airport_code': 141,
#  'I-stoploc.airport_code': 142,
#  'B-stoploc.city_name': 143,
#  'I-stoploc.city_name': 144,
#  'B-stoploc.state_code': 145,
#  'I-stoploc.state_code': 146,
#  'B-stoploc.state_name': 147,
#  'I-stoploc.state_name': 148,
#  'B-time': 149,
#  'I-time': 150,
#  'B-time_relative': 151,
#  'I-time_relative': 152,
#  'B-today_relative': 153,
#  'I-today_relative': 154,
#  'B-toloc.airport_code': 155,
#  'I-toloc.airport_code': 156,
#  'B-toloc.airport_name': 157,
#  'I-toloc.airport_name': 158,
#  'B-toloc.city_name': 159,
#  'I-toloc.city_name': 160,
#  'B-toloc.country_name': 161,
#  'I-toloc.country_name': 162,
#  'B-toloc.state_code': 163,
#  'I-toloc.state_code': 164,
#  'B-toloc.state_name': 165,
#  'I-toloc.state_name': 166,
#  'B-transport_type': 167,
#  'I-transport_type': 168,
#  'B-booking_class': 169,
#  'I-booking_class': 170,
#  'B-flight': 171,
#  'I-flight': 172,
#  'B-stoploc.airport_name': 173,
#  'I-stoploc.airport_name': 174}

In [None]:
def read_file(file_name, data_dir='/content/drive/MyDrive/data/'):
  """Read a text file from the data directory and return its lines.

  Args:
    file_name: Path of the file relative to ``data_dir``.
    data_dir: Directory prefix that is concatenated verbatim with
      ``file_name``, so it must end with a path separator. Defaults to the
      mounted Google Drive data folder used throughout this notebook.

  Returns:
    A list with one string per line, exactly as produced by ``readlines()``
    (trailing newlines are kept).

  Raises:
    OSError: If the file cannot be opened.
  """
  # The notebook loads ES/TH/FA files, so decode explicitly as UTF-8 rather than
  # relying on the platform default; `with` closes the handle even on error.
  with open(data_dir + file_name, 'r', encoding='utf-8') as f:
    return f.readlines()

## EN

In [None]:
# EN (tokens)
list_all_tokens_train_en = read_file('path_to_data')
list_all_tokens_eval_en = read_file('path_to_data')
list_all_tokens_test_en = read_file('path_to_data')

# list_all_tokens_train_en = read_file('persianATIS/english-atis-tokens-train.txt')
# list_all_tokens_eval_en = read_file('persianATIS/english-atis-tokens-eval.txt')
# list_all_tokens_test_en = read_file('persianATIS/english-atis-tokens-test.txt')

In [None]:
# EN (slots)
list_all_slots_train_en = read_file('path_to_data')
list_all_slots_eval_en = read_file('path_to_data')
list_all_slots_test_en = read_file('path_to_data')

# list_all_slots_train_en = read_file('persianATIS/labels-english-atis-train.txt')
# list_all_slots_eval_en = read_file('persianATIS/labels-english-atis-eval.txt')
# list_all_slots_test_en = read_file('persianATIS/labels-english-atis-test.txt')

## ES, FA

In [None]:
# ES (tokens)
list_all_tokens_train_aux = read_file('es/list_token_train_es.txt')
list_all_tokens_eval_aux = read_file('es/list_token_eval_es.txt')
# list_all_tokens_test_aux = read_file('es/list_token_test_es.txt')

# list_all_tokens_train_aux = read_file('persianATIS/persian-atis-tokens-train.txt')
# list_all_tokens_eval_aux = read_file('persianATIS/persian-atis-tokens-eval.txt')
# list_all_tokens_test_aux = read_file('persianATIS/persian-atis-tokens-test.txt')

In [None]:
# ES (slots)
list_all_slots_train_aux = read_file('es/label-train-es2.txt')
list_all_slots_eval_aux = read_file('es/label-eval-es2.txt')
# list_all_slots_test_aux = read_file('es/label-test-es2.txt')

# list_all_slots_train_aux = read_file('persianATIS/labels-persian-atis-train.txt')
# list_all_slots_eval_aux = read_file('persianATIS/labels-persian-atis-eval.txt')
# list_all_slots_test_aux = read_file('persianATIS/labels-persian-atis-test.txt')

## TH

In [None]:
list_all_tokens_train_th = read_file('th/list_token_train_th.txt')
list_all_slots_train_th = read_file('th/label-train-th2.txt')

In [None]:
list_all_tokens_test_th = read_file('th/list_token_test_th.txt')
list_all_slots_test_th = read_file('th/label-test-th2.txt')

## others

In [None]:
list_all_tokens_train_fa = read_file('fa/list_token_train_fa.txt')
list_all_slots_train_fa = read_file('fa/label-train-fa2.txt')

list_all_tokens_test_fa = read_file('fa/list_token_test_fa.txt')
list_all_slots_test_fa = read_file('fa/label-test-fa2.txt')

In [None]:
list_all_tokens_train_it = read_file('en-it/tokenization-train-en-to-it.txt')
list_all_slots_train_it = read_file('en-it/label-train-it.txt')

list_all_tokens_test_it = read_file('en-it/tokenization-test-en-to-it.txt')
list_all_slots_test_it = read_file('en-it/label-test-it.txt')

In [None]:
import ast
def string_to_list(my_list):
  """Parse every entry of ``my_list`` as a Python literal, in place.

  Each element (a line of text holding a literal such as a list of strings) is
  replaced by the object that ``ast.literal_eval`` builds from it. The list is
  modified in place and the same list object is returned.
  """
  for position, raw_line in enumerate(my_list):
    my_list[position] = ast.literal_eval(raw_line)
  return my_list

In [None]:
# EN
list_all_tokens_train_en = string_to_list(list_all_tokens_train_en)
list_all_slots_train_en = string_to_list(list_all_slots_train_en)

list_all_tokens_eval_en = string_to_list(list_all_tokens_eval_en)
list_all_slots_eval_en = string_to_list(list_all_slots_eval_en)

list_all_tokens_test_en = string_to_list(list_all_tokens_test_en)
list_all_slots_test_en = string_to_list(list_all_slots_test_en)

In [None]:
# ES
list_all_tokens_train_aux = string_to_list(list_all_tokens_train_aux)
list_all_slots_train_aux = string_to_list(list_all_slots_train_aux)

list_all_tokens_eval_aux = string_to_list(list_all_tokens_eval_aux)
list_all_slots_eval_aux = string_to_list(list_all_slots_eval_aux)

# list_all_tokens_test_aux = string_to_list(list_all_tokens_test_aux)
# list_all_slots_test_aux = string_to_list(list_all_slots_test_aux)

In [None]:
# TH
list_all_tokens_train_th = string_to_list(list_all_tokens_train_th)
list_all_slots_train_th = string_to_list(list_all_slots_train_th)

list_all_tokens_test_th = string_to_list(list_all_tokens_test_th)
list_all_slots_test_th = string_to_list(list_all_slots_test_th)

In [None]:
# FA
list_all_tokens_train_fa = string_to_list(list_all_tokens_train_fa)
list_all_slots_train_fa = string_to_list(list_all_slots_train_fa)

list_all_tokens_test_fa = string_to_list(list_all_tokens_test_fa)
list_all_slots_test_fa = string_to_list(list_all_slots_test_fa)

In [None]:
#IT
list_all_tokens_train_it = string_to_list(list_all_tokens_train_it)
list_all_slots_train_it = string_to_list(list_all_slots_train_it)

list_all_tokens_test_it = string_to_list(list_all_tokens_test_it)
list_all_slots_test_it = string_to_list(list_all_slots_test_it)

In [None]:
def slots_to_ids(slot_dict, slot_list):
    """Replace every slot label string in ``slot_list`` by its integer id, in place.

    Args:
        slot_dict: Mapping from (normalised) slot label to integer id.
        slot_list: List of per-sentence label lists; it is mutated and returned.

    Returns:
        The same ``slot_list`` object, now holding ids instead of label strings.

    Raises:
        KeyError: If a label is still missing from ``slot_dict`` after normalisation.
    """
    # Labels that are collapsed into the outside tag 'O'.
    collapsed_to_outside = (
        'B-news/type', 'B-negation', 'I-negation', 'B-timer/noun',
        'B-timer/attributes', 'B-demonstrative_reference', 'I-demonstrative_reference',
    )

    for sentence_labels in slot_list:
        for position, tag in enumerate(sentence_labels):
            if tag in collapsed_to_outside:
                tag = 'O'
            elif tag == 'B-alarm/recurring_period':
                tag = 'B-datetime'
            elif tag == 'I-alarm/recurring_period':
                tag = 'I-datetime'

            ### persian atis ###
            # Drop one leading space, then one leading zero-width non-joiner.
            if tag.startswith(' '):
                tag = tag[1:]
            if tag.startswith('\u200c'):
                tag = tag[1:]

            # Repair malformed prefixes ('O-...', 'B.', 'I.').
            if tag == 'O-depart_time.time':
                tag = 'O'
            elif tag.startswith('B.'):
                tag = 'B-' + tag[2:]
            elif tag.startswith('I.'):
                tag = 'I-' + tag[2:]

            # Checked last on purpose: the 'I.' repair above can produce this label.
            if tag == 'I-depart_date.time_relative':
                tag = 'I-depart_time.time_relative'

            sentence_labels[position] = slot_dict[tag]

    return slot_list

In [None]:
# EN
list_all_slots_train_en_ids_org = slots_to_ids(slot_dict, list_all_slots_train_en)
list_all_slots_eval_en_ids_org = slots_to_ids(slot_dict, list_all_slots_eval_en)
list_all_slots_test_en_ids_org = slots_to_ids(slot_dict, list_all_slots_test_en)

In [None]:
# ES
list_all_slots_train_aux_ids_org = slots_to_ids(slot_dict, list_all_slots_train_aux)
list_all_slots_eval_aux_ids_org = slots_to_ids(slot_dict, list_all_slots_eval_aux)
# list_all_slots_test_aux_ids_org = slots_to_ids(slot_dict, list_all_slots_test_aux)

In [None]:
# TH
list_all_slots_train_th_ids_org = slots_to_ids(slot_dict, list_all_slots_train_th)
list_all_slots_test_th_ids_org = slots_to_ids(slot_dict, list_all_slots_test_th)

In [None]:
# FA
list_all_slots_train_fa_ids_org = slots_to_ids(slot_dict, list_all_slots_train_fa)
list_all_slots_test_fa_ids_org = slots_to_ids(slot_dict, list_all_slots_test_fa)

In [None]:
# IT
list_all_slots_train_it_ids_org = slots_to_ids(slot_dict, list_all_slots_train_it)
list_all_slots_test_it_ids_org = slots_to_ids(slot_dict, list_all_slots_test_it)

In [None]:
intent_dict = {'alarm/set_alarm': 0,
 'alarm/show_alarms': 1,
 'alarm/cancel_alarm': 2,
 'alarm/time_left_on_alarm': 3,
 'alarm/modify_alarm': 4,
 'alarm/snooze_alarm': 5,
 'reminder/set_reminder': 6,
 'reminder/show_reminders': 7,
 'reminder/cancel_reminder': 8,
 'weather/find': 9,
 'weather/checkSunrise': 10,
 'weather/checkSunset': 11}

In [None]:
### Persian-ATIS ###
# intent_dict = {
#   'flight_no': 0,
#   'airfare+flight': 1,
#   'capacity': 2,
#   'airfare+flight_time': 3,
#   'quantity': 4,
#   'airfare': 5,
#   'ground_service+ground_fare': 6,
#   'city': 7,
#   'flight_no+airline': 8,
#   'flight': 9,
#   'flight+airfare': 10,
#   'airport': 11,
#   'abbreviation': 12,
#   'cheapest': 13,
#   'aircraft+flight+flight_no': 14,
#   'distance': 15,
#   'restriction': 16,
#   'meal': 17,
#   'aircraft': 18,
#   'flight_time': 19,
#   'flight+airline': 20,
#   'ground_fare': 21,
#   'airline+flight_no': 22,
#   'airline': 23,
#   'ground_service': 24,
#   'day_name': 25,
# }

In [None]:
def convert_label(file_name, intent_dict):
  """Load an intent file (one intent name per line) and map each line to its id.

  Args:
    file_name: File path handed to ``read_file``.
    intent_dict: Mapping from intent name to integer id.

  Returns:
    A list of integer intent ids, one per line of the file, in file order.

  Raises:
    KeyError: If a stripped line is not a key of ``intent_dict``.
  """
  # Surrounding whitespace (including the trailing newline) is removed before lookup.
  return [intent_dict[raw_line.strip()] for raw_line in read_file(file_name)]

In [None]:
# EN
intent_label_train_en = torch.tensor(convert_label('new_en/train_en_sep_intent.txt', intent_dict))
intent_label_eval_en = torch.tensor(convert_label('new_en/eval_en_sep_intent.txt', intent_dict))
intent_label_test_en = torch.tensor(convert_label('new_en/test_en_sep_intent.txt', intent_dict))

# intent_label_train_en = torch.tensor(convert_label('persianATIS/intent-english-atis-train.txt', intent_dict))
# intent_label_eval_en = torch.tensor(convert_label('persianATIS/intent-english-atis-eval.txt', intent_dict))
# intent_label_test_en = torch.tensor(convert_label('persianATIS/intent-english-atis-test.txt', intent_dict))

In [None]:
# ES
intent_label_train_aux = torch.tensor(convert_label('es/intent_train-es.txt', intent_dict))
intent_label_eval_aux = torch.tensor(convert_label('es/intent_eval-es.txt', intent_dict))
# intent_label_test_aux = torch.tensor(convert_label('es/intent_test-es.txt', intent_dict))

# intent_label_train_aux = torch.tensor(convert_label('persianATIS/intent-persian-atis-train.txt', intent_dict))
# intent_label_eval_aux = torch.tensor(convert_label('persianATIS/intent-persian-atis-eval.txt', intent_dict))
# intent_label_test_aux = torch.tensor(convert_label('persianATIS/intent-persian-atis-test.txt', intent_dict))

In [None]:
# TH
intent_label_train_th = torch.tensor(convert_label('th/intent-train-th.txt', intent_dict))
intent_label_test_th = torch.tensor(convert_label('th/intent-test-th.txt', intent_dict))

In [None]:
# FA
intent_label_train_fa = torch.tensor(convert_label('fa/intent_train-fa.txt', intent_dict))
intent_label_test_fa = torch.tensor(convert_label('fa/intent_test-fa.txt', intent_dict))

In [None]:
# IT
intent_label_train_it = torch.tensor(convert_label('new_en/train_en_sep_intent.txt', intent_dict))
intent_label_test_it = torch.tensor(convert_label('new_en/test_en_sep_intent.txt', intent_dict))

# mBART

In [None]:
# from transformers import BertPreTrainedModel, BertModel, BertConfig, BertTokenizer

from transformers import MBartForConditionalGeneration, MBartTokenizer

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
model_name = 'facebook/mbart-large-50'  #'facebook/mbart-large-cc25'

In [None]:
def bart_text_preparation(text, slot_labels, tokenizer, max_seq_len=46):
  """Tokenize one utterance and align its word-level slot ids to sub-word tokens.

  The words are wrapped in '<s>' ... '</s>', split into sub-word tokens, and
  the sequence is padded / truncated to exactly ``max_seq_len`` positions.

  Args:
    text: List of word strings.
    slot_labels: List of integer slot ids, one per word in ``text``.
    tokenizer: Object providing ``tokenize``, ``convert_tokens_to_ids`` and
      ``pad_token_id``.
    max_seq_len: Fixed output length (default 46, the value previously
      hard-coded here).

  Returns:
    A tuple ``(tokens_tensor, segments_tensors, slot_labels_ids)``:
      tokens_tensor: ``torch`` tensor of shape (1, max_seq_len) with token ids.
      segments_tensors: ``torch`` tensor of shape (1, max_seq_len) holding the
        attention mask (1 for real tokens, 0 for padding).
      slot_labels_ids: Python list of ``max_seq_len`` label ids; special and
        padding positions carry -1.
  """
  special_token_label_id = -1  # label given to <s>, </s> and padding positions
  pad_token_id = tokenizer.pad_token_id
  marked_text = ['<s>'] + text + ['</s>']
  slot_labels = [special_token_label_id] + slot_labels + [special_token_label_id]

  tokenized_text = []
  slot_labels_ids = []
  for word, slot_label in zip(marked_text, slot_labels):
    word_tokens = tokenizer.tokenize(word)
    if not word_tokens:
      # A word that yields no tokens must not emit a label either, otherwise
      # every following label is shifted by one relative to its token.
      continue
    tokenized_text.extend(word_tokens)

    # The first sub-word keeps the word's label; the remaining sub-words get
    # the continuation label. In the slot dictionaries of this notebook a B- tag
    # has an odd id and its I- tag is the next (even) id.
    first_label = int(slot_label)
    if first_label <= 0:
      # 'O' (0) stays 'O'; the special label (-1) stays -1 so it keeps being ignored.
      other_labels = first_label
    elif first_label % 2 == 1:
      other_labels = first_label + 1
    else:
      other_labels = first_label

    slot_labels_ids.extend([first_label] + [other_labels] * (len(word_tokens) - 1))

  indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
  attention_mask = [1] * len(indexed_tokens)

  # Pad up to max_seq_len (no-op when the sequence is already long enough) ...
  padding_length = max(0, max_seq_len - len(indexed_tokens))
  indexed_tokens = indexed_tokens + [pad_token_id] * padding_length
  attention_mask = attention_mask + [0] * padding_length
  slot_labels_ids = slot_labels_ids + [special_token_label_id] * padding_length

  # ... and truncate anything beyond it.
  indexed_tokens = indexed_tokens[:max_seq_len]
  attention_mask = attention_mask[:max_seq_len]
  slot_labels_ids = slot_labels_ids[:max_seq_len]

  tokens_tensor = torch.tensor([indexed_tokens])
  segments_tensors = torch.tensor([attention_mask])

  return tokens_tensor, segments_tensors, slot_labels_ids

In [None]:
# tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

tokenizer = MBartTokenizer.from_pretrained(model_name)

# Discriminator

In [None]:
class Discriminator(nn.Module):
  """Feed-forward discriminator that maps each feature vector to a score in (0, 1).

  The network is Linear(in_features, 256) -> Tanh -> Linear(256, 128) ->
  Sigmoid -> Linear(128, 1) -> Sigmoid, applied over the last dimension.
  """

  def __init__(self, in_features):  # in_features: 1024
    super(Discriminator, self).__init__()
    stages = [
      nn.Linear(in_features, 256),
      nn.Tanh(),
      nn.Linear(256, 128),
      nn.Sigmoid(),
      nn.Linear(128, 1),  # nclass: 1
      nn.Sigmoid(),
    ]
    self.disc = nn.Sequential(*stages)

  def forward(self, x):
    """Return one sigmoid score per input vector (last dimension becomes 1)."""
    scores = self.disc(x)
    return scores

In [None]:
# class Generator(nn.Module):
#   def __init__(self, in_features, hidden_size, num_layers):
#     super(Generator, self).__init__()
#     self.hidden_size = hidden_size
#     self.num_layers = num_layers
#     self.gen = nn.LSTM(in_features, hidden_size, num_layers, batch_first = True, bidirectional=True)

#   def forward(self, x):

#     h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).to(device)
#     c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).to(device)

#     out, _ = self.gen(x, (h0, c0))
#     # h_out = h_out.view(-1, self.hidden_size)

#     return out

# Encoder

In [None]:
model = MBartForConditionalGeneration.from_pretrained(model_name)

In [None]:
# Freeze the whole encoder first ...
for param in model.model.encoder.parameters():
  param.requires_grad = False

# ... then make only the positional embeddings and the last three encoder layers
# trainable again. This runs once, after the freeze loop; it used to be nested
# inside that loop, repeating the same work for every encoder parameter.
modules = [model.model.encoder.embed_positions, *model.model.encoder.layers[-3:]]

for module in modules:
  for param in module.parameters():
    param.requires_grad = True

In [None]:
model = model.to(device)

In [None]:
class Encoder(nn.Module):
    """Wraps a sequence-to-sequence model and exposes only its encoder output."""

    def __init__(self, model):
        super().__init__()
        self.mbart = model

    def forward(self, x, attention_mask=None):
        """Run the wrapped model and return its 'encoder_last_hidden_state' entry.

        Args:
            x: Token ids, passed to the model as ``input_ids``.
            attention_mask: Optional mask forwarded unchanged to the model.
        """
        return self.mbart(input_ids=x, attention_mask=attention_mask)['encoder_last_hidden_state']

# Decoder

In [None]:
class Decoder(nn.Module):
  """Runs the wrapped model's decoder on given encoder states and returns vocabulary logits."""

  def __init__(self, model):
      super(Decoder, self).__init__()
      self.mbart = model

  def forward(self, x, encoder_hidden_states, encoder_attention_mask=None):
      """Decode ``x`` while attending to ``encoder_hidden_states``.

      Args:
          x: Token ids passed to the decoder as ``input_ids``.
          encoder_hidden_states: Encoder output the decoder cross-attends to.
          encoder_attention_mask: Optional mask over the encoder positions
              (e.g. the input attention mask) so cross-attention can skip
              padding. ``None`` (the default) keeps the previous behaviour of
              attending to every encoder position.

      Returns:
          The output of ``lm_head`` applied to the decoder's last hidden state.
      """
      # NOTE(review): `x` is fed to the decoder as-is (no right shift) and
      # `lm_head` is applied without any extra logits bias — confirm this is
      # intended for the reconstruction objective.
      decoder_output = self.mbart.model.decoder(
          input_ids=x,
          encoder_hidden_states=encoder_hidden_states,
          encoder_attention_mask=encoder_attention_mask,
      )

      logits = self.mbart.lm_head(decoder_output.last_hidden_state)
      return logits

# loss & opt

In [None]:
encoder_model = model.to(device)

In [None]:
disc_ = Discriminator(1024).to(device)
opt_disc = optim.Adam(disc_.parameters(), lr=3e-4)

# gen_ = Generator(768, 256, 2).to(device)
# opt_gen = optim.Adam(gen_.parameters(), lr=2e-5)

In [None]:
# Single Adam optimizer over the parameters of encoder_model, slot_tagger and classifier.
# NOTE(review): `slot_tagger` is only created in the "Slot Filling" cell further down the
# notebook and no `classifier` assignment is visible before this point, so this cell
# presumably raises NameError on a fresh Restart & Run All — confirm the intended cell order.
opt_gen = optim.Adam(list(encoder_model.parameters()) + list(slot_tagger.parameters()) + list(classifier.parameters()), lr=5e-5)

In [None]:
decoder_model = Decoder(model).to(device)
# opt_decoder = optim.Adam(decoder_model.parameters(), lr=3e-4)

In [None]:
reconstruction_loss = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

In [None]:
criterion = nn.BCELoss()

# Data Loader

In [None]:
def get_tokenized_data(in_hand_data, in_hand_labels):
  """Run ``bart_text_preparation`` over a whole split.

  Args:
    in_hand_data: Sequence of utterances (each a list of words).
    in_hand_labels: Sequence of per-utterance slot-id lists, indexed in
      parallel with ``in_hand_data``.

  Returns:
    Three lists with one entry per utterance: the token-id tensors as returned
    by ``bart_text_preparation``, the first row of each attention-mask tensor,
    and the aligned slot-label id lists.
  """
  token_id_tensors, attention_rows, aligned_labels = [], [], []

  for idx, words in enumerate(in_hand_data):
    ids, mask, labels = bart_text_preparation(words, in_hand_labels[idx], tokenizer)
    token_id_tensors.append(ids)
    attention_rows.append(mask[0])  # keep only the single row of the mask tensor
    aligned_labels.append(labels)

  return token_id_tensors, attention_rows, aligned_labels

In [None]:
lst_tkn_train_en, lst_att_train_en, lst_slot_train_en = get_tokenized_data(list_all_tokens_train_en, list_all_slots_train_en_ids_org)

lst_tkn_eval_en, lst_att_eval_en, lst_slot_eval_en = get_tokenized_data(list_all_tokens_eval_en, list_all_slots_eval_en_ids_org)

lst_tkn_test_en, lst_att_test_en, lst_slot_test_en = get_tokenized_data(list_all_tokens_test_en, list_all_slots_test_en_ids_org)

In [None]:
lst_tkn_train_aux, lst_att_train_aux, lst_slot_train_aux = get_tokenized_data(list_all_tokens_train_aux, list_all_slots_train_aux_ids_org)

lst_tkn_eval_aux, lst_att_eval_aux, lst_slot_eval_aux = get_tokenized_data(list_all_tokens_eval_aux, list_all_slots_eval_aux_ids_org)

# lst_tkn_test_aux, lst_att_test_aux, lst_slot_test_aux = get_tokenized_data(list_all_tokens_test_aux, list_all_slots_test_aux_ids_org)

In [None]:
lst_tkn_train_th, lst_att_train_th, lst_slot_train_th = get_tokenized_data(list_all_tokens_train_th, list_all_slots_train_th_ids_org)

lst_tkn_test_th, lst_att_test_th, lst_slot_test_th = get_tokenized_data(list_all_tokens_test_th, list_all_slots_test_th_ids_org)

In [None]:
lst_tkn_train_fa, lst_att_train_fa, lst_slot_train_fa = get_tokenized_data(list_all_tokens_train_fa, list_all_slots_train_fa_ids_org)

lst_tkn_test_fa, lst_att_test_fa, lst_slot_test_fa = get_tokenized_data(list_all_tokens_test_fa, list_all_slots_test_fa_ids_org)

In [None]:
lst_tkn_train_it, lst_att_train_it, lst_slot_train_it = get_tokenized_data(list_all_tokens_train_it, list_all_slots_train_it_ids_org)

lst_tkn_test_it, lst_att_test_it, lst_slot_test_it = get_tokenized_data(list_all_tokens_test_it, list_all_slots_test_it_ids_org)

In [None]:
tensor_tkn_train_en = torch.cat([l for l in lst_tkn_train_en], dim = 0)
tensor_tkn_eval_en = torch.cat([l for l in lst_tkn_eval_en], dim = 0)
tensor_tkn_test_en = torch.cat([l for l in lst_tkn_test_en], dim = 0)

In [None]:
tensor_tkn_train_aux = torch.cat([l for l in lst_tkn_train_aux], dim = 0)
tensor_tkn_eval_aux = torch.cat([l for l in lst_tkn_eval_aux], dim = 0)
# tensor_tkn_test_aux = torch.cat([l for l in lst_tkn_test_aux], dim = 0)

In [None]:
tensor_tkn_train_th = torch.cat([l for l in lst_tkn_train_th], dim = 0)
tensor_tkn_test_th = torch.cat([l for l in lst_tkn_test_th], dim = 0)

In [None]:
tensor_tkn_train_fa = torch.cat([l for l in lst_tkn_train_fa], dim = 0)
tensor_tkn_test_fa = torch.cat([l for l in lst_tkn_test_fa], dim = 0)

In [None]:
tensor_tkn_train_it = torch.cat([l for l in lst_tkn_train_it], dim = 0)
tensor_tkn_test_it = torch.cat([l for l in lst_tkn_test_it], dim = 0)

In [None]:
tensor_att_train_en = torch.stack([l for l in lst_att_train_en], dim = 0)
tensor_att_eval_en = torch.stack([l for l in lst_att_eval_en], dim = 0)
tensor_att_test_en = torch.stack([l for l in lst_att_test_en], dim = 0)

In [None]:
tensor_att_train_aux = torch.stack([l for l in lst_att_train_aux], dim = 0)
tensor_att_eval_aux = torch.stack([l for l in lst_att_eval_aux], dim = 0)
# tensor_att_test_aux = torch.stack([l for l in lst_att_test_aux], dim = 0)

In [None]:
tensor_att_train_th = torch.stack([l for l in lst_att_train_th], dim = 0)
tensor_att_test_th = torch.stack([l for l in lst_att_test_th], dim = 0)

In [None]:
tensor_att_train_fa = torch.stack([l for l in lst_att_train_fa], dim = 0)
tensor_att_test_fa = torch.stack([l for l in lst_att_test_fa], dim = 0)

In [None]:
tensor_att_train_it = torch.stack([l for l in lst_att_train_it], dim = 0)
tensor_att_test_it = torch.stack([l for l in lst_att_test_it], dim = 0)

In [None]:
train_en_slot_ids = torch.tensor(lst_slot_train_en)
eval_en_slot_ids = torch.tensor(lst_slot_eval_en)
test_en_slot_ids = torch.tensor(lst_slot_test_en)

In [None]:
train_aux_slot_ids = torch.tensor(lst_slot_train_aux)
eval_aux_slot_ids = torch.tensor(lst_slot_eval_aux)
# test_aux_slot_ids = torch.tensor(lst_slot_test_aux)

In [None]:
train_th_slot_ids = torch.tensor(lst_slot_train_th)
test_th_slot_ids = torch.tensor(lst_slot_test_th)

In [None]:
train_fa_slot_ids = torch.tensor(lst_slot_train_fa)
test_fa_slot_ids = torch.tensor(lst_slot_test_fa)

In [None]:
train_it_slot_ids = torch.tensor(lst_slot_train_it)
test_it_slot_ids = torch.tensor(lst_slot_test_it)

In [None]:
batch_size = 8

## EN

In [None]:
train_en_dataset = TensorDataset(tensor_tkn_train_en, tensor_att_train_en, train_en_slot_ids, intent_label_train_en)
train_en_loader = DataLoader(train_en_dataset, batch_size=batch_size, shuffle = True)

eval_en_dataset = TensorDataset(tensor_tkn_eval_en, tensor_att_eval_en, eval_en_slot_ids, intent_label_eval_en)
eval_en_loader = DataLoader(eval_en_dataset, batch_size=batch_size, shuffle = True)

In [None]:
test_en_dataset = TensorDataset(tensor_tkn_test_en, tensor_att_test_en, test_en_slot_ids, intent_label_test_en)
bs = 1 #len(test_en_dataset)
test_en_loader = DataLoader(test_en_dataset, batch_size=bs, shuffle = False)

## ES

In [None]:
train_aux_dataset = TensorDataset(tensor_tkn_train_aux, tensor_att_train_aux, train_aux_slot_ids, intent_label_train_aux)
train_aux_loader = DataLoader(train_aux_dataset, batch_size=batch_size, shuffle = True)

eval_aux_dataset = TensorDataset(tensor_tkn_eval_aux, tensor_att_eval_aux, eval_aux_slot_ids, intent_label_eval_aux)
eval_aux_loader = DataLoader(eval_aux_dataset, batch_size=batch_size, shuffle = True)

In [None]:
# The auxiliary-language *test* split is disabled everywhere upstream in this notebook
# (its token/slot/intent loading and tensor construction lines are commented out), so
# building this loader raised NameError on Restart & Run All. Re-enable these lines
# together with the upstream `*_test_aux` lines.
# test_aux_dataset = TensorDataset(tensor_tkn_test_aux, tensor_att_test_aux, test_aux_slot_ids, intent_label_test_aux)
# bs = 1 #len(test_aux_dataset)
# test_aux_loader = DataLoader(test_aux_dataset, batch_size=bs, shuffle = False)

## others

In [None]:
train_th_dataset = TensorDataset(tensor_tkn_train_th, tensor_att_train_th, train_th_slot_ids, intent_label_train_th)
train_th_loader = DataLoader(train_th_dataset, batch_size=batch_size, shuffle = True)

In [None]:
test_th_dataset = TensorDataset(tensor_tkn_test_th, tensor_att_test_th, test_th_slot_ids, intent_label_test_th)
bs = 1  #len(test_th_dataset)
# Iterate the test set in file order (like the EN test loader) so that predictions
# line up with the source examples and evaluation runs are repeatable.
test_th_loader = DataLoader(test_th_dataset, batch_size=bs, shuffle = False)

In [None]:
train_fa_dataset = TensorDataset(tensor_tkn_train_fa, tensor_att_train_fa, train_fa_slot_ids, intent_label_train_fa)
train_fa_loader = DataLoader(train_fa_dataset, batch_size=batch_size, shuffle = True)

In [None]:
test_fa_dataset = TensorDataset(tensor_tkn_test_fa, tensor_att_test_fa, test_fa_slot_ids, intent_label_test_fa)
bs = 1  #len(test_fa_dataset)
# Iterate the test set in file order (like the EN test loader) so that predictions
# line up with the source examples and evaluation runs are repeatable.
test_fa_loader = DataLoader(test_fa_dataset, batch_size=bs, shuffle = False)

In [None]:
train_it_dataset = TensorDataset(tensor_tkn_train_it, tensor_att_train_it, train_it_slot_ids, intent_label_train_it)
train_it_loader = DataLoader(train_it_dataset, batch_size=batch_size, shuffle = True)

In [None]:
test_it_dataset = TensorDataset(tensor_tkn_test_it, tensor_att_test_it, test_it_slot_ids, intent_label_test_it)
bs = 1  #len(test_it_dataset)
# Iterate the test set in file order (like the EN test loader) so that predictions
# line up with the source examples and evaluation runs are repeatable.
test_it_loader = DataLoader(test_it_dataset, batch_size=bs, shuffle = False)

# Adversarial Learning

## Main

In [None]:
# Hyper-parameters read by the adversarial training cell below.
start_epoch = 0  # first epoch index given to range() in the training loop
num_epochs = 15  # the loop runs epochs start_epoch .. num_epochs - 1
k_steps = 3  # consecutive discriminator updates before each generator update
alpha = 0.5; beta = 0.5  # weights of the EN / auxiliary reconstruction losses
d = 1  # only referenced by commented-out loss formulas in the training cell
t1 = 1; t2 = 1  # likewise only referenced by the commented-out loss formulas

In [None]:
import numpy as np
from numpy import mean

# Per-epoch loss histories: L1 = generator total, L2 = discriminator, L5 = reconstruction.
L1, L2, L3, L4, L5 = [], [], [], [], []
# L1_val, L2_val, L3_val = [], [], []


def _token_embeddings(input_ids, attention_mask):
  """Return the encoder's last hidden states for one batch as a tensor.

  `encoder_model` may return the tensor directly or an output object that is
  indexable by 'encoder_last_hidden_state'; both are accepted.
  """
  encoded = encoder_model(input_ids, attention_mask=attention_mask)
  if torch.is_tensor(encoded):
    return encoded
  return encoded['encoder_last_hidden_state']


for epoch in range(start_epoch, num_epochs):

  encoder_model.train()
  disc_.train()

  loss_T = []; loss_D = []; loss_Dec = []
  loss_SF, loss_ID = [], []
  i=0
  # Schedule: k_steps discriminator updates, then one generator update, repeated.
  for batch_en, batch_aux in zip(train_en_loader, train_aux_loader):

      if i<k_steps:
        ##### Discriminator update #####
        x_en, y_en, _, _ = batch_en
        x_en = x_en.to(device)
        y_en = y_en.to(device)

        x_aux, y_aux, _, _ = batch_aux
        x_aux = x_aux.to(device)
        y_aux = y_aux.to(device)

        # Only the discriminator is trained here, so the embeddings are computed
        # without building a graph through the encoder.
        with torch.no_grad():
          data_gen_en = _token_embeddings(x_en, y_en)
          data_gen_aux = _token_embeddings(x_aux, y_aux)

        disc_gen_en = disc_(data_gen_en)
        disc_gen_aux = disc_(data_gen_aux)

        disc_.zero_grad()

        # EN embeddings are the "real" class (1), auxiliary-language ones the "fake" class (0).
        loss_disc_real = criterion(disc_gen_en, torch.ones_like(disc_gen_en))
        loss_disc_fake = criterion(disc_gen_aux, torch.zeros_like(disc_gen_aux))
        loss_disc = 0.5*(loss_disc_real + loss_disc_fake)
        loss_D.append(loss_disc.item())

        loss_disc.backward()

        opt_disc.step()
        i = i + 1

      else:
        ##### Generator (encoder / decoder) update #####
        x_en_t, y_en_t, _, _ = batch_en
        x_en_t = x_en_t.to(device)
        y_en_t = y_en_t.to(device)

        x_aux_t, y_aux_t, _, _ = batch_aux
        x_aux_t = x_aux_t.to(device)
        y_aux_t = y_aux_t.to(device)

        encoder_model.zero_grad()

        # Gradients must reach the encoder in this step, so no torch.no_grad() here.
        data_gen_en_t = _token_embeddings(x_en_t, y_en_t)
        data_gen_aux_t = _token_embeddings(x_aux_t, y_aux_t)

        disc_gen_en_t = disc_(data_gen_en_t)
        disc_gen_aux_t = disc_(data_gen_aux_t)
        ### Disc. loss (logged only, not back-propagated in this step):
        loss_disc_real_t= criterion(disc_gen_en_t, torch.ones_like(disc_gen_en_t))
        loss_disc_fake_t = criterion(disc_gen_aux_t, torch.zeros_like(disc_gen_aux_t))
        loss_disc_t = 0.5*(loss_disc_real_t + loss_disc_fake_t)
        loss_D.append(loss_disc_t.item())
        # ## Decoder loss:
        decoder_output_en = decoder_model(x_en_t, data_gen_en_t)
        decoder_output_aux = decoder_model(x_aux_t, data_gen_aux_t)

        # CrossEntropyLoss expects the class dimension second, hence the transpose.
        r_loss_en = reconstruction_loss(torch.transpose(decoder_output_en, 1, 2), x_en_t)
        r_loss_aux = reconstruction_loss(torch.transpose(decoder_output_aux, 1, 2), x_aux_t)
        r_loss = alpha*r_loss_en + beta*r_loss_aux
        loss_Dec.append(r_loss.item())
        ###
        # Adversarial term: push auxiliary embeddings towards the "real" label.
        # The gradients this leaves on the discriminator are cleared by
        # disc_.zero_grad() in the next discriminator update.
        d_loss = criterion(disc_gen_aux_t, torch.ones_like(disc_gen_aux_t))
        # loss_total = (1/4)*(d*d_loss + t1*sf_loss + t2*id_loss + r*r_loss)
        # loss_total = (1/3)*(d*d_loss + t1*sf_loss + t2*id_loss)
        # loss_total = 0.5*sf_loss + 0.5*id_loss - loss_disc_t
        loss_total = 0.5*(d_loss + r_loss)

        loss_T.append(loss_total.item())

        loss_total.backward()

        opt_gen.step()
        i=0

  loss_T_per_epoch = mean(loss_T); L1.append(loss_T_per_epoch)
  loss_D_per_epoch = mean(loss_D); L2.append(loss_D_per_epoch)
  loss_Dec_per_epoch = mean(loss_Dec); L5.append(loss_Dec_per_epoch)

  print(f'Epoch {epoch}, \
  train_loss_total:{loss_T_per_epoch: .5f},\
  train_loss_disc:{loss_D_per_epoch: .5f},\
  train_loss_Dec: {loss_Dec_per_epoch}')


#################################################################
#################################################################
  # if (epoch + 1) % 12 == 0: #save on 11, 23, 35, etc.
  #   save_gan_model(encoder_model, disc_, decoder_model, opt_gen, opt_disc, opt_decoder, epoch, 'checkpoint'+str(epoch)+'.pth')
#################################################################
#################################################################
  ##### Validation ##### not completed for this scenario
  # encoder_model.eval()
  # decoder_model.eval()
  # disc_.eval()

  # val_loss_T = []; val_loss_D = []; val_loss_Dec = []

  # with torch.no_grad():

  #   for batch_en_val, batch_aux_val in zip(eval_en_loader, eval_aux_loader):

  #       x_en_val, y_en_val, _, _ = batch_en_val
  #       x_en_val = x_en_val.to(device)
  #       y_en_val = y_en_val.to(device)

  #       x_aux_val, y_aux_val, _, _ = batch_aux_val
  #       x_aux_val = x_aux_val.to(device)
  #       y_aux_val = y_aux_val.to(device)

  #       data_gen_en_val = encoder_model(x_en_val, y_en_val)
  #       data_gen_aux_val = encoder_model(x_aux_val, y_aux_val)

  #       disc_gen_en_val = disc_(data_gen_en_val)
  #       disc_gen_aux_val = disc_(data_gen_aux_val)

  #       loss_disc_real_val = criterion(disc_gen_en_val, torch.ones_like(disc_gen_en_val))
  #       loss_disc_fake_val = criterion(disc_gen_aux_val, torch.zeros_like(disc_gen_aux_val))
  #       loss_disc_val = 0.5 * (loss_disc_real_val + loss_disc_fake_val)
  #       val_loss_D.append(loss_disc_val.item())

  #       decoder_output_en_val = decoder_model(x_en_val, data_gen_en_val)
  #       decoder_output_aux_val = decoder_model(x_aux_val, data_gen_aux_val)

  #       r_loss_en_val = reconstruction_loss(torch.transpose(decoder_output_en_val, 1, 2), x_en_val)
  #       r_loss_aux_val = reconstruction_loss(torch.transpose(decoder_output_aux_val, 1, 2), x_aux_val)
  #       r_loss_val = alpha * r_loss_en_val + beta * r_loss_aux_val
  #       val_loss_Dec.append(r_loss_val.item())

  #       val_loss_total = eta * r_loss_val - lambda_coef * loss_disc_val
  #       val_loss_T.append(val_loss_total.item())

  # val_loss_T_per_epoch = mean(val_loss_T); L1_val.append(val_loss_T_per_epoch)
  # val_loss_D_per_epoch = mean(val_loss_D); L2_val.append(val_loss_D_per_epoch)
  # val_loss_Dec_per_epoch = mean(val_loss_Dec); L3_val.append(val_loss_Dec_per_epoch)

  # print(f'Epoch {epoch}, \
  # eval_loss_total:{val_loss_T_per_epoch: .5f},\
  # eval_loss_disc:{val_loss_D_per_epoch: .5f},\
  # eval_loss_dec.:{val_loss_Dec_per_epoch: .5f}')

  print('*****')

## Plots

In [None]:
import matplotlib.pyplot as plt

# Loss curves: L1 is drawn as "loss_total" and L2 as "loss_disc", each against its list index.
ax = plt.figure(figsize=(5, 5)).gca()
ax.plot(L1, ".-b", label="loss_total")
ax.plot(L2, "-<r", label="loss_disc")

ax.set_xlabel("epoch")
ax.legend(loc="center right")
# To export the figure, call plt.savefig('loss.jpeg', dpi=1000, bbox_inches='tight') before plt.show().
plt.show()

# Slot Filling and Intent Detection classes

In [None]:
class LSTMTagger(nn.Module):
  """Bidirectional LSTM tagger that scores every token against the slot tag set.

  Args:
    in_features: size of each input token vector.
    hidden_size: hidden size of each LSTM direction.
    num_layers: number of stacked LSTM layers.
    tagset_size: number of tags scored per token.
  """

  def __init__(self, in_features, hidden_size, num_layers, tagset_size):
    super(LSTMTagger, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.tagset_size = tagset_size

    self.lstm = nn.LSTM(in_features, hidden_size, num_layers, batch_first = True, bidirectional=True)
    # Forward and backward states are concatenated, hence hidden_size*2 inputs.
    self.hidden2tag = nn.Linear(hidden_size*2, tagset_size)

  def forward(self, x):
    """Return per-token log-probabilities over the tag set.

    Args:
      x: batch-first float tensor of shape (batch, seq_len, in_features).

    Returns:
      Tensor of shape (batch, seq_len, tagset_size), log-normalised over the last dimension.
    """
    # nn.LSTM starts from all-zero hidden/cell states when none are passed, so the
    # explicit zero tensors (and the dependency on a global `device`) are not needed.
    out, _ = self.lstm(x)
    tag_space = self.hidden2tag(out)
    # Normalise over the tag axis. With batch-first input, dim=1 is the sequence
    # axis, so normalising there mixed the scores of different tokens.
    tag_scores = F.log_softmax(tag_space, dim=-1)

    return tag_scores

In [None]:
class IntentClassifier(nn.Module):
  """Feed-forward intent classifier with one residual hidden layer.

  Args:
    in_features: size of the input sentence vector (default 1024).
    hidden_size: width of both hidden layers (default 64); they share one width
      because the second layer's output is added to the first's.
    num_intents: number of intent classes (default 12).
    dropout: dropout probability applied before the output layer (default 0.1).
  """

  def __init__(self, in_features=1024, hidden_size=64, num_intents=12, dropout=0.1):
    super().__init__()
    self.l1 = nn.Linear(in_features, hidden_size)
    self.l2 = nn.Linear(hidden_size, hidden_size)
    self.l3 = nn.Linear(hidden_size, num_intents)
    self.do = nn.Dropout(dropout)

  def forward(self, x):
    """Return unnormalised intent logits of shape (..., num_intents)."""
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    # Residual connection around the second hidden layer, then dropout.
    do = self.do(h2 + h1)
    logits = self.l3(do)
    # No softmax here: the logits are returned raw.
    return logits

In [None]:
# Slot filling (SF): a 2-layer bidirectional LSTM tagger with one output class
# per entry of slot_dict. (512 is noted as an alternative input feature size.)
in_features, hidden_size, num_layers = 1024, 256, 2
tagset_size = len(slot_dict)
slot_tagger = LSTMTagger(
    in_features=in_features,
    hidden_size=hidden_size,
    num_layers=num_layers,
    tagset_size=tagset_size,
).to(device)
# Targets equal to -1 are excluded from the slot loss.
loss_sf = nn.CrossEntropyLoss(ignore_index=-1)
# A dedicated optimiser for the tagger is currently disabled:
# opt_sf = optim.SGD(slot_tagger.parameters(), lr=2e-3)

In [None]:
# Intent detection (ID): the classifier head and its loss.
# Targets equal to -1 are excluded from the intent loss.
loss_id = nn.CrossEntropyLoss(ignore_index=-1)
classifier = IntentClassifier().to(device)
# Parameter iterator kept for a dedicated intent optimiser, which is currently disabled:
# opt_id = optim.SGD(params_id, lr=2e-3)
params_id = classifier.parameters()

In [None]:
def extract_cls(L): #L: the output of Generator
  """Return the first-position vector of every sequence in a batch.

  Args:
    L: tensor of shape (batch, seq_len, ...) or a Python sequence of per-example
      tensors. Judging by the function name, position 0 presumably holds the
      [CLS] token; that is not verifiable from this function.

  Returns:
    Tensor of shape (batch, ...) holding element 0 of each sequence.
  """
  if torch.is_tensor(L):
    # Slicing replaces the per-example Python loop and also works for an empty batch.
    return L[:, 0]
  # Fallback for a list/tuple of tensors: gather each first element and stack.
  return torch.stack([seq[0] for seq in L], dim=0)

# Metrics

In [None]:
def convert_label_id_to_name_true_(list_of_labels, slot_dict, ignore_index):
  """Map sequences of gold label ids to label names, dropping ignored positions.

  Args:
    list_of_labels: iterable of sequences of label ids (plain Python numbers,
      e.g. the result of `tensor.tolist()`).
    slot_dict: mapping from label name to label id.
    ignore_index: id marking positions to skip.

  Returns:
    A list with one list of label names per input sequence.

  Raises:
    ValueError: if a non-ignored id is not a value of `slot_dict`.
  """
  # Invert the mapping once instead of rebuilding and scanning the key/value
  # lists for every token. setdefault keeps the first name for a repeated id.
  id_to_name = {}
  for name, label_id in slot_dict.items():
    id_to_name.setdefault(label_id, name)

  name_list = []
  for L in list_of_labels:
    names = []
    for i in L:
      if i == ignore_index:
        continue
      if i not in id_to_name:
        raise ValueError(f'{i!r} is not a label id in slot_dict')
      names.append(id_to_name[i])
    name_list.append(names)
  return name_list

In [None]:
def convert_label_id_to_name_predict_(predict_labels, true_labels, label_map=None, ignore_index=-1, extra_label_id=23):
  """Map predicted label ids to names, keeping only positions with a real gold label.

  Args:
    predict_labels: iterable of sequences of predicted label ids.
    true_labels: iterable of gold label-id sequences aligned with `predict_labels`.
    label_map: mapping from label name to label id; defaults to the module-level
      `slot_dict`.
    ignore_index: gold id marking positions to drop (default -1).
    extra_label_id: predicted id that is rewritten to id 0 before the lookup
      (default 23, one past the largest id of the Facebook slot dictionary).
      NOTE(review): presumably a padding/extra class; confirm against the data prep.

  Returns:
    A list with one list of predicted label names per sequence.

  Raises:
    ValueError: if a kept prediction id is not a value of the mapping.
  """
  mapping = slot_dict if label_map is None else label_map
  # Invert the mapping once; setdefault keeps the first name for a repeated id.
  id_to_name = {}
  for name, label_id in mapping.items():
    id_to_name.setdefault(label_id, name)

  predictions = []
  for prediction, label in zip(predict_labels, true_labels):
    ll = []
    for (p, l) in zip(prediction, label):
      # Positions whose gold label is ignored are never looked up.
      if l == ignore_index:
        continue
      if p == extra_label_id:
        p = 0
      if p not in id_to_name:
        raise ValueError(f'{p!r} is not a label id in the label mapping')
      ll.append(id_to_name[p])
    predictions.append(ll)
  return(predictions)

In [None]:
!pip install seqeval

In [None]:
from seqeval.metrics import classification_report
# from seqeval.metrics import f1_score, precision_score, recall_score, accuracy_score

# Fine-tune SF and ID on English data only

In [None]:
# Number of passes over the English training loader.
epoch_num = 10
# Weights of the slot-filling and intent-detection terms in the joint loss (equal here).
sf_coef = id_coef = 0.5

In [None]:
from numpy import mean
# Per-epoch means of the joint loss, the slot-filling loss and the intent loss.
loss_epoch_encoder_lst, loss_epoch_sf_lst, loss_epoch_id_lst = [], [], []

for j in range(epoch_num):

  # Per-batch losses of the current epoch.
  loss_encoder_lst, loss_sf_lst, loss_id_lst = [], [], []
  encoder_model.train()
  slot_tagger.train()
  classifier.train()

  for batch in train_en_loader:
    # Batch layout: two encoder inputs (x, y), slot targets z, intent targets w.
    x, y, z, w = (t.to(device) for t in batch)

    # Clear stale gradients on all three modules before the new backward pass.
    classifier.zero_grad()
    slot_tagger.zero_grad()
    encoder_model.zero_grad()

    g_x = encoder_model(x, y)
    sf_x = slot_tagger(g_x)
    id_x = classifier(extract_cls(g_x))

    # The slot loss takes the class dimension second, so dims 1 and 2 are swapped.
    l_sf = sf_x.transpose(1, 2)

    J_sf = loss_sf(l_sf, z)
    J_id = loss_id(id_x, w)
    J = sf_coef*J_sf + id_coef*J_id

    J.backward()
    # NOTE(review): only opt_gen is stepped. Unless opt_gen also holds the
    # slot_tagger and classifier parameters, those heads are never updated here;
    # confirm where opt_gen is built. The separate opt_sf / opt_id update steps
    # that used to follow are disabled.
    opt_gen.step()

    loss_encoder_lst.append(J.item())
    loss_sf_lst.append(J_sf.item())
    loss_id_lst.append(J_id.item())

  loss_epoch_encoder, loss_epoch_sf, loss_epoch_id = (
      mean(loss_encoder_lst), mean(loss_sf_lst), mean(loss_id_lst)
  )
  loss_epoch_encoder_lst.append(loss_epoch_encoder)
  loss_epoch_sf_lst.append(loss_epoch_sf)
  loss_epoch_id_lst.append(loss_epoch_id)

  print(loss_epoch_encoder, loss_epoch_sf, loss_epoch_id)

# Test (finally without training SF and ID) - EN, ES

In [None]:
def _collect_test_outputs(loader):
    """Run the encoder, slot tagger and intent classifier over every batch of `loader`.

    Call this under torch.no_grad() with the models already in eval mode.

    Args:
        loader: iterable of (x, y, slot_labels, intent_labels) batches.

    Returns:
        (slot_true, slot_scores, intent_true, intent_scores), each concatenated
        over batches along dim 0 on the CPU. An empty loader yields empty long tensors.
    """
    sl_true, sl_pred, int_true, int_pred = [], [], [], []
    for x, y, sl, intent in loader:
        # The encoder output is fed to both heads directly, exactly as in the
        # fine-tuning loop that trained them.
        g = encoder_model(x.to(device), y.to(device))
        sl_pred.append(slot_tagger(g).to('cpu'))
        int_pred.append(classifier(extract_cls(g)).to('cpu'))
        sl_true.append(sl.to('cpu'))
        int_true.append(intent.to('cpu'))
    # One concatenation per output instead of re-copying a growing tensor every batch.
    return tuple(
        torch.cat(parts, dim=0) if parts else torch.tensor([], dtype=torch.long)
        for parts in (sl_true, sl_pred, int_true, int_pred)
    )


# The fine-tuning loop leaves all three modules in train mode, so switch every
# one of them (including the encoder) to eval mode before testing.
slot_tagger.eval()
classifier.eval()
encoder_model.eval()

with torch.no_grad():
    # Each loader is consumed on its own: zipping the two would silently drop
    # the tail of the longer test set.
    (all_sl_true_en, all_sl_pred_en,
     all_int_true_en, all_int_pred_en) = _collect_test_outputs(test_en_loader)
    (all_sl_true_aux, all_sl_pred_aux,
     all_int_true_aux, all_int_pred_aux) = _collect_test_outputs(test_aux_loader)


In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Intent-detection metrics on the English test outputs.
# Gold intent ids, and the arg-max over axis 1 of the collected classifier scores.
true_labels_en = all_int_true_en.numpy()
predicted_labels_en = all_int_pred_en.numpy().argmax(axis=1)

# Micro- and macro-averaged precision and recall, plus plain accuracy.
precision_en, precision_en_m = (
    precision_score(true_labels_en, predicted_labels_en, average=avg)
    for avg in ('micro', 'macro')
)
recall_en, recall_en_m = (
    recall_score(true_labels_en, predicted_labels_en, average=avg)
    for avg in ('micro', 'macro')
)
accuracy_en = accuracy_score(true_labels_en, predicted_labels_en)

# Precision and recall are printed as (micro, macro) tuples.
print(f'Precision: {precision_en, precision_en_m}')
print(f'Recall: {recall_en, recall_en_m}')
print(f'Accuracy: {accuracy_en}')

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Intent-detection metrics on the auxiliary-language (aux) test outputs.
# Gold intent ids, and the arg-max over axis 1 of the collected classifier scores.
true_labels_aux = all_int_true_aux.numpy()
predicted_labels_aux = all_int_pred_aux.numpy().argmax(axis=1)

# Micro- and macro-averaged precision and recall.
precision_aux = precision_score(true_labels_aux, predicted_labels_aux, average='micro')
precision_aux_m = precision_score(true_labels_aux, predicted_labels_aux, average='macro')

recall_aux = recall_score(true_labels_aux, predicted_labels_aux, average='micro')
recall_aux_m = recall_score(true_labels_aux, predicted_labels_aux, average='macro')

# Accuracy is computed once (the statement was previously duplicated).
accuracy_aux = accuracy_score(true_labels_aux, predicted_labels_aux)

# Precision and recall are printed as (micro, macro) tuples.
print(f'Precision: {precision_aux, precision_aux_m}')
print(f'Recall: {recall_aux, recall_aux_m}')
print(f'Accuracy: {accuracy_aux}')

In [None]:
# Slot-filling report for the English test outputs: gold ids versus the arg-max
# over dim 2 of the collected tagger scores.
true_labels = all_sl_true_en.tolist()
predict_labels = torch.argmax(all_sl_pred_en.detach(), dim=2).tolist()
# Both converters skip positions whose gold label is -1, so the name sequences stay aligned.
true_labels_names = convert_label_id_to_name_true_(true_labels, slot_dict=slot_dict, ignore_index=-1)
predict_labels_names = convert_label_id_to_name_predict_(predict_labels, true_labels)
print(classification_report(true_labels_names, predict_labels_names))

In [None]:
# Slot-filling report for the auxiliary-language (aux) test outputs: gold ids
# versus the arg-max over dim 2 of the collected tagger scores.
true_labels_aux = all_sl_true_aux.tolist()
predict_labels_aux = torch.argmax(all_sl_pred_aux.detach(), dim=2).tolist()
# Both converters skip positions whose gold label is -1, so the name sequences stay aligned.
true_labels_names_aux = convert_label_id_to_name_true_(true_labels_aux, slot_dict=slot_dict, ignore_index=-1)
predict_labels_names_aux = convert_label_id_to_name_predict_(predict_labels_aux, true_labels_aux)
print(classification_report(true_labels_names_aux, predict_labels_names_aux))

# Test (finally without training SF and ID) - TH

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Per-batch results are gathered in lists and concatenated once after the loop,
# instead of re-copying a growing tensor on every batch.
sl_true_th, sl_pred_th, int_true_th, int_pred_th = [], [], [], []

slot_tagger.eval()
classifier.eval()
encoder_model.eval()

with torch.no_grad():
    for batch_th in test_th_loader:
        x_th, y_th, sl_th, int_th = batch_th
        # Only the encoder inputs are needed on the compute device; the labels
        # are only stored, so they are not moved there.
        x_th = x_th.to(device)
        y_th = y_th.to(device)

        g_test_th = encoder_model(x_th, y_th)

        out_test_th = slot_tagger(g_test_th)
        out_test_th_ID = classifier(extract_cls(g_test_th))

        sl_true_th.append(sl_th.to('cpu'))
        sl_pred_th.append(out_test_th.to('cpu'))
        int_true_th.append(int_th.to('cpu'))
        int_pred_th.append(out_test_th_ID.to('cpu'))

# An empty loader yields empty long tensors.
all_sl_true_th, all_sl_pred_th, all_int_true_th, all_int_pred_th = (
    torch.cat(parts, dim=0) if parts else torch.tensor([], dtype=torch.long)
    for parts in (sl_true_th, sl_pred_th, int_true_th, int_pred_th)
)


In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Intent-detection metrics on the TH test outputs.
# Gold intent ids, and the arg-max over axis 1 of the collected classifier scores.
true_labels_th = all_int_true_th.numpy()
predicted_labels_th = all_int_pred_th.numpy().argmax(axis=1)

# Micro- and macro-averaged precision and recall.
precision_th = precision_score(true_labels_th, predicted_labels_th, average='micro')
precision_th_m = precision_score(true_labels_th, predicted_labels_th, average='macro')

recall_th = recall_score(true_labels_th, predicted_labels_th, average='micro')
recall_th_m = recall_score(true_labels_th, predicted_labels_th, average='macro')

# Accuracy is computed once (the statement was previously duplicated).
accuracy_th = accuracy_score(true_labels_th, predicted_labels_th)

# Precision and recall are printed as (micro, macro) tuples.
print(f'Precision: {precision_th, precision_th_m}')
print(f'Recall: {recall_th, recall_th_m}')
print(f'Accuracy: {accuracy_th}')

In [None]:
# Slot-filling report for the TH test outputs: gold ids versus the arg-max over
# dim 2 of the collected tagger scores.
true_labels_th = all_sl_true_th.tolist()
predict_labels_th = torch.argmax(all_sl_pred_th.detach(), dim=2).tolist()
# Both converters skip positions whose gold label is -1, so the name sequences stay aligned.
true_labels_names_th = convert_label_id_to_name_true_(true_labels_th, slot_dict=slot_dict, ignore_index=-1)
predict_labels_names_th = convert_label_id_to_name_predict_(predict_labels_th, true_labels_th)
print(classification_report(true_labels_names_th, predict_labels_names_th))

# Test (finally without training SF and ID) - FA

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Per-batch results are gathered in lists and concatenated once after the loop,
# instead of re-copying a growing tensor on every batch.
sl_true_fa, sl_pred_fa, int_true_fa, int_pred_fa = [], [], [], []

slot_tagger.eval()
classifier.eval()
encoder_model.eval()

with torch.no_grad():
    for batch_fa in test_fa_loader:
        x_fa, y_fa, sl_fa, int_fa = batch_fa
        # Only the encoder inputs are needed on the compute device; the labels
        # are only stored, so they are not moved there.
        x_fa = x_fa.to(device)
        y_fa = y_fa.to(device)

        g_test_fa = encoder_model(x_fa, y_fa)

        out_test_fa = slot_tagger(g_test_fa)
        out_test_fa_ID = classifier(extract_cls(g_test_fa))

        sl_true_fa.append(sl_fa.to('cpu'))
        sl_pred_fa.append(out_test_fa.to('cpu'))
        int_true_fa.append(int_fa.to('cpu'))
        int_pred_fa.append(out_test_fa_ID.to('cpu'))

# An empty loader yields empty long tensors.
all_sl_true_fa, all_sl_pred_fa, all_int_true_fa, all_int_pred_fa = (
    torch.cat(parts, dim=0) if parts else torch.tensor([], dtype=torch.long)
    for parts in (sl_true_fa, sl_pred_fa, int_true_fa, int_pred_fa)
)


In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Intent-detection metrics on the FA test outputs.
# Gold intent ids, and the arg-max over axis 1 of the collected classifier scores.
true_labels_fa = all_int_true_fa.numpy()
predicted_labels_fa = all_int_pred_fa.numpy().argmax(axis=1)

# Micro- and macro-averaged precision and recall.
precision_fa = precision_score(true_labels_fa, predicted_labels_fa, average='micro')
precision_fa_m = precision_score(true_labels_fa, predicted_labels_fa, average='macro')

recall_fa = recall_score(true_labels_fa, predicted_labels_fa, average='micro')
recall_fa_m = recall_score(true_labels_fa, predicted_labels_fa, average='macro')

# Accuracy is computed once (the statement was previously duplicated).
accuracy_fa = accuracy_score(true_labels_fa, predicted_labels_fa)

# Precision and recall are printed as (micro, macro) tuples.
print(f'Precision: {precision_fa, precision_fa_m}')
print(f'Recall: {recall_fa, recall_fa_m}')
print(f'Accuracy: {accuracy_fa}')

In [None]:
# Slot-filling report for the FA test outputs: gold ids versus the arg-max over
# dim 2 of the collected tagger scores.
true_labels_fa = all_sl_true_fa.tolist()
predict_labels_fa = torch.argmax(all_sl_pred_fa.detach(), dim=2).tolist()
# Both converters skip positions whose gold label is -1, so the name sequences stay aligned.
true_labels_names_fa = convert_label_id_to_name_true_(true_labels_fa, slot_dict=slot_dict, ignore_index=-1)
predict_labels_names_fa = convert_label_id_to_name_predict_(predict_labels_fa, true_labels_fa)
print(classification_report(true_labels_names_fa, predict_labels_names_fa))

# Test (finally without training SF and ID) - IT

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Per-batch results are gathered in lists and concatenated once after the loop,
# instead of re-copying a growing tensor on every batch.
sl_true_it, sl_pred_it, int_true_it, int_pred_it = [], [], [], []

slot_tagger.eval()
classifier.eval()
encoder_model.eval()

with torch.no_grad():
    for batch_it in test_it_loader:
        x_it, y_it, sl_it, int_it = batch_it
        # Only the encoder inputs are needed on the compute device; the labels
        # are only stored, so they are not moved there.
        x_it = x_it.to(device)
        y_it = y_it.to(device)

        g_test_it = encoder_model(x_it, y_it)

        out_test_it = slot_tagger(g_test_it)
        out_test_it_ID = classifier(extract_cls(g_test_it))

        sl_true_it.append(sl_it.to('cpu'))
        sl_pred_it.append(out_test_it.to('cpu'))
        int_true_it.append(int_it.to('cpu'))
        int_pred_it.append(out_test_it_ID.to('cpu'))

# An empty loader yields empty long tensors.
all_sl_true_it, all_sl_pred_it, all_int_true_it, all_int_pred_it = (
    torch.cat(parts, dim=0) if parts else torch.tensor([], dtype=torch.long)
    for parts in (sl_true_it, sl_pred_it, int_true_it, int_pred_it)
)


In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Intent-detection metrics on the IT test outputs.
# Gold intent ids, and the arg-max over axis 1 of the collected classifier scores.
true_labels_it = all_int_true_it.numpy()
predicted_labels_it = all_int_pred_it.numpy().argmax(axis=1)

# Micro- and macro-averaged precision and recall.
precision_it = precision_score(true_labels_it, predicted_labels_it, average='micro')
precision_it_m = precision_score(true_labels_it, predicted_labels_it, average='macro')

recall_it = recall_score(true_labels_it, predicted_labels_it, average='micro')
recall_it_m = recall_score(true_labels_it, predicted_labels_it, average='macro')

# Accuracy is computed once (the statement was previously duplicated).
accuracy_it = accuracy_score(true_labels_it, predicted_labels_it)

# Precision and recall are printed as (micro, macro) tuples.
print(f'Precision: {precision_it, precision_it_m}')
print(f'Recall: {recall_it, recall_it_m}')
print(f'Accuracy: {accuracy_it}')

In [None]:
# Slot-filling report for the IT test outputs: gold ids versus the arg-max over
# dim 2 of the collected tagger scores.
true_labels_it = all_sl_true_it.tolist()
predict_labels_it = torch.argmax(all_sl_pred_it.detach(), dim=2).tolist()
# Both converters skip positions whose gold label is -1, so the name sequences stay aligned.
true_labels_names_it = convert_label_id_to_name_true_(true_labels_it, slot_dict=slot_dict, ignore_index=-1)
predict_labels_names_it = convert_label_id_to_name_predict_(predict_labels_it, true_labels_it)
print(classification_report(true_labels_names_it, predict_labels_names_it))