# Importing Libraries

In [1]:
from utils import *
from sklearn.manifold import TSNE
from datetime import datetime
import time
import math

In [2]:
# Global plotting configuration shared by every figure in this notebook.
# NOTE(review): `sns` is not imported explicitly in this notebook; presumably it is
# re-exported by `from utils import *` -- confirm.
#sns.set_style("darkgrid")
# Figure size (passed as `figsize=` to plt.subplots further down). Despite the name this
# is a square 11.7 x 11.7, not the A4 aspect ratio.
a4_dims = (11.7,11.7)
sns.set_context("paper", font_scale = 1.5, rc={"lines.linewidth":2.5})

In [3]:
# Fixed sequence length that adjust_sequences() pads or truncates every query to.
MAX_LENGTH = 50
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Reading the Data In
We are using the Air Travel Information System (ATIS) dataset (**TODO:** I believe it originally comes from UPenn; this still needs to be verified).
It uses the Inside–Outside–Beginning (IOB) tag format, described here: https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)

The typical structure is:

Sentence (tokens), Slots to match each token, Intent.

The preprocessed ATIS files used here come from (with thanks): https://github.com/D2KLab/botcycle/tree/master/nlu/data/atis


In [4]:
# Load the training fold. Later cells read the samples from atis_json['data'] and the
# label inventories from atis_json['meta'].
with open('data/preprocessed/fold_train.json', 'r') as f:
    atis_json = json.load(f)

In [None]:
#atis_json['data'][3130] #total 4978


# Exploratory Data Analysis of training set

In [5]:
# Single pass over the training samples collecting corpus statistics:
#   word_counts / slot_counts / intent_counts : label -> number of occurrences
#   num_tokens                                : total token count (sum of 'length')
#   max_len_w_idx                             : one-element list holding
#                                               (sample index, length) of the longest query
word_counts = {}
slot_counts = {}
intent_counts = {}
max_len_w_idx = [(0, atis_json['data'][0]['length'])]
num_tokens = 0
for idx, atis_entry in enumerate(atis_json['data']):
    num_tokens += atis_entry['length']
    # Strict '>' keeps the first sample when several share the maximum length.
    if atis_entry['length'] > max_len_w_idx[0][1]:
        max_len_w_idx[0] = (idx, atis_entry['length'])

    intent = atis_entry['intent']
    intent_counts[intent] = intent_counts.get(intent, 0) + 1
    for slot in atis_entry['slots']:
        slot_counts[slot] = slot_counts.get(slot, 0) + 1
    for word in atis_entry['words']:
        word_counts[word] = word_counts.get(word, 0) + 1

# Only the last expression of a cell is rendered, so the bare `slot_counts` /
# `intent_counts` expressions that used to sit here displayed nothing; inspect those
# dictionaries in their own cells if needed.
max_len_w_idx

[(3130, 46)]

In [25]:
# One-row frame with one column per word; transposing gives one row per word with its
# count in column 0.
word_stats_df = pd.DataFrame(word_counts,index=[0])
# The next two expressions are evaluated but not displayed (only the last expression of
# a cell is rendered).
word_stats_df.T.head()
word_stats_df.T.columns
# Row of the most frequent word: idxmax() returns the word with the largest count.
word_stats_df.T.loc[word_stats_df.T[0].idxmax()]

0    3883
Name: to, dtype: int64

In [26]:
word_stats_df.T.head()

Unnamed: 0,0
i,659
want,148
to,3883
fly,250
from,3343


## Distribution of Slots 

In [None]:
# One-row frame with one column per slot label.
slot_stats_df = pd.DataFrame(slot_counts,index=[0])
# The 'O' (outside) tag is dropped before plotting the distribution.
# NOTE: `labels` is never read in this cell -- the drop below hard-codes ['O'].
labels = ['O']
slot_stats_df = slot_stats_df.drop(['O'],axis=1)
slot_stats_df.T.head()

In [None]:
#https://stackoverflow.com/questions/31029560/plotting-categorical-data-with-pandas-and-matplotlib
new_dims = (23,15)
fig, ax = plt.subplots(figsize=new_dims)
slot_stats_df.T[0].plot(kind='bar')
fig = ax.get_figure()
plt.title('Distribution of slots in ATIS')
fig.savefig("graphs/slot_dist.png",bbox_inches="tight",dpi=300)

## Distribution of Intents

In [None]:
# One-row frame with one column per intent label; transposed for display so that each
# intent becomes a row with its count in column 0. Unlike the slot statistics, nothing is
# dropped here, so the unused `labels = ['O']` and the no-op self-assignment that were
# copied over from the slot cell have been removed.
intent_stats_df = pd.DataFrame(intent_counts,index=[0])
intent_stats_df.T.head()

In [None]:
# Bar chart of how often each intent occurs, drawn on an explicit Axes and saved to disk.
# The target directory "graphs/" must already exist.
fig, ax = plt.subplots(figsize=a4_dims)
intent_stats_df.T[0].plot(kind='bar', ax=ax)
ax.set_title('Distribution of intents in ATIS')
fig.savefig("graphs/intent_dist.png",bbox_inches="tight",dpi=300)

# Data Preprocessing

In [None]:
def flatten(list_of_lists):
    """
    Concatenate the items of every inner iterable into a single flat list.
    :param list_of_lists an iterable of iterables (only one level is flattened).
    returns a new list holding the inner items in their original order.
    """
    flat = []
    for inner in list_of_lists:
        flat.extend(inner)
    return flat

In [None]:
def adjust_sequences(data, length=MAX_LENGTH):
    """
    Fixes the input and output sequences length, adding padding or truncating if necessary.

    The input is NOT modified: a new top-level dict with new sample dicts and new
    'words'/'slots' lists is returned. (The previous implementation padded the caller's
    lists in place, so running it twice on the same object overwrote the final '<PAD>'
    with a second '<EOS>'.) All other keys are carried over by reference.

    :param data json file containing entries from atis dataset; must provide data['data'],
           a list of samples that each have 'words' and 'slots' lists.
    :param length the fixed length of the sentence.
    returns a dict shaped like `data` whose samples have 'words' and 'slots' of exactly
            `length` items.
    """
    def _fit(tokens):
        # Shorter than `length`: append <EOS>, then fill the remainder with <PAD>.
        if len(tokens) < length:
            padding = ['<PAD>'] * (length - len(tokens) - 1)
            return list(tokens) + ['<EOS>'] + padding
        # Otherwise truncate; the last kept position is overwritten by <EOS>, so a
        # sequence of exactly `length` tokens loses its final token.
        fitted = list(tokens[:length])
        fitted[-1] = '<EOS>'
        return fitted

    adjusted = dict(data)
    adjusted['data'] = [
        dict(sample, words=_fit(sample['words']), slots=_fit(sample['slots']))
        for sample in data['data']
    ]
    return adjusted

In [None]:
def get_vocabularies(train_data):
    """
    Collect the input vocabulary, the slot vocabulary and the intent vocabulary.
    Each vocabulary is de-duplicated while keeping first-occurrence order; the word and
    slot vocabularies start with the reserved tokens '<PAD>', '<SOS>', '<EOS>'.

    :param train_data the training data containing words,slots and intent: samples under
           train_data['data'] (each with a 'words' list) and the label inventories under
           train_data['meta']['slot_types'] / ['intent_types'].
    returns (vocab, slot_tag, intent_tag), three lists of unique strings.
    """
    def _unique_in_order(items):
        # Linear-time, order-preserving de-duplication. The previous
        # sorted(set(v), key=v.index) produced the same order but rescanned the list for
        # every unique item.
        seen = set()
        ordered = []
        for item in items:
            if item not in seen:
                seen.add(item)
                ordered.append(item)
        return ordered

    reserved = ['<PAD>', '<SOS>', '<EOS>']
    words = [word for sample in train_data['data'] for word in sample['words']]

    vocab = _unique_in_order(reserved + words)
    slot_tag = _unique_in_order(reserved + train_data['meta']['slot_types'])
    intent_tag = _unique_in_order(train_data['meta']['intent_types'])

    return vocab, slot_tag, intent_tag

In [None]:
adjusted_atis = adjust_sequences(atis_json)#padded

In [None]:
atis_vocab,atis_slots,atis_intents = get_vocabularies(adjusted_atis)

In [None]:
#atis_intents

In [None]:
len(atis_vocab),len(atis_slots),len(atis_intents)

# Next we need to embed the data and set it up for Pytorch
Remember that the words, the slots and the intents each have their own vocabulary, and therefore their own index space of a different size. The network will try to learn a mapping between these different spaces.

In [None]:
def create_mappings(vocab,forward_map):
    """
    Assign the next free integer id to every item of `vocab` that has no id yet.
    Ids are handed out as len(forward_map) at insertion time; entries already present in
    `forward_map` keep their id.
    :param vocab iterable with all the items (words, slots or intents) to index.
    :param forward_map dictionary item -> id; it is updated in place.
    returns the same forward_map object, now populated.
    """
    for token in vocab:
        # The default is only stored when `token` is missing.
        forward_map.setdefault(token, len(forward_map))

    return forward_map

In [None]:
# Word <-> id lookup tables. The reserved tokens get fixed ids first
# ('<SOS>' is 2 and '<EOS>' is 3); create_mappings() then extends word2index in place
# with every remaining vocabulary word (its return value is not needed here).
word2index = {'<PAD>': 0, '<UNK>':1,'<SOS>':2,'<EOS>':3}
create_mappings(atis_vocab,word2index)
index2word = {v:k for k,v in word2index.items()}

In [None]:
# Slot-tag <-> id lookup tables, seeded with the same reserved tokens as the words, so
# id 0 is '<PAD>' and real slot labels start at id 4.
tag2index = {'<PAD>' : 0,'<UNK>':1,'<SOS>':2,'<EOS>':3}
create_mappings(atis_slots,tag2index)
index2tag = {v:k for k,v in tag2index.items()}

In [None]:
# Intent <-> id lookup tables. There are no reserved tokens here, so the first intent in
# atis_intents receives id 0 (id 0 is therefore a real class, not padding).
intent2index={}
create_mappings(atis_intents,intent2index)
index2intent = {v:k for k,v in intent2index.items()}

In [None]:
#tag2index

### Next we create a Tensor where each row is a mapped/embedded sequence

In [None]:
def prepare_sequence(seq_data, mapping,map_type):
    """
    Convert one field of a sample into a tensor of integer ids.
    :param seq_data a single sample (dict); the field seq_data[map_type] is converted.
    :param mapping, a dictionary item -> id for that field.
    :param map_type 'words','slots' or 'intent'
    returns a Pytorch Tensor: a LongTensor with one id per token for 'words'/'slots'
            (tokens missing from `mapping` become mapping["<UNK>"]), or a scalar tensor
            for 'intent' (-1 when the intent is missing from `mapping`).
    """
    if map_type != 'intent':
        # mapping["<UNK>"] is only looked up when a token is actually unknown.
        token_ids = [
            mapping[token] if token in mapping else mapping["<UNK>"]
            for token in seq_data[map_type]
        ]
        return torch.LongTensor(token_ids)

    label = seq_data[map_type]
    label_id = mapping[label] if label in mapping else -1
    return torch.tensor(label_id)

In [None]:
def create_training_set(padded_atis):
    """
    Turn every padded sample into a triple of tensors, using the notebook-level lookup
    tables word2index, tag2index and intent2index.
    :param padded_atis, this is padded sequence data; its samples are read from
           padded_atis['data'] and each provides 'words', 'slots' and 'intent'.
    return train_data; [(seq_tensor,slot_tensor,intent_tensor)], one tuple per sample
           in the original order.
    """
    return [
        (
            prepare_sequence(sample, word2index, 'words'),
            prepare_sequence(sample, tag2index, 'slots'),
            prepare_sequence(sample, intent2index, 'intent'),
        )
        for sample in padded_atis['data']
    ]

In [None]:
train_data = create_training_set(adjusted_atis)

# Batching the data

In [None]:
def concatenate_batch(batch):
    """
    Stack a list of (seq_tensor, slot_tensor, intent_tensor) examples into three batch
    tensors, adding a new leading batch dimension to each field.
    :param batch non-empty list of example tuples whose tensors share a shape per field.
    returns (seqs, slots, intents)
    """
    def _stack_field(position):
        return torch.stack([example[position] for example in batch])

    return _stack_field(0), _stack_field(1), _stack_field(2)

In [None]:
def get_batches(batch_size, train_data, shuffle=True):
    """
    Returns iteratively a batch of specified size on the data.
    The last batch can be smaller if the total size is not multiple of the batch

    :param batch_size number of examples per batch; must be at least 1.
    :param train_data list of (seq_tensor, slot_tensor, intent_tensor) tuples.
    :param shuffle when True (the default, matching the previous behaviour) the list is
           shuffled IN PLACE before batching, so the caller's list order changes. Pass
           False to iterate in the existing order, e.g. for evaluation.
    yields (seqs, slots, intents) as produced by concatenate_batch.
    raises ValueError if batch_size < 1. Because this is a generator, the check runs when
           the first batch is requested, not when get_batches() is called.
    """
    if batch_size < 1:
        # A zero batch size would previously never advance the start index.
        raise ValueError('batch_size must be at least 1, got %r' % (batch_size,))
    if shuffle:
        random.shuffle(train_data)
    for start in range(0, len(train_data), batch_size):
        # Slicing past the end is safe and yields the shorter final batch.
        yield concatenate_batch(train_data[start:start + batch_size])

In [None]:
# batches = get_batches(5,train_data[:10])
# list(batches)[0][0].size()

# Building a Joint RNN-less Model

In [None]:
class Encoder(nn.Module):
    """
    Joint slot/intent model without any recurrent layer.

    Every token id is embedded; a linear layer on each token embedding scores the slot
    of that token, and a second linear layer on the mean embedding of the sequence scores
    the intent of the whole query.
    """
    def __init__(self,input_dim,slot_dim,intent_dim,emb_dim):
        """
        :param input_dim number of rows of the embedding table (vocabulary size).
        :param slot_dim number of slot classes scored per token.
        :param intent_dim number of intent classes scored per sequence.
        :param emb_dim size of each token embedding.
        """
        super(Encoder, self).__init__()
        self.input_dim, self.emb_dim = input_dim, emb_dim
        self.slot_dim, self.intent_dim = slot_dim, intent_dim

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.slot_fc = nn.Linear(emb_dim,slot_dim)
        self.intent_fc = nn.Linear(emb_dim,intent_dim)

    def forward(self,src):
        """
        :param src tensor of token ids; the notebook passes (batch, seq_len) batches.
        returns (slots, intent, embedded): per-token slot scores, per-sequence intent
                scores and the raw token embeddings.
        """
        token_vectors = self.embedding(src)
        slot_scores = self.slot_fc(token_vectors)

        # Collapse dimension 1 (the token positions) into one vector per sequence. Every
        # position takes part in the mean, including padded ones.
        sequence_vector = token_vectors.mean(dim=1)
        intent_scores = self.intent_fc(sequence_vector)

        return slot_scores, intent_scores, token_vectors

In [None]:
# Smoke test: build a tiny Encoder and push a single training example through it to
# check that the shapes line up and that the intent loss can be computed.
# NOTE: this cell binds notebook-level names (`encoder`, `embed_size`, `input`, `intent`,
# `real_intent`, ...) that the next cell reads; `input` also shadows the Python builtin.
print("Testing models...")
n_layers = 1
INPUT_DIM = len(word2index)
SLOT_DIM = len(tag2index)
print('Inp: ',INPUT_DIM,'Slotdim: ',SLOT_DIM)
INTENT_DIM = len(index2intent)
embed_size = 5
# A batch of size 1 built from the first training example only.
inp = get_batches(1,train_data[:1])
encoder = Encoder(INPUT_DIM,SLOT_DIM,INTENT_DIM,embed_size)
# testing avg embedding
for data in inp:
    input,label,real_intent = data[0],data[1],data[2]
    # `_` receives the raw token embeddings returned as the third output.
    slot,intent,_ = encoder(input)
    print(_.size())
    # Predicted slot id per token = highest-scoring class along the last dimension.
    slot = torch.argmax(slot,dim=2)
    test_criterion = nn.CrossEntropyLoss()
    # The loss value is computed but discarded; this only checks that the call succeeds.
    test_criterion(intent,real_intent)

In [None]:
#list(encoder.parameters())

In [None]:
test_criterion = nn.CrossEntropyLoss()
real_intent,intent
test_criterion(intent,real_intent)

# Training the model & Running Experiments
Here we vary the embedding size in the embedding layer while keeping the learning rate, batch_size, optimizer 
and other network parameters constant.

In [None]:
def train_model(model,num_epochs,batch_size,optimizer,train_data,constant_params):
    """
    Train `model` jointly on slot tagging and intent classification.

    :param model network returning (slot_scores, intent_scores, embeddings).
    :param num_epochs number of passes over train_data.
    :param batch_size number of examples per batch.
    :param optimizer optimizer already bound to model.parameters().
    :param train_data list of (seq_tensor, slot_tensor, intent_tensor) tuples; it is
           reshuffled in place by get_batches at every epoch.
    :param constant_params dict with 'criterion' (slot loss) and 'SLOT_DIM'. An optional
           'intent_criterion' overrides the intent loss; when absent a plain
           nn.CrossEntropyLoss() is used.
    returns loss_by_epoch, a dict with two lists of per-batch losses for every epoch,
            keyed "epoch:<n>_slot_losses" and "epoch:<n>_intent_losses".
    """
    loss_by_epoch = {}
    slot_criterion = constant_params['criterion']
    # The slot criterion is configured with ignore_index=0 to skip '<PAD>' targets.
    # intent2index has no padding entry, so intent id 0 is a real class; reusing the slot
    # criterion here silently excluded that class from the intent loss.
    intent_criterion = constant_params.get('intent_criterion')
    if intent_criterion is None:
        intent_criterion = nn.CrossEntropyLoss()
    slot_dim = constant_params['SLOT_DIM']

    model.train()
    for epoch in range(num_epochs):
        slot_losses = []
        intent_losses = []
        for inputs, labels, intents in get_batches(batch_size,train_data):
            inputs = inputs.type(torch.LongTensor).to(device)
            # Flatten the targets so they line up with the flattened slot scores below.
            labels = labels.type(torch.LongTensor).view(-1).to(device)
            intents = intents.to(device)

            optimizer.zero_grad()

            out_slots, out_intents, _ = model(inputs)
            slot_loss = slot_criterion(out_slots.view(-1, slot_dim), labels)
            intent_loss = intent_criterion(out_intents, intents)
            slot_losses.append(slot_loss.item())
            intent_losses.append(intent_loss.item())

            # One backward pass over the summed loss accumulates the same gradients as
            # two separate passes, without having to retain the graph.
            (slot_loss + intent_loss).backward()
            optimizer.step()

        loss_by_epoch["epoch:"+str(epoch+1)+"_slot_losses"] = slot_losses
        loss_by_epoch["epoch:"+str(epoch+1)+"_intent_losses"] = intent_losses

    return loss_by_epoch

def make_models(constant_params,vary_params):
    """
    Build one freshly initialised Encoder per embedding size, each moved to `device`.
    :param constant_params dict providing 'INPUT_DIM', 'SLOT_DIM' and 'INTENT_DIM'.
    :param vary_params dict whose 'embed_size' entry lists the embedding sizes to try.
    returns list of models in the same order as vary_params['embed_size'].
    """
    return [
        Encoder(constant_params['INPUT_DIM'],
                constant_params['SLOT_DIM'],
                constant_params['INTENT_DIM'],
                size).to(device)
        for size in vary_params['embed_size']
    ]

def run_experiment(constant_params,vary_params,num_runs,exp_data,model_type='ann'):
    """
    Train one model per embedding size, `num_runs` times, and save each loss history.

    :param constant_params dict with 'num_epoch', 'batch_size' and the entries needed by
           make_models / train_model. An optional 'lr' sets the Adam learning rate
           (default 0.01, the previously hard-coded value).
    :param vary_params dict whose 'embed_size' entry lists the embedding sizes to try.
    :param num_runs number of repetitions; new models are built for every run.
    :param exp_data training examples handed to train_model.
    :param model_type prefix used in the output file names.
    returns list of DataFrames, one per (run, embedding size) in training order. Each is
            also written to
            experiment_results/<model_type><run>_<embed_size>_train_experiment.csv,
            where <run> is zero-based.
    """
    import os

    # Make sure the output directory exists before training, so the results of a long
    # run are not lost to a missing directory when the CSV is written.
    if not os.path.isdir("experiment_results"):
        os.makedirs("experiment_results")

    learning_rate = constant_params.get('lr', 0.01)
    list_of_dfs = []
    for run in range(num_runs):
        run_model_list = make_models(constant_params,vary_params)
        for size, model in zip(vary_params['embed_size'], run_model_list):
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
            loss_by_epoch = train_model(model,
                                        constant_params['num_epoch'],
                                        constant_params['batch_size'],
                                        optimizer,
                                        exp_data,
                                        constant_params)

            result_dataframe = pd.DataFrame(loss_by_epoch,index=None)
            result_dataframe.to_csv("experiment_results/"+
                                    str(model_type)+
                                    str(run)+"_"+
                                    str(size)+"_"
                                    "train_experiment.csv")
            list_of_dfs.append(result_dataframe)

    return list_of_dfs #incase we want it

In [None]:
# Settings shared by every model in the embedding-size experiment.
# 'criterion' uses ignore_index=0, i.e. targets equal to 0 do not contribute to the loss.
# Id 0 is '<PAD>' in tag2index, so this suits the slot loss; intent2index has no padding
# entry, so id 0 there is a real intent class.
# NOTE(review): newer PyTorch releases spell this reduction 'mean' -- confirm against
# the installed version.
constant_exp_params = {'batch_size':128,
                       'num_epoch':200,
                      'INPUT_DIM':len(word2index),
                      'SLOT_DIM':len(tag2index),
                      'INTENT_DIM':len(index2intent),
                      'criterion': nn.CrossEntropyLoss(ignore_index=0,reduction='elementwise_mean')}

# Embedding sizes to compare; one model is trained per size in every run.
variable_exp_params = {'embed_size':[2,4,8,16,32,64,128,256]}
                        

In [None]:
# Full experiment: 5 runs x 8 embedding sizes, each trained for constant_exp_params['num_epoch']
# epochs. This is the most expensive cell of the notebook; the returned list of
# DataFrames is discarded because run_experiment also writes every result to CSV.
start = time.time()
run_experiment(constant_exp_params,variable_exp_params,5,train_data,model_type='ann')
end = time.time()
elapsed = (end-start)/60.
print('Time elapsed: %.4f mins ' % (elapsed))

# =================== #

In [None]:
# Hyper-parameters for the single model that is trained and evaluated in the cells below
# (independent of the experiment settings above).
batch_size = 256
# NOTE: duplicates MAX_LENGTH and is not read by any of the cells below.
max_length = 50
INPUT_DIM = len(word2index)
SLOT_DIM = len(tag2index)
INTENT_DIM = len(index2intent)
embed_size = 10 # tried 5
num_epochs = 100

In [None]:
# Model, loss and optimizer for the single training run below.
encoder = Encoder(INPUT_DIM,SLOT_DIM,INTENT_DIM,embed_size).to(device)
# ignore_index=0 skips targets equal to 0, which is '<PAD>' in tag2index.
criterion = nn.CrossEntropyLoss(ignore_index=0,reduction='elementwise_mean')
optimizer = optim.Adam(encoder.parameters(), lr=0.01)

In [None]:
encoder

In [None]:
# len(atis_intents)
# len(atis_slots)
# atis_slots
# tag2index

In [None]:
# Train `encoder` jointly on slot tagging and intent classification.
start = time.time()
# `criterion` ignores target 0 so that '<PAD>' slots are skipped. intent2index has no
# padding entry (id 0 is a real intent), so the intent loss gets its own criterion;
# reusing `criterion` silently dropped every example of intent 0 from the intent loss.
intent_criterion = nn.CrossEntropyLoss()
encoder.train()
for epoch in range(num_epochs):
    inp = get_batches(batch_size,train_data)
    for data in inp:
        # get the inputs
        inputs, labels, intents = data[0],data[1],data[2]
        inputs = inputs.type(torch.LongTensor).to(device)
        # Flatten the targets so they line up with the flattened slot scores below.
        labels = labels.type(torch.LongTensor).view(-1).to(device)
        intents = intents.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        out_slots, out_intents,_ = encoder(inputs)
        out_slots = out_slots.view(-1,SLOT_DIM)

        slot_loss = criterion(out_slots, labels)
        intent_loss = intent_criterion(out_intents, intents)

        # One backward pass over the summed loss accumulates the same gradients as two
        # separate passes, without having to retain the graph.
        (slot_loss + intent_loss).backward()
        optimizer.step()
    if epoch%25==0:
        # These are the losses of the LAST batch of the epoch, not an epoch average.
        print('Slot Loss after epoch '+str(epoch)+':',slot_loss.item())
        print('Intent Loss after epoch '+str(epoch)+':',intent_loss.item())
        print('-----------------------------------------------')
end = time.time()
elapsed = (end-start)/60.
print('Time elapsed: %.4f mins ' % (elapsed))

## Evaluation of Encoder
 

In [None]:
# Load the held-out test fold; it is read the same way as the training fold
# ('data' for the samples, 'meta' for the label inventories).
with open('data/preprocessed/fold_test.json', 'r') as f:
    atis_test_json = json.load(f)

In [None]:
len(atis_test_json['data'])

In [None]:
#atis_test_json['data'][0]

In [None]:
adjusted_atis_test = adjust_sequences(atis_test_json)#padded

In [None]:
test_vocab,test_slots,test_intents = get_vocabularies(adjusted_atis_test)
#test_intents,index2intent

In [None]:
# create_training_set() uses the lookup tables built from the TRAINING fold, so test
# words/slots that were never seen map to '<UNK>' and unseen intents map to -1.
test_data = create_training_set(adjusted_atis_test)

In [None]:
len(test_data)#[0][1]

In [None]:
#Exclude pad 
def calc_bat_fscores(y_pred,y_true,predict_type='slot'):
    """
    Micro-averaged F1 score for one batch.

    For predict_type='slot' the F1 of every sequence is computed against its true slot
    sequence, restricted to the labels with id > 2 in tag2index (this leaves out
    '<PAD>', '<UNK>' and '<SOS>' but keeps '<EOS>'), and the per-sequence scores are
    averaged. For any other predict_type a single F1 over the whole batch is returned.

    :param y_pred predicted ids; a torch tensor on any device or an array-like.
           (batch, seq_len) for slots, (batch,) for intents.
    :param y_true true ids with the same layout as y_pred.
    :param predict_type 'slot' (default) or anything else for intents.
    returns the score as a float.
    """
    def _as_numpy(values):
        # scikit-learn needs host memory: tensors are detached and copied to the CPU
        # first, which also makes this work when the model runs on the GPU.
        if torch.is_tensor(values):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    y_pred = _as_numpy(y_pred)
    y_true = _as_numpy(y_true)

    if predict_type != 'slot':
        return f1_score(y_true,y_pred,average ='micro')

    keep_slots = [i for i in tag2index.values() if i > 2]
    batch_avg_f = [
        f1_score(true_row, pred_row, labels=keep_slots, average ='micro')
        for true_row, pred_row in zip(y_true, y_pred)
    ]
    return np.mean(batch_avg_f)

In [None]:
import warnings
warnings.filterwarnings('ignore')# get a lot of warnings because some labels are not predicted...

In [None]:
# Evaluate the trained encoder on the test set, batch by batch.
# NOTE: get_batches shuffles test_data in place; the reported numbers are the mean of the
# per-batch scores, so a smaller final batch weighs as much as a full one.
inp = get_batches(256,test_data)
encoder.eval()
f_slot_scores = []
f_intent_scores = []
for data in inp:
    # get the inputs
    inputs, labels , true_intents = data[0],data[1],data[2]
    inputs = inputs.type(torch.LongTensor).to(device)
    labels = labels.to(device)
    true_intents = true_intents.to(device)
    
    slots ,intents, slot_embedding = encoder(inputs)
    # Predicted slot id per token: highest-scoring class along the last dimension.
    y_pred_slots = torch.argmax(slots,dim=2)
    f_slot_scores.append(calc_bat_fscores(y_pred_slots,labels))
    # Predicted intent id per sequence.
    y_pred_intents = torch.argmax(intents,dim=1)
    f_intent_scores.append(calc_bat_fscores(y_pred_intents,true_intents,predict_type='intent'))
print('Mean Slot F Metric :',np.mean(f_slot_scores))
print('-----------------------------------------------')
print('Mean Intent F Metric :',np.mean(f_intent_scores))

In [None]:
#  # Change line width
# plt.xlabel('Slot F1-Scores')
# sns.boxplot(y = f_slot_scores, linewidth=1,width=0.05)

# Confusion Matrix for Intents (there are far fewer intent classes than slot classes)

In [None]:
def categoryFromOutput(output):
    """
    Translate a predicted intent id into its label via the notebook-level index2intent.
    :param output single-element tensor holding the predicted intent id.
    returns (intent label, intent id as a Python number)
    """
    predicted_id = output.item()
    label = index2intent[predicted_id]
    return label, predicted_id

In [None]:
# Sanity check: predict the intent of one test example and print its label.
# NOTE: `input` shadows the Python builtin of the same name for the rest of the notebook.
for data in get_batches(1,test_data[0:1]):
    input,_,intent = data[0],data[1],data[2]
    input = input.to(device)
    # `output` holds the slot scores; `_` is rebound to the token embeddings here.
    output,intent_pred,_  = encoder(input)
    # Batch size is 1, so [0] picks the only prediction.
    predictions = torch.argmax(intent_pred,dim=1)[0]
    print(output.size())
    print(predictions)
    print(categoryFromOutput(predictions))


In [None]:
# all_categories: the intent ids, in intent2index insertion order.
all_categories = []
# Hand-written tick labels for the confusion-matrix axes.
# NOTE(review): these are positional labels; they only match the matrix rows/columns if
# index2intent assigns ids in exactly this order -- confirm against index2intent.
intent_labels = ['abbreviation','aircraft','aircraft&flight&flight_no',
                 'airfare','airline','airline_flight_no','airport','capacity',
                'cheapest','city','distance','flight','flight&airfare','flight_no','flight_time',
                 'ground_fare','ground_fare_service','ground_service&ground_fare','meal',
                'quantity','restriction']
for category in intent2index.values():
    all_categories.append(category)
      
# The two lengths should agree for the labels to cover every class.
len(all_categories),len(intent_labels)

In [None]:
#intent_labels
#index2intent
#len(intent_labels)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('png','pdf')

In [None]:
# Keep track of correct guesses in a confusion matrix
# Rows are the true intent, columns the predicted intent.
n_categories = len(all_categories)
confusion = torch.zeros(n_categories, n_categories)
n_confusion = 5000  # not used by this cell

# Go through the examples one at a time and record which are correctly guessed
# NOTE(review): this iterates over train_data although the section evaluates the model;
# the -1 guard below only matters for data containing unseen intents -- confirm whether
# test_data was intended.
for data in get_batches(1,train_data):
    inputs, labels , true_intents = data[0],data[1],data[2]
    inputs = inputs.type(torch.LongTensor).to(device)
    labels = labels.to(device)
    true_intents = true_intents.to(device)
    output,intent_pred,_  = encoder(inputs)
    predictions = torch.argmax(intent_pred,dim=1)[0]
    guess, guess_i = categoryFromOutput(predictions)
    true_intent_i = true_intents.item()
    if true_intent_i != -1:#intent is in dataset
        # Look the id up as a plain Python number rather than comparing a tensor with
        # every list element.
        category_i = all_categories.index(true_intent_i)
        confusion[category_i][guess_i] += 1

# Normalize by dividing every row by its sum
for i in range(n_categories):
    row_total = confusion[i].sum()
    # Intents that never occurred keep an all-zero row instead of becoming NaN (0/0).
    if row_total > 0:
        confusion[i] = confusion[i] / row_total

# Set up plot
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(confusion.numpy())
fig.colorbar(cax)

# Set up axes
ax.set_xticklabels([''] + intent_labels, rotation=90)
ax.set_yticklabels([''] + intent_labels) #true

# Force label at every tick
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

# sphinx_gallery_thumbnail_number = 2
plt.show()
fig = ax.get_figure()
fig.savefig("nn_cm_with_intent",dpi=300)

In [None]:
# Scratch cell: creates a new figure with empty axes and saves it as foo.pdf. Nothing is
# drawn on it, so the output does not contain any of the plots above.
fig, ax = plt.subplots(figsize=a4_dims)
fig = ax.get_figure()
fig.savefig("foo.pdf",dpi=300)

In [None]:
# Scratch cell: saves a new, empty figure in two formats (overwrites foo.pdf from the
# previous cell).
f = plt.figure()
f.savefig("foo.pdf")
f.savefig("foo.png")

# T-SNE
* Starting with 100 samples from the test set
* Keeping track of the labels, we pass the samples through our model to get an embedding of shape [batch size, 50, embedding size].
* From this embedding we remove the rows that belong to padding (label 0), since they appear very often.
* With the 0s removed, we apply T-SNE. 
* Plot the T-SNE embedding.

# Would a t-SNE visualization be useful?
Filter out top ten popular classes in testing set.
Then randomly sample slot embeddings from those.
Do the t-SNE embedding.
Remember embeddings are in order, this helps to label.

Can we use t-SNE to visualize the results? How do the intent predictions and query predictions look?
Are the intents near the queries when visualized?

Put in embedding into t-SNE and hopefully observe spatial semantic labels

output should be 870,2

color according to slot value and hopefully see color clusters

## COLOR BY INTENT


In [None]:
# Run the test set through the encoder in batches of 10. Every iteration overwrites
# labels, true_intents and slot_embedding, so after the loop they describe the LAST
# batch only. `labels` stays on the CPU here (only `inputs` is moved to the device).
inp = get_batches(10,test_data)
encoder.eval()
for data in inp:
    inputs, labels , true_intents = data[0],data[1],data[2]
    inputs = inputs.type(torch.LongTensor).to(device)
    slots ,intents, slot_embedding = encoder(inputs)
# Embedding of the last batch; get_batches shuffles, so which samples it contains varies
# from run to run.
slot_embedding.size()

In [None]:
true_intents

In [None]:
# Embedding of the last position of the first sequence in the batch, used as the
# reference "padding" vector. That position holds '<PAD>' only when the original query
# was shorter than MAX_LENGTH - 1 tokens; otherwise adjust_sequences put '<EOS>' there.
pad_as_vector = slot_embedding[0][-1].cpu().detach().numpy()
pad_as_vector

In [None]:
#slot_embedding#.size()
reshaped_slot_embed = slot_embedding.reshape(-1,embed_size).cpu().detach().numpy()
#reshaped_slot_embed[0]

In [None]:
#https://stackoverflow.com/questions/26154711/filter-rows-of-a-numpy-array/26154854
def remove_zero_embed(row,zero_embed):
    """
    Decide whether an embedding row should be kept, i.e. whether it is NOT the padding
    embedding.

    Returns one boolean per row (True when `row` differs from `zero_embed` in at least
    one component), so that the collected results form a row mask. The previous
    element-wise result masked individual components, which kept part of any row that
    shared some components with the padding vector.

    :param row 1-D embedding vector.
    :param zero_embed 1-D reference vector of the same length (the padding embedding).
    """
    return bool(np.any(np.asarray(row) != np.asarray(zero_embed)))
bool_arr = np.array([remove_zero_embed(row,pad_as_vector) for row in reshaped_slot_embed])

In [None]:
reshaped_slot_wo_zero = reshaped_slot_embed[bool_arr].reshape(-1,embed_size)

In [None]:
reshaped_slot_wo_zero.shape

In [None]:
#https://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html#sphx-glr-auto-examples-manifold-plot-lle-digits-py
print("Computing t-SNE embedding")
tsne = TSNE(n_components=2,init='pca',random_state=423)

In [None]:
X_tsne = tsne.fit_transform(reshaped_slot_wo_zero)

In [None]:
#labels
reshaped_labels = labels.reshape(-1).detach().numpy()
reshaped_labels_wo_zero = reshaped_labels[reshaped_labels>0]
reshaped_labels_wo_zero.shape

In [None]:
#print(X_tsne[:,0])
fig, ax = plt.subplots(figsize=a4_dims)
for i in range(X_tsne.shape[0]):
#     dist = np.sum((X_tsne[i] - shown_images) ** 2, 1)
#             if np.min(dist) < 4e-3:
#                 # don't show points that are too close
#                 continue
    plt.text(X_tsne[i, 0], X_tsne[i, 1], str(reshaped_labels_wo_zero[i]),
             color=plt.cm.Set1(reshaped_labels_wo_zero[i] / 100.),
             fontdict={'weight': 'bold', 'size': 9})
plt.scatter(X_tsne[:,0],X_tsne[:,1])
plt.title('t-SNE visualization of test data')
fig = ax.get_figure()
#fig.savefig("tsne",dpi=300)
#plt.show()
