In [9]:
from sentence_transformers import SentenceTransformer
from sklearn.manifold import TSNE

import pandas as pd
import numpy as np
import plotly.graph_objs as go

# Load the models

- SBERT (For Sentence-based vectors)
- BERT

In [2]:
# Load the pre-trained SBERT model by its name.
# `model` is reused by a later cell to encode sentences into embedding vectors.
model_name = 'bert-base-nli-mean-tokens'
model = SentenceTransformer(model_name)

# Load the data

Utterances: For utterance-based analysis

Dialog Act labels: For dialog act analysis

In [16]:
# Load the utterances and dialog act labels

def load_utterances(path='data_loaders/dailydialog/dialogues_text.txt'):
    """Read a dialog text file into per-dialog lists of utterances.

    Each line of the file is treated as one dialog whose utterances are
    separated by the ``__eou__`` marker. Typographic apostrophes (’) are
    replaced with ASCII apostrophes. Whitespace around each utterance is
    left untouched.

    Args:
        path: Location of the dialog text file. Defaults to the path that
            was previously hard-coded, so existing calls keep working.

    Returns:
        dict mapping the zero-based line index to the list of utterance
        strings found on that line.
    """
    utterances_dict = {}

    with open(path, encoding='utf-8') as f:
        for index, line in enumerate(f):
            utterances = line.rstrip('\r\n').replace('’', "'").split('__eou__')

            # The segment after the final marker is normally blank; drop it
            # only in that case, so a line without a trailing marker does
            # not lose its last real utterance.
            if not utterances[-1].strip():
                utterances.pop()

            utterances_dict[index] = utterances

    return utterances_dict
    
def load_labels(path='data_loaders/dailydialog/dialogues_act.txt'):
    """Read a dialog-act label file into per-dialog lists of labels.

    Each line of the file is treated as one dialog whose labels are
    separated by whitespace. Labels are returned as strings, unchanged.

    Args:
        path: Location of the label file. Defaults to the path that was
            previously hard-coded, so existing calls keep working.

    Returns:
        dict mapping the zero-based line index to the list of label
        strings found on that line (an empty list for a blank line).
    """
    labels_dict = {}

    with open(path, encoding='utf-8') as f:
        for index, line in enumerate(f):
            # split() without an argument ignores repeated or trailing
            # whitespace and returns [] for a blank line, whereas
            # split(' ') would produce empty-string "labels".
            labels_dict[index] = line.split()

    return labels_dict

# Despite its name, `utterances_list` is a dict: dialog index -> list of utterances.
utterances_list = load_utterances()
# Dict: dialog index -> list of dialog-act labels (kept as strings).
dialog_labels = load_labels()

# Utterance Features visualization

- **Input**: Sentence
- **Output**: Feature Vector
- **Visualize**: T-SNE + PCA + Scatter Plot

In [22]:
# Flatten the per-dialog dictionaries into two parallel lists:
# utterances[i] is a sentence and da_labels[i] is its dialog-act label.
# (No DataFrame is built in this cell, only the flat lists.)
utterances = []
da_labels = []

# Dialog indices whose utterance and label counts disagree.
mismatched_dialogs = []

for key, utter_list in utterances_list.items():

    # A dialog with no label line is handled like any other count mismatch.
    da_label_list = dialog_labels.get(key, [])

    # Skip misaligned dialogs: extending the lists with unequal lengths
    # would shift every later utterance/label pair out of step.
    if len(utter_list) != len(da_label_list):
        mismatched_dialogs.append(key)
        continue

    utterances.extend(utter_list)
    da_labels.extend(da_label_list)

# Report skipped dialogs instead of dropping them silently.
if mismatched_dialogs:
    print('Skipped', len(mismatched_dialogs),
          'dialog(s) with mismatched utterance/label counts')

In [None]:
# Three sample sentences to embed.
sentences = ['I like eating burgers', 'Burgers are tasty!', 'This is the last one']
# Encode the sentences with the SBERT model loaded in an earlier cell.
sentence_embeddings = model.encode(sentences)
# Length of the first sentence's embedding.
print(len(sentence_embeddings[0]))

In [None]:
# Perplexity passed to t-SNE below.
# NOTE(review): scikit-learn documents that perplexity must be smaller than the
# number of samples; only three sentences are embedded here, so revisit this
# value if the input changes.
perplexity = 2

# t-SNE configured for 2 output components with a pinned random_state.
tsne = TSNE(
    n_components=2, random_state=42, 
    perplexity = perplexity, init='pca',
    learning_rate=200
)


# Fit t-SNE on the sentence embeddings and keep the projected coordinates.
tsne_embeddings = tsne.fit_transform(sentence_embeddings)

In [None]:
# Scatter trace over the two t-SNE coordinates, one coloured marker per
# sample sentence, with the sentence itself attached as the point's text.
scatter_trace = go.Scatter(
    x=tsne_embeddings[:, 0],
    y=tsne_embeddings[:, 1],
    mode='markers',
    marker={'size': 10, 'color': ['red', 'green', 'blue']},
    text=sentences,
)

# The figure is built from a list of traces.
data = [scatter_trace]

In [None]:
# Figure layout: plot title, axis labels, and hover mode.
layout = dict(
    title = 'SBERT Visualization',
    xaxis = dict(title='Dimension 1'),
    yaxis = dict(title='Dimension 2'),
    hovermode = 'closest'
)

In [None]:
# Combine the trace list and the layout into a figure, then render it.
fig = go.Figure(data=data, layout=layout)
fig.show()