## Imports

In [None]:
# text similarity related imports
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# plotting related imports
import matplotlib.pyplot as plt
import networkx as nx

# jupyter voila related
import ipywidgets
# from ipywidgets import HBox, VBox, IntSlider, interactive_output
from IPython.display import display

## Base class declaration

In [None]:
class DetectorDeCola:
    """Build and plot similarity graphs between free-text answers.

    Answers are embedded with a sentence-transformer model, compared with
    cosine similarity, and every pair whose similarity exceeds a threshold
    becomes an edge of a graph that is drawn with networkx/matplotlib.
    """

    def __init__(self, color=(0, 0, 1), node_color='orange',
                 transformer_name='distiluse-base-multilingual-cased'):
        """Store the drawing options and load the embedding model.

        Args:
            color: RGB triple used for edges; an alpha channel is appended
                per edge in ``generate_manual_graph``.
            node_color: Colour passed to networkx for the nodes.
            transformer_name: Model name handed to ``SentenceTransformer``.
        """
        self.color = color
        self.node_color = node_color
        self.sbert_model = SentenceTransformer(transformer_name)

    def create_sim_matrix(self, answers):
        """Return the pairwise cosine-similarity matrix of ``answers``.

        Args:
            answers: Texts to encode with the sentence-transformer model.
        """
        embeddings = self.sbert_model.encode(answers)
        return cosine_similarity(embeddings)

    def generate_manual_graph(self, sim_matrix, threshold):
        """Turn a similarity matrix into a dict-of-dicts adjacency graph.

        Only pairs ``(i, j)`` with ``j > i`` and similarity strictly above
        ``threshold`` produce an edge. Nodes are labelled from 1, and a node
        with no edge above the threshold does not appear in the result.

        Args:
            sim_matrix: 2-D array-like exposing ``.shape`` and ``[i][j]``
                indexing.
            threshold: Minimum similarity (exclusive) for an edge.

        Returns:
            ``{node: {neighbour: edge_attr}}`` where ``edge_attr`` has the
            keys ``'color'`` (RGBA tuple), ``'length'``, ``'width'`` and
            ``'edge_label'``. Both directions of an edge share the same
            attribute dict.
        """
        graph = {}
        n_rows, n_cols = sim_matrix.shape[0], sim_matrix.shape[1]
        for i in range(n_rows):
            # Only the upper triangle is visited: the pair (j, i) is the
            # same edge and the diagonal is an answer compared to itself.
            for j in range(i + 1, n_cols):
                # Plain float keeps round() and the label independent of the
                # matrix's numpy scalar type.
                similarity = float(sim_matrix[i][j])
                if similarity <= threshold:
                    continue
                if threshold > 0:
                    alpha = min(1.0, (similarity - threshold) / threshold)
                else:
                    # The ratio above diverges as threshold approaches 0, so
                    # the capped value is 1; this also avoids dividing by 0.
                    alpha = 1.0
                edge_attr = {
                    'color': (*self.color, alpha),
                    'length': max(0, 100 - (similarity * 100)),
                    'width': similarity * 10,
                    'edge_label': f"{round(similarity * 100)}%",
                }
                graph.setdefault(i + 1, {})[j + 1] = edge_attr
                graph.setdefault(j + 1, {})[i + 1] = edge_attr
        return graph

    def generate_edge_labels(self, graph):
        """Map every ``(node, neighbour)`` pair of ``graph`` to its label.

        Both orientations of an undirected edge are present in ``graph``,
        so both appear as keys in the returned dict.
        """
        return {
            (source, target): attrs['edge_label']
            for source, neighbours in graph.items()
            for target, attrs in neighbours.items()
        }

    def plot_single_graph(self, graph, edge_labels):
        """Draw ``graph`` with its edge labels and show the figure.

        Args:
            graph: Dict-of-dicts adjacency as built by
                ``generate_manual_graph``.
            edge_labels: Mapping ``(node, neighbour) -> label text``.
        """
        graph_nx = nx.from_dict_of_dicts(graph)
        try:
            pos = nx.planar_layout(graph_nx)
        except nx.NetworkXException:
            # planar_layout refuses non-planar graphs (e.g. five answers
            # that are all similar to each other); fall back to a layout
            # that works for any graph.
            pos = nx.circular_layout(graph_nx)
        edge_colors = list(nx.get_edge_attributes(graph_nx, 'color').values())
        edge_widths = list(nx.get_edge_attributes(graph_nx, 'width').values())
        nx.draw(
            graph_nx, pos,
            node_size=1000,
            edge_color=edge_colors,
            width=edge_widths,
            with_labels=True,
            node_color=self.node_color
        )
        nx.draw_networkx_edge_labels(
            graph_nx, pos, edge_labels=edge_labels
        )
        plt.show()

    def plot_all_questions_graphs(self, questions, threshold=0.5):
        """Plot one similarity graph per question.

        Args:
            questions: Iterable in which each item is the collection of
                answers given to one question.
            threshold: Minimum similarity (exclusive) for an edge, applied
                to every question.
        """
        for question in questions:
            sim_matrix = self.create_sim_matrix(question)
            graph = self.generate_manual_graph(sim_matrix, threshold)
            edge_labels = self.generate_edge_labels(graph)
            self.plot_single_graph(graph, edge_labels)


In [None]:
# Sample answers (in Portuguese) to a single question, used to exercise the
# detector: entries 1-2 and entries 3-4 are exact duplicates of each other,
# and entry 5 is a reworded variant of entry 1.
question_1_answers = [
    'Napoleão morreu de ataque cardíaco em setembro, e por isso ele perdeu a guerra',
    'Napoleão morreu de ataque cardíaco em setembro, e por isso ele perdeu a guerra',
    'Napoles é uma cidade legal na itália onde napoleão passou férias',
    'Napoles é uma cidade legal na itália onde napoleão passou férias',
    'Napoleão morreu de infarto em setembro, e por isso ele ganhou a guerra',
]

In [None]:
# 1-based answer numbers, matching the node labels produced by
# DetectorDeCola.generate_manual_graph.
question_options = list(range(1, len(question_1_answers) + 1))
# Multi-select list of answers (not a slider, despite the name); every
# answer is selected by default.
question_slider = ipywidgets.SelectMultiple(
    options=question_options,
    value=question_options,  # default value
    disabled=False
)
# Similarity threshold in [0, 1], adjustable in steps of 0.05.
threshold_slider = ipywidgets.FloatSlider(value=0.5, min=0, max=1, step=0.05)

In [None]:
# Bare expression: the notebook renders the threshold slider as cell output.
threshold_slider

In [None]:
# Bare expression: the notebook renders the answer selector as cell output.
question_slider

In [None]:
# End-to-end run on the sample answers: embed, compare, build the graph of
# pairs above the threshold, and plot it.
detector_de_cola = DetectorDeCola()
sim_matrix = detector_de_cola.create_sim_matrix(question_1_answers)
# NOTE: the slider value is read once, when this cell runs; moving the slider
# afterwards does not redraw the graph unless the cell is executed again.
# The selection in question_slider is not consulted here.
graph = detector_de_cola.generate_manual_graph(sim_matrix, threshold=threshold_slider.value)
edge_labels = detector_de_cola.generate_edge_labels(graph)
detector_de_cola.plot_single_graph(graph, edge_labels)