#### Calculate Semantic Similarity

In [172]:
# Access files and folders within the Google Drive
from google.colab import drive
drive.mount('/content/drive')

from google.colab import userdata
userdata.get('HF_TOKEN')

# Set up the current working directory within the Google Drive
%cd /content/drive/My\ Drive/Colab\ Notebooks/LLM/teacher_standard

# Set up the current working directory within G Drive (Desktop)
# %cd "G:/My Drive/Colab Notebooks/LLM/teacher_standard"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab Notebooks/LLM/teacher_standard


In [173]:
# Optional dependency installs, left commented out; uncomment a line to run it
# !pip install sentence_transformers
# !pip install --upgrade huggingface_hub
# !pip install nbconvert

In [174]:
import warnings

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from matplotlib import cm
from plotly.offline import plot
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Suppress every Python warning from this point on
warnings.filterwarnings('ignore')

# --- Load: one row per item, read from the Excel sheet ---
df = pd.read_excel("standard_items.xlsx")
combined_texts = df['item'].tolist()

# --- Embed: encode each item's text with the sentence-transformer model ---
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(combined_texts)

# --- Compare: cosine similarity of every embedding against every other ---
similarity_matrix = cosine_similarity(embeddings)

# Same scores as a DataFrame whose rows and columns are labelled by item id
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=df['id'],
    columns=df['id'],
)

In [175]:
# Minimum cosine similarity (exclusive) for two items to be linked by an edge
SIMILARITY_THRESHOLD = 0.5

# Ids are taken by position so that pairing them with similarity_matrix rows
# does not rely on df carrying a default 0..n-1 index (df['id'][i] is a
# label lookup and would pick the wrong row, or raise, on any other index).
item_ids = df['id'].to_numpy()
pair_matrix = np.asarray(similarity_matrix)

# Index pairs (i, j) with i < j, in row-major order: every unordered pair
# exactly once and no self-pairs.
row_idx, col_idx = np.triu_indices(len(pair_matrix), k=1)
pair_scores = pair_matrix[row_idx, col_idx]
above_threshold = pair_scores > SIMILARITY_THRESHOLD

# (source id, target id, similarity) for each pair above the threshold
edges = list(zip(
    item_ids[row_idx[above_threshold]],
    item_ids[col_idx[above_threshold]],
    pair_scores[above_threshold],
))

edges_df = pd.DataFrame(edges, columns=['source', 'target', 'weight'])

# One colour per distinct 'standard' value, sampled from the viridis colormap.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9.
unique_values = df['standard'].unique()
colormap = plt.get_cmap('viridis', len(unique_values))

# Maps each 'standard' value to the RGBA tuple (floats in 0-1) at its position
color_map = {val: colormap(i) for i, val in enumerate(unique_values)}

G = nx.Graph()

# One node per item, keyed by its id and carrying the attributes used later
# for labels, hover text and per-category colouring.
for item_id, item_text, standard in zip(df['id'], df['item'], df['standard']):
    G.add_node(
        item_id,
        label=item_id,
        item=item_text,
        color=color_map[standard],
        category=standard,
    )

# Columns are iterated directly instead of edges_df.iterrows(): iterrows()
# returns each row as a single-dtype Series, so integer ids sitting next to
# float weights would come back as floats.
G.add_weighted_edges_from(
    zip(edges_df['source'], edges_df['target'], edges_df['weight'])
)

# Fixed seed so the layout is the same on every run
pos = nx.spring_layout(G, seed=42)

# Flattened edge coordinates: for every edge, both endpoints followed by a
# None entry that separates it from the next edge in the same trace.
edge_x, edge_y = [], []

for src, dst in G.edges():
    (x0, y0), (x1, y1) = pos[src], pos[dst]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

# All edges are drawn by one thin grey line trace with hover disabled
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 0.5, 'color': '#888'},
)

# One scatter trace per category, each holding only that category's nodes
node_traces = []
for category in unique_values:
    node_x = []
    node_y = []
    node_text = []
    hover_text = []
    node_color = []

    for node, attrs in G.nodes(data=True):
        if attrs['category'] != category:
            continue

        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(attrs['label'])
        hover_text.append(f"{attrs['label']}: {attrs['item']}")

        # The stored colour is an RGBA tuple of floats in 0-1. Only the RGB
        # channels are scaled to 0-255; alpha stays on the 0-1 scale that
        # rgba() colour strings expect (previously it was scaled to 255 too).
        red, green, blue, alpha = attrs['color']
        node_color.append(
            f"rgba({int(red * 255)}, {int(green * 255)}, {int(blue * 255)}, {float(alpha)})"
        )

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        text=node_text,
        mode='markers+text',
        textposition='top center',
        hoverinfo='text',
        hovertext=hover_text,
        marker=dict(
            showscale=False,
            color=node_color,
            size=15,
            line_width=2
        ),
        name=str(category)
    )
    node_traces.append(node_trace)

# Axis settings shared by x and y: no grid, no zero line, no tick labels
_hidden_axis = {'showgrid': False, 'zeroline': False, 'showticklabels': False}

# Edge trace first, then the per-category node traces
fig = go.Figure(
    data=[edge_trace, *node_traces],
    layout=go.Layout(
        width=800,
        height=600,
        showlegend=False,
        hovermode='closest',
        margin={'b': 20, 'l': 5, 'r': 5, 't': 40},
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis=dict(_hidden_axis),
        yaxis=dict(_hidden_axis),
    ),
)

In [180]:
# Write the figure to plot.html without opening a browser; as the cell's last
# expression, the call's return value is what the cell displays.
plot(fig, filename='plot.html', auto_open=False)

'plot.html'

In [181]:
from nbconvert import HTMLExporter
import nbformat

# Convert the saved notebook file into a standalone HTML page
notebook_path = 'python.ipynb'
html_exporter = HTMLExporter()

# Read the raw notebook text from disk
with open(notebook_path, mode='r', encoding='utf-8') as source_file:
    notebook_content = source_file.read()

# Parse the text as an nbformat version 4 notebook
notebook = nbformat.reads(notebook_content, as_version=4)

# The exporter returns a pair; only the first element (the HTML) is used
html_output, _ = html_exporter.from_notebook_node(notebook)

with open('python.html', mode='w', encoding='utf-8') as target_file:
    target_file.write(html_output)