In [None]:
!pip install -q sentence-transformers scikit-learn pandas numpy matplotlib


In [None]:
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Candidate models to compare: short display label -> model identifier that
# is handed to SentenceTransformer(...) when the benchmark runs.
models = dict(
    MiniLM="sentence-transformers/all-MiniLM-L6-v2",
    MPNet="sentence-transformers/all-mpnet-base-v2",
    DistilRoBERTa="sentence-transformers/paraphrase-distilroberta-base-v1",
    BERT="sentence-transformers/bert-base-nli-mean-tokens",
)


In [None]:
# Evaluation data: each entry is a (first sentence, second sentence) tuple.
# Every pair is encoded and scored with cosine similarity by the benchmark.
sentence_pairs = [
    # Disaster-response robots.
    (
        "Rescue robots help during disasters",
        "Robots assist humans in disaster situations",
    ),
    # Self-driving vehicles and their sensors.
    (
        "Autonomous vehicles use sensors",
        "Self driving cars rely on cameras and sensors",
    ),
    # Topically unrelated sentences (presumably a low-similarity control --
    # TODO confirm; note it is averaged together with the other pairs).
    (
        "Pizza delivery is fast",
        "Natural language processing uses transformers",
    ),
    # Learning from data.
    (
        "AI models learn from data",
        "Machine learning systems are trained using data",
    ),
]


In [None]:
# Names of the decision criteria, in the same order as the metric columns of
# the results table.
criteria = [
    "Similarity",
    "InferenceTime",
    "ModelSize",
    "EmbeddingDim",
]


In [None]:
# Benchmark every candidate model on the sentence pairs and collect one row
# per model:
#   [label, mean cosine similarity, elapsed seconds, parameter size in MiB,
#    embedding dimension]
if not sentence_pairs:
    # Without at least one pair there is nothing to average and no embedding
    # to read the dimension from; fail with a clear message instead.
    raise ValueError("sentence_pairs must contain at least one pair")

results = []

for name, model_name in models.items():
    model = SentenceTransformer(model_name)

    similarities = []
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by wall-clock adjustments.
    # NOTE: the timed region covers encoding *and* the cosine computation for
    # all pairs.
    start_time = time.perf_counter()

    for s1, s2 in sentence_pairs:
        emb = model.encode([s1, s2])
        sim = cosine_similarity([emb[0]], [emb[1]])[0][0]
        similarities.append(sim)

    end_time = time.perf_counter()

    avg_similarity = np.mean(similarities)
    inference_time = end_time - start_time
    # Parameter memory in MiB, using each tensor's real element width rather
    # than assuming 4 bytes (float32) per parameter.
    model_size = sum(
        p.numel() * p.element_size() for p in model.parameters()
    ) / (1024**2)
    # `emb` is the embedding array of the last encoded pair; the guard above
    # guarantees the loop ran at least once, so it is always defined here.
    embedding_dim = emb.shape[1]

    results.append([
        name,
        avg_similarity,
        inference_time,
        model_size,
        embedding_dim
    ])


In [None]:
# Turn the per-model benchmark rows into a table: the model label followed by
# one column per measured criterion.
df = pd.DataFrame(
    data=results,
    columns=[
        "Model",
        "Similarity",
        "InferenceTime",
        "ModelSize",
        "EmbeddingDim",
    ],
)

# Last expression of the cell: display the table.
df


In [None]:
# TOPSIS inputs, one entry per metric column of the results table
# (Similarity, InferenceTime, ModelSize, EmbeddingDim).
weights = np.array([
    0.4,  # Similarity
    0.2,  # InferenceTime
    0.2,  # ModelSize
    0.2,  # EmbeddingDim
])   # Sum = 1

# "+" marks a criterion where larger values are better, "-" one where smaller
# values are better.
impacts = [
    "+",  # Similarity
    "-",  # InferenceTime
    "-",  # ModelSize
    "+",  # EmbeddingDim
]


In [None]:
# Build the decision matrix from the criterion columns only. Selecting them
# by name (instead of "every column after the first") keeps this cell
# re-runnable after later cells have added "TOPSIS Score" and "Rank" to df;
# with positional slicing those extra columns would leak into the matrix and
# no longer line up with the four weights.
decision_matrix = df[criteria].values

# Vector normalisation: divide each column by its Euclidean (L2) norm.
norm_matrix = decision_matrix / np.sqrt((decision_matrix ** 2).sum(axis=0))


In [None]:
# Scale every normalised column by the weight of its criterion (the weight
# vector broadcasts across the rows).
weighted_matrix = np.multiply(norm_matrix, weights)


In [None]:
# Ideal best / worst value per criterion. For a "+" criterion the best value
# is the column maximum and the worst the column minimum; for any other
# impact the roles are swapped.
is_benefit = np.array(impacts) == "+"
column_max = weighted_matrix.max(axis=0)
column_min = weighted_matrix.min(axis=0)

ideal_best = np.where(is_benefit, column_max, column_min)
ideal_worst = np.where(is_benefit, column_min, column_max)


In [None]:
# Euclidean distance of every row of the weighted matrix to the ideal-best
# and ideal-worst points.
gap_to_best = weighted_matrix - ideal_best
gap_to_worst = weighted_matrix - ideal_worst

distance_best = np.sqrt(np.square(gap_to_best).sum(axis=1))
distance_worst = np.sqrt(np.square(gap_to_worst).sum(axis=1))


In [None]:
# Relative closeness to the ideal solution: the share of the total distance
# that separates a row from the ideal-worst point.
total_distance = distance_best + distance_worst
topsis_score = distance_worst / total_distance

# Store the score next to the metrics it was derived from.
df["TOPSIS Score"] = topsis_score


In [None]:
# Rank the rows by TOPSIS score in descending order, so the highest score
# receives rank 1.
score_ranks = df["TOPSIS Score"].rank(ascending=False)
df["Rank"] = score_ranks

# Last expression of the cell: show the table ordered by rank.
df.sort_values(by="Rank")


In [None]:
# Bar chart of the TOPSIS score per model.
fig, ax = plt.subplots()
ax.bar(df["Model"], df["TOPSIS Score"])
ax.set_xlabel("Model")
ax.set_ylabel("TOPSIS Score")
ax.set_title("TOPSIS Ranking of Sentence Similarity Models")
plt.show()
