In [None]:
import sys
import os

# Root of the project checkout. It is appended to sys.path so that the
# `models.*` imports in the following cell can be resolved.
# Set the SEMANTIC_SIMILARITY_ROOT environment variable to run the notebook
# from a different location; the original path remains the default.
project_root = os.path.abspath(
    os.environ.get(
        "SEMANTIC_SIMILARITY_ROOT",
        r"C:\Users\hp\Desktop\semantic_similarity",
    )
)
# Guard against duplicate entries when the cell is re-run in the same kernel.
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
import json
import numpy as np
from tqdm import tqdm

from models.tfidf_model import initialize_tfidf, tfidf_similarity
from models.word2vec_model import word2vec_similarity
from models.transformer_model import transformer_similarity




Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [4]:
import json

# Load the test pairs (a mapping of category name -> list of sentence pairs).
# The path is built from `project_root` (defined in the first cell) instead of
# repeating the absolute location, and the file is decoded as UTF-8 explicitly
# because the platform default encoding is not guaranteed to be UTF-8.
with open(os.path.join(project_root, "data", "test_pairs.json"), encoding="utf-8") as f:
    dataset = json.load(f)

In [5]:
# Flatten every pair of every category into a single corpus list, keeping the
# original order: first sentence of a pair, then its second sentence.
all_sentences = [
    pair[position]
    for category in dataset
    for pair in dataset[category]
    for position in (0, 1)
]

print("Total sentences in corpus:", len(all_sentences))


Total sentences in corpus: 200


In [6]:
# Hand the full sentence corpus to the TF-IDF model before any pair is scored.
# NOTE(review): initialize_tfidf is defined in models.tfidf_model (not visible
# here); presumably it fits a shared vectorizer that tfidf_similarity reuses —
# confirm against that module.
initialize_tfidf(all_sentences)
print("Global TF-IDF fitted.")


Global TF-IDF fitted.


In [7]:
def _score_pair(s1, s2):
    """Score one sentence pair with all three models and return a result row."""
    tfidf_score = tfidf_similarity(s1, s2)
    w2v_score = word2vec_similarity(s1, s2)
    transformer_score = transformer_similarity(s1, s2)
    # Cast to built-in floats so the rows can be serialised with json later.
    return {
        "s1": s1,
        "s2": s2,
        "tfidf": float(tfidf_score),
        "word2vec": float(w2v_score),
        "transformer": float(transformer_score),
    }


# category name -> list of per-pair result rows, in dataset order
results = {}

for category, pairs in dataset.items():
    print(f"\nProcessing category: {category}")
    results[category] = [_score_pair(s1, s2) for s1, s2 in tqdm(pairs)]



Processing category: exact_match


100%|██████████| 10/10 [00:00<00:00, 53.90it/s]



Processing category: synonyms


100%|██████████| 15/15 [00:00<00:00, 73.10it/s]



Processing category: antonyms


100%|██████████| 15/15 [00:00<00:00, 76.78it/s]



Processing category: negation


100%|██████████| 15/15 [00:00<00:00, 73.98it/s]



Processing category: word_order


100%|██████████| 10/10 [00:00<00:00, 69.06it/s]



Processing category: abbreviations


100%|██████████| 10/10 [00:00<00:00, 57.12it/s]



Processing category: domain_shift


100%|██████████| 10/10 [00:00<00:00, 58.91it/s]



Processing category: same_topic_diff_stance


100%|██████████| 15/15 [00:00<00:00, 65.55it/s]


In [8]:
# Persist the per-pair scores as JSON inside the project's data directory.
# The path is built from `project_root` (defined in the first cell) rather than
# a second copy of the absolute location; the encoding is stated explicitly so
# the file is written the same way on every platform.
results_path = os.path.join(project_root, "data", "evaluation_results.json")
with open(results_path, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=4)

print("Results saved to data/evaluation_results.json")


Results saved to data/evaluation_results.json


In [10]:
# Inspect the per-pair result rows recorded for the "synonyms" category.
results["synonyms"]


[{'s1': 'He bought a car',
  's2': 'He purchased an automobile',
  'tfidf': 0.0,
  'word2vec': 0.924019992351532,
  'transformer': 0.911146342754364},
 {'s1': 'The child is happy',
  's2': 'The kid is joyful',
  'tfidf': 0.0,
  'word2vec': 0.933924674987793,
  'transformer': 0.8792229890823364},
 {'s1': 'The exam was difficult',
  's2': 'The test was hard',
  'tfidf': 0.0,
  'word2vec': 0.9310215711593628,
  'transformer': 0.7979397773742676},
 {'s1': 'She is intelligent',
  's2': 'She is smart',
  'tfidf': 0.0,
  'word2vec': 0.967972993850708,
  'transformer': 0.8964240550994873},
 {'s1': 'The meeting was cancelled',
  's2': 'The meeting was called off',
  'tfidf': 0.40683347249583723,
  'word2vec': 0.9499539732933044,
  'transformer': 0.8203722238540649},
 {'s1': 'The house is large',
  's2': 'The home is big',
  'tfidf': 0.0,
  'word2vec': 0.9398915767669678,
  'transformer': 0.8912922143936157},
 {'s1': 'He started laughing',
  's2': 'He began laughing',
  'tfidf': 0.46256111108067

## Average similarity per category


In [11]:
import numpy as np

# Output key of each mean -> score field stored on every result row.
_score_fields = {
    "tfidf_mean": "tfidf",
    "word2vec_mean": "word2vec",
    "transformer_mean": "transformer",
}

# category name -> mean similarity of each model over that category's pairs
category_means = {
    category: {
        mean_key: np.mean([row[field] for row in rows])
        for mean_key, field in _score_fields.items()
    }
    for category, rows in results.items()
}

category_means


{'exact_match': {'tfidf_mean': np.float64(1.0),
  'word2vec_mean': np.float64(0.9999999940395355),
  'transformer_mean': np.float64(1.0000000178813935)},
 'synonyms': {'tfidf_mean': np.float64(0.05795963890510111),
  'word2vec_mean': np.float64(0.9352444767951965),
  'transformer_mean': np.float64(0.7996547619501749)},
 'antonyms': {'tfidf_mean': np.float64(0.3957115914126818),
  'word2vec_mean': np.float64(0.963489846388499),
  'transformer_mean': np.float64(0.8087936798731487)},
 'negation': {'tfidf_mean': np.float64(0.8878563856688516),
  'word2vec_mean': np.float64(0.9644668896993002),
  'transformer_mean': np.float64(0.8303131461143494)},
 'word_order': {'tfidf_mean': np.float64(1.0),
  'word2vec_mean': np.float64(0.9977775633335113),
  'transformer_mean': np.float64(0.9829902410507202)},
 'abbreviations': {'tfidf_mean': np.float64(0.5959773766394568),
  'word2vec_mean': np.float64(0.855217695236206),
  'transformer_mean': np.float64(0.7896298110485077)},
 'domain_shift': {'tfidf_

In [13]:
# Print a short, three-decimal report of the mean scores for each category.
for category in category_means:
    scores = category_means[category]
    report_lines = [
        f"\nCategory: {category}",
        f"TF-IDF Mean: {scores['tfidf_mean']:.3f}",
        f"Word2Vec Mean: {scores['word2vec_mean']:.3f}",
        f"Transformer Mean: {scores['transformer_mean']:.3f}",
    ]
    print(*report_lines, sep="\n")



Category: exact_match
TF-IDF Mean: 1.000
Word2Vec Mean: 1.000
Transformer Mean: 1.000

Category: synonyms
TF-IDF Mean: 0.058
Word2Vec Mean: 0.935
Transformer Mean: 0.800

Category: antonyms
TF-IDF Mean: 0.396
Word2Vec Mean: 0.963
Transformer Mean: 0.809

Category: negation
TF-IDF Mean: 0.888
Word2Vec Mean: 0.964
Transformer Mean: 0.830

Category: word_order
TF-IDF Mean: 1.000
Word2Vec Mean: 0.998
Transformer Mean: 0.983

Category: abbreviations
TF-IDF Mean: 0.596
Word2Vec Mean: 0.855
Transformer Mean: 0.790

Category: domain_shift
TF-IDF Mean: 0.222
Word2Vec Mean: 0.749
Transformer Mean: 0.305

Category: same_topic_diff_stance
TF-IDF Mean: 0.573
Word2Vec Mean: 0.907
Transformer Mean: 0.798


In [None]:
def logical_failure_rate(category, model_key, threshold=0.7, data=None):
    """Return the fraction of pairs in a category scored above ``threshold``.

    A pair counts as a failure when the chosen model's similarity score is
    strictly greater than ``threshold``.

    Args:
        category: Key of the category to evaluate.
        model_key: Key of the score field on each result row
            (e.g. "tfidf", "word2vec", "transformer").
        threshold: Similarity above which a pair is counted as a failure.
        data: Optional mapping of category -> list of result rows. Defaults to
            the notebook-level ``results`` dict when omitted.

    Returns:
        failures / total as a float, or NaN when the category has no pairs
        (previously this raised ZeroDivisionError).

    Raises:
        KeyError: If ``category`` or ``model_key`` is missing.
    """
    if data is None:
        # Fall back to the scores computed earlier in the notebook.
        data = results

    pairs = data[category]
    if not pairs:
        # No pairs means the rate is undefined; NaN avoids reporting a
        # misleading 0.0 and avoids dividing by zero.
        return float("nan")

    failures = sum(1 for pair in pairs if pair[model_key] > threshold)
    return failures / len(pairs)


In [15]:
# Share of "negation" pairs whose transformer score exceeds the default 0.7 threshold.
logical_failure_rate("negation", "transformer")


0.9333333333333333

In [16]:
# Share of "negation" pairs whose word2vec score exceeds the default 0.7 threshold.
logical_failure_rate("negation", "word2vec")


1.0

In [17]:
# Share of "negation" pairs whose TF-IDF score exceeds the default 0.7 threshold.
logical_failure_rate("negation", "tfidf")


0.8

In [19]:
# Share of "antonyms" pairs whose transformer score exceeds the default 0.7 threshold.
logical_failure_rate("antonyms", "transformer")

0.9333333333333333

In [20]:
# Share of "antonyms" pairs whose word2vec score exceeds the default 0.7 threshold.
logical_failure_rate("antonyms", "word2vec")

1.0

In [21]:
# Share of "antonyms" pairs whose TF-IDF score exceeds the default 0.7 threshold.
logical_failure_rate("antonyms", "tfidf")

0.0

In [22]:
# Share of "word_order" pairs whose transformer score exceeds the default 0.7 threshold.
logical_failure_rate("word_order", "transformer")

1.0

In [23]:
# Share of "word_order" pairs whose word2vec score exceeds the default 0.7 threshold.
logical_failure_rate("word_order", "word2vec")

1.0

In [24]:
# Share of "word_order" pairs whose TF-IDF score exceeds the default 0.7 threshold.
logical_failure_rate("word_order", "tfidf")

1.0