In [42]:
from sentence_transformers import SentenceTransformer
from datetime import datetime
from typing import Optional
import logging
import redis
import pickle
import hashlib
import time
import pandas as pd

# Module-level logger named after this module; none of the cells visible here log through it.
logger = logging.getLogger(__name__)

In [43]:
# Identifier of the pretrained checkpoint handed to SentenceTransformer.
# Kept as a named constant so the model under test can be swapped in one place.
EMBEDDING_MODEL_NAME = "BAAI/bge-m3"

# Single shared model instance; later cells use it for both encode() and similarity().
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [44]:
# Noisy variants of the reference string "hachiken ramen".
# Each entry is later embedded and scored against the reference embedding.
hachiken_cases = [
    # Reference spelling and spacing-only differences
    "hachiken ramen",
    "hachikenramen",
    "hachiken  ramen",
    # Small typos: substituted, dropped, or inserted letters
    "hachlken ramen",
    "hachikn ramen",
    "hachiken ramem",
    "hachikeen ramen",
    "hatchiken ramen",
    "hchiken ramen",
    "hachiken rame",
    "achiken ramen",
    # Truncated or partial names
    "hachike",
    "ken ramen",
    "hachike ramen",
    # Reordered or interleaved word parts
    "ramen hachiken",
    "hachiram kenmen",
    "ramenhachiken",
    "hachiramenken",
    # Look-alike substitutions (e.g. "4" for "a", "rn" for "m")
    "haclken ramen",
    "h4chiken ramen",
    "hachiken rarnen",
    "hacnken ramen",
    "hachikem rameu",
    # Whitespace noise: padding, repeated spaces, newline, tab, spaced-out letters
    "  hachiken ramen  ",
    "hachiken   ramen",
    "hachiken\nramen",
    "hachiken\t ramen",
    "h a c h i k e n   r a m e n",
    # Extra words, punctuation, or digits around the name
    "hachiken ramen noodles",
    "best hachiken ramen",
    "hachikenramen!!",
    "order hachiken ramen now",
    "hachiken ramen shop",
    "hachiken_ ramen",
    "hachiken ramen123",
    # Several kinds of noise combined
    "ha c h ik en  rarnen!!!",
    "haclkenramen now",
    "ramen hachiken noodl",
    "hchikn r4men",
    "hatchikem  rame",
    "hachikenramn"
]

# Noisy variants of the reference string "starbucks".
# Several entries contain non-ASCII look-alike glyphs; do not "normalize" them.
starbucks_cases = [
    # Reference spelling, split word, and capitalization
    "starbucks",
    "star bucks",
    "Starbucks",
    # Small typos: substituted, swapped, inserted, or dropped letters
    "starbuvks",
    "strabucks",
    "starbicks",
    "stqrbucks",
    "starrbucks",
    "starbuck",
    "starbuc",
    # Reordered word parts
    "bucksstar",
    "bucks star",
    # Digit and non-ASCII look-alike character substitutions
    "st4rbucks",
    "staЯbucks",
    "starnuсks",
    "starbucķs",
    "stɑrbucks",
    # Whitespace noise: padding, repeated spaces, newline, spaced-out letters
    "  starbucks  ",
    "star   bucks",
    "star\nbucks",
    "s t a r b u c k s",
    # Extra surrounding words
    "order starbucks now",
    "best starbucks coffee",
    # Several kinds of noise combined
    "st4r buks!!",
    "order staЯbucķs now"
]

# Noisy variants of the Thai reference string assigned to pizza_case.
# Thai combining marks are part of the test data; keep every string exactly as written.
pizza_cases = [
    # Reference spelling and the same text split by a space
    "พิซซ่าคอมปะนี",
    "พิซซ่า คอมปะนี",
    # Small typos: extra, substituted, or changed characters
    "พิซซ่าคอมปะนีิ",
    "พิซซ่าคอมปะนร",
    "พีซซ่าคอมปะนี",
    # Truncated or partial names
    "พิซซ่าคอม",
    "คอมปะนี",
    "พิซซ่า",
    # Reordered word parts
    "คอมปะนีพิซซ่า",
    "คอมปะนี พิซซ่า",
    # Latin look-alike ending and repeated combining marks
    "พิซซ่าคอมปะนl",
    "พิซซ่าคอมปะนีิิ",
    # Whitespace noise: repeated spaces and newline
    "พิซซ่า   คอมปะนี",
    "พิซซ่า\nคอมปะนี",
    # Extra text appended or prepended to the name
    "พิซซ่าคอมปะนีเดลิเวอรี่",
    "ร้านพิซซ่าคอมปะนี",
    # Several kinds of noise combined
    "พิซซ่า คอม ปะนl!!",
    "พิ ซ ซ่า ค อ ม ปะ นี!!!"
]

# Noisy variants of the mixed Thai/Latin reference string assigned to iberry_case.
# Thai combining marks and escape sequences are part of the test data; keep them exactly as written.
iberry_cases = [
    # Reference spelling and spacing-only differences
    "บริษัท iberry จำกัด มหาชน",
    "บริษัท iberry จำกัดมหาชน",
    "บริษัท iberry จำกัด  มหาชน",
    # Typos in the Thai words and/or in "iberry"
    "บริษท iberry จำกัด มหาชน",
    "บรษัท ibery จำกัด มหาชน",
    "บริษัท ibery จำกัด มหานชน",
    # Truncated or partial names
    "บริษัท iberry",
    "iberry จำกัด",
    "บริษัท iber",
    "iberry มหาชน",
    # Reordered words
    "iberry บริษัท จำกัด มหาชน",
    "จำกัด บริษัท iberry มหาชน",
    # Look-alike character substitutions
    "บริษัn iberry จำกัด มหาชน",
    "บริษํท iberry จํากัด มหาชน",
    # Whitespace noise: spaced-out characters, newline, tab, repeated spaces
    "บ ริ ษั ท iberry จำ กั ด ม ห า ช น",
    "บริษัท\niberry\tจำกัด   มหาชน",
    # Extra words, punctuation, or digits around the name
    "บริษัท iberry จำกัด มหาชน ประเทศไทย",
    "iberry จำกัด มหาชน group",
    "iberry จำกัดมหาชน!!",
    "บริษัท iberry จำกัด มหาชน 2025",
    # Several kinds of noise combined
    "บริษํท ibery จํากัด มหานชน",
    "บ ริ ษั ท ibery จำกัดมห า ชน",
    "บริษัท ibery มหาชน จำกัด"
]

In [45]:
# Reference ("true") string for each of the four brands; every variant in the
# matching *_cases list is compared against the embedding of this string.
hachiken_case = "hachiken ramen"
starbucks_case = "starbucks"
pizza_case = "พิซซ่าคอมปะนี"
iberry_case = "บริษัท iberry จำกัด มหาชน"

# For each brand, embed the whole list of variants in one encode() call and the
# reference string in a second call. The variants are encoded first, then the
# reference, brand by brand.
hachiken_embedded, hachiken_true_embedded = (
    model.encode(hachiken_cases),
    model.encode(hachiken_case),
)

starbucks_embedded, starbucks_true_embedded = (
    model.encode(starbucks_cases),
    model.encode(starbucks_case),
)

pizza_embedded, pizza_true_embedded = (
    model.encode(pizza_cases),
    model.encode(pizza_case),
)

iberry_embedded, iberry_true_embedded = (
    model.encode(iberry_cases),
    model.encode(iberry_case),
)

In [46]:
# Score every hachiken variant against the reference embedding.
# The results list is (re)created here; the following cells append to it, so
# this cell is the reset point when re-running the scoring cells.
model_scores = []
for test_case, embedding in zip(hachiken_cases, hachiken_embedded):
    # perf_counter() is the high-resolution clock meant for timing short
    # intervals; a single similarity call takes well under a millisecond,
    # which the wall clock time.time() may not resolve reliably.
    started = time.perf_counter()
    score = model.similarity(embedding, hachiken_true_embedded)
    elapsed = time.perf_counter() - started
    # score is a single-element result; store it as a percentage.
    model_scores.append(
        {'test_case': test_case, 'score': score.item() * 100, 'time': elapsed}
    )
    

In [47]:
# Score every pizza variant against the reference embedding and append the
# results to model_scores (created in an earlier cell).
for test_case, embedding in zip(pizza_cases, pizza_embedded):
    # perf_counter() is the high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock and may not resolve them reliably.
    started = time.perf_counter()
    score = model.similarity(embedding, pizza_true_embedded)
    elapsed = time.perf_counter() - started
    # score is a single-element result; store it as a percentage.
    model_scores.append(
        {'test_case': test_case, 'score': score.item() * 100, 'time': elapsed}
    )
    

In [48]:
# Score every iberry variant against the reference embedding and append the
# results to model_scores (created in an earlier cell).
for test_case, embedding in zip(iberry_cases, iberry_embedded):
    # perf_counter() is the high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock and may not resolve them reliably.
    started = time.perf_counter()
    score = model.similarity(embedding, iberry_true_embedded)
    elapsed = time.perf_counter() - started
    # score is a single-element result; store it as a percentage.
    model_scores.append(
        {'test_case': test_case, 'score': score.item() * 100, 'time': elapsed}
    )
    

In [49]:
# Score every starbucks variant against the reference embedding and append the
# results to model_scores (created in an earlier cell).
for test_case, embedding in zip(starbucks_cases, starbucks_embedded):
    # perf_counter() is the high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock and may not resolve them reliably.
    started = time.perf_counter()
    score = model.similarity(embedding, starbucks_true_embedded)
    elapsed = time.perf_counter() - started
    # score is a single-element result; store it as a percentage.
    model_scores.append(
        {'test_case': test_case, 'score': score.item() * 100, 'time': elapsed}
    )
    

In [50]:
# Display the collected results: one dict per variant with 'test_case' (the input
# string), 'score' (similarity * 100) and 'time' (elapsed seconds for the similarity call).
model_scores

[{'test_case': 'hachiken ramen',
  'score': 99.99999403953552,
  'time': 0.0005662441253662109},
 {'test_case': 'hachikenramen',
  'score': 78.66547703742981,
  'time': 9.775161743164062e-05},
 {'test_case': 'hachiken  ramen',
  'score': 99.99999403953552,
  'time': 7.343292236328125e-05},
 {'test_case': 'hachlken ramen',
  'score': 83.20984244346619,
  'time': 6.341934204101562e-05},
 {'test_case': 'hachikn ramen',
  'score': 92.82689094543457,
  'time': 6.103515625e-05},
 {'test_case': 'hachiken ramem',
  'score': 84.12150740623474,
  'time': 7.843971252441406e-05},
 {'test_case': 'hachikeen ramen',
  'score': 96.05879783630371,
  'time': 7.152557373046875e-05},
 {'test_case': 'hatchiken ramen',
  'score': 93.18318367004395,
  'time': 7.128715515136719e-05},
 {'test_case': 'hchiken ramen',
  'score': 90.67526459693909,
  'time': 6.246566772460938e-05},
 {'test_case': 'hachiken rame',
  'score': 85.4762852191925,
  'time': 9.036064147949219e-05},
 {'test_case': 'achiken ramen',
  'sco

In [51]:
# Build a table from the list of result dicts; the dict keys
# ('test_case', 'score', 'time') become the columns.
df = pd.DataFrame(model_scores)

In [52]:
# Write the results table to an Excel file at this relative path; index=False
# leaves the DataFrame's row index out of the sheet.
df.to_excel("sentence_transformer_test_cases_results2.xlsx", index=False)