In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the verse table from the Excel workbook (no preprocessing happens in this cell)
df = pd.read_excel("Testing File.xlsx")

In [3]:
df.shape

(564, 2)

In [4]:
df.columns

Index(['ID', 'Translation'], dtype='object')

In [5]:
df

Unnamed: 0,ID,Translation
0,78|1,Concerning what are they disputing?
1,78|2,"Concerning the Great News,"
2,78|3,About which they cannot agree.
3,78|4,"Verily, they shall soon (come to) know!"
4,78|5,"Verily, verily they shall soon (come to) know!"
...,...,...
559,114|2,"The King (or Ruler) of Mankind,"
560,114|3,"The god (or judge) of Mankind,-"
561,114|4,"From the mischief of the Whisperer (of Evil), ..."
562,114|5,(The same) who whispers into the hearts of Man...


In [6]:
# Build two parallel lists in DataFrame row order:
#   verses[i] -> {'id': ..., 'translation': ...} record for row i
#   texts[i]  -> the translation string of row i (the input to the encoder)
# Iterating the two columns directly avoids DataFrame.iterrows(), which is slow
# and coerces every row to a single dtype before the values are read.
# NOTE(review): a blank Translation cell would be read as NaN and end up as the
# literal text 'nan' here — confirm the sheet has no empty cells.
verses = [
    {'id': str(verse_id), 'translation': str(translation)}
    for verse_id, translation in zip(df['ID'], df['Translation'])
]

# Keep texts aligned with verses so a row position in one maps to the other.
texts = [verse['translation'] for verse in verses]

In [7]:
from sentence_transformers import SentenceTransformer

# Load a pretrained Sentence Transformer model (BAAI/bge-large-en).
# This single model embeds both the verse translations and the search queries,
# so corpus and query vectors live in the same space.
model = SentenceTransformer("BAAI/bge-large-en")

In [8]:
model.max_seq_length

512

In [9]:
# Encode every translation in `texts` into one vector per verse.
# normalize_embeddings=True makes each vector unit-length; the result is
# returned as a NumPy array and a progress bar is shown while encoding.
embeddings = model.encode(
    texts,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=True,
)

Batches:   0%|          | 0/18 [00:00<?, ?it/s]

In [10]:
embeddings

array([[-0.00343093, -0.00909118, -0.03768877, ..., -0.01089873,
        -0.00244242, -0.01794033],
       [ 0.01650619, -0.01127368, -0.01541262, ...,  0.01103976,
        -0.0256019 , -0.00903833],
       [ 0.00504742, -0.01364853, -0.03772821, ...,  0.00353611,
        -0.04527003, -0.02437852],
       ...,
       [-0.0385254 , -0.008681  , -0.0197363 , ..., -0.03154683,
        -0.04154152, -0.0011436 ],
       [-0.00495573,  0.01731438, -0.01904378, ..., -0.00429638,
        -0.02490644, -0.03767857],
       [-0.00842436,  0.00151391, -0.00187488, ..., -0.00129223,
         0.00499626, -0.01322016]], shape=(564, 1024), dtype=float32)

In [11]:
# Embeddings shape
embeddings.shape

(564, 1024)

In [12]:
# Embedding dimension
embeddings.shape[1]

1024

In [13]:
# Data type
embeddings.dtype

dtype('float32')

In [15]:
import faiss

In [16]:
# Build a flat (brute-force) inner-product FAISS index over the verse embeddings.
# The embeddings were encoded with normalize_embeddings=True, so the inner
# product between two vectors equals their cosine similarity.
dimension = embeddings.shape[1]  # width of one embedding vector
index = faiss.IndexFlatIP(dimension)
# Row i of the index corresponds to row i of `embeddings`.
index.add(embeddings)

In [17]:
# Define Search Function
def search_verses(query, k=5):
    """Return the verses most similar to ``query``.

    The query is embedded with the module-level ``model``, L2-normalised and
    searched against the module-level FAISS ``index``. Hit positions are mapped
    back to records through the module-level ``verses`` list, which must be in
    the same order as the vectors added to ``index``.

    Parameters
    ----------
    query : str
        Free-text question or phrase to search for.
    k : int, default 5
        Maximum number of hits to return. Values larger than the number of
        indexed vectors are clamped to that number.

    Returns
    -------
    list of dict
        One dict per hit, in the order FAISS returns them, with keys
        ``'Score'`` (float inner product), ``'ID'`` and ``'Translation'``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    print(f"Query: '{query}'")

    # Generate query embedding
    query_embedding = model.encode([query])
    print(f"Query embedding shape: {query_embedding.shape}")

    # FAISS operates on C-contiguous float32 arrays, so convert *before* the
    # in-place normalisation rather than after it.
    query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)

    # Normalize query embedding (in place) so inner product == cosine similarity
    faiss.normalize_L2(query_embedding)

    # Never request more neighbours than the index holds: FAISS pads missing
    # hits with label -1, which would silently select verses[-1].
    k = min(k, index.ntotal)
    if k == 0:
        print("Found 0 results.")
        return []

    # Search
    scores, indices = index.search(query_embedding, k)

    # Format results, skipping any padding label (-1) defensively
    results = [
        {
            'Score': float(score),
            'ID': verses[int(idx)]['id'],
            'Translation': verses[int(idx)]['translation']
        }
        for score, idx in zip(scores[0], indices[0])
        if idx >= 0
    ]
    print(f"Found {len(results)} results.")

    return results

In [21]:
# Test Search: ask a sample question and keep the 5 best-matching verses
query1 = "What happened to the people of the elephant?"
results1 = search_verses(query1, k=5)

Query: 'What happened to the people of the elephant?'
Query embedding shape: (1, 1024)
Found 5 results.


In [22]:
# Print score, verse ID and translation for each hit, in the order returned
for result in results1:
    print(f"Score: {result['Score']:.4f}")
    print(f"Verse ID: {result['ID']}")
    print(f"Translation: {result['Translation']}")

Score: 0.8540
Verse ID: 105|1
Translation: Seest thou not how thy Lord dealt with the Companions of the Elephant?
Score: 0.7950
Verse ID: 88|1
Translation: Has the story reached thee of the overwhelming (Event)?
Score: 0.7885
Verse ID: 85|17
Translation: Has the story reached thee, of the forces-
Score: 0.7865
Verse ID: 89|6
Translation: Seest thou not how thy Lord dealt with the 'Ad (people),-
Score: 0.7852
Verse ID: 83|36
Translation: Will not the Unbelievers have been paid back for what they did?


In [23]:
# Second sample question; keep the 5 best-matching verses
query2 = "What will happen on the Day of Judgment?"
results2 = search_verses(query2, k=5)

Query: 'What will happen on the Day of Judgment?'
Query embedding shape: (1, 1024)
Found 5 results.


In [24]:
# Print score, verse ID and translation for each hit, in the order returned
for result in results2:
    print(f"Score: {result['Score']:.4f}")
    print(f"Verse ID: {result['ID']}")
    print(f"Translation: {result['Translation']}")

Score: 0.9329
Verse ID: 82|17
Translation: And what will explain to thee what the Day of Judgment is?
Score: 0.9258
Verse ID: 82|18
Translation: Again, what will explain to thee what the Day of Judgment is?
Score: 0.8980
Verse ID: 82|15
Translation: Which they will enter on the Day of Judgment,
Score: 0.8921
Verse ID: 85|2
Translation: By the promised Day (of Judgment);
Score: 0.8738
Verse ID: 95|7
Translation: Then what can, after this, contradict thee, as to the judgment (to come)?


In [36]:
# Third sample question; keep the 5 best-matching verses
query3 = "What is said about helping the needy and feeding the poor?"
results3 = search_verses(query3, k=5)

Query: 'What is said about helping the needy and feeding the poor?'
Query embedding shape: (1, 1024)
Found 5 results.


In [37]:
# Print score, verse ID and translation for each hit, in the order returned
for result in results3:
    print(f"Score: {result['Score']:.4f}")
    print(f"Verse ID: {result['ID']}")
    print(f"Translation: {result['Translation']}")

Score: 0.8438
Verse ID: 89|18
Translation: Nor do ye encourage one another to feed the poor!-
Score: 0.8357
Verse ID: 90|14
Translation: Or the giving of food in a day of privation
Score: 0.8306
Verse ID: 93|6
Translation: Did He not find thee an orphan and give thee shelter (and care)?
Score: 0.7992
Verse ID: 96|10
Translation: A votary when he (turns) to pray?
Score: 0.7975
Verse ID: 106|4
Translation: Who provides them with food against hunger, and with security against fear (of danger).
