In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the verse table from the Excel workbook (no preprocessing happens in this cell).
# Later cells read the 'ID' and 'Translation' columns of this frame.
df=pd.read_excel("Testing File.xlsx")

In [3]:
df.shape

(564, 2)

In [4]:
df.columns

Index(['ID', 'Translation'], dtype='object')

In [5]:
df

Unnamed: 0,ID,Translation
0,78|1,Concerning what are they disputing?
1,78|2,"Concerning the Great News,"
2,78|3,About which they cannot agree.
3,78|4,"Verily, they shall soon (come to) know!"
4,78|5,"Verily, verily they shall soon (come to) know!"
...,...,...
559,114|2,"The King (or Ruler) of Mankind,"
560,114|3,"The god (or judge) of Mankind,-"
561,114|4,"From the mischief of the Whisperer (of Evil), ..."
562,114|5,(The same) who whispers into the hearts of Man...


In [6]:
# Keep only rows that carry both an ID and a translation. Converting a missing
# cell with str() would produce the literal text 'nan', which would then be
# embedded and indexed as if it were a real verse.
verse_rows = df.dropna(subset=['ID', 'Translation'])

# verses[i] and texts[i] always describe the same row; the search code relies on
# that positional alignment to map an index hit back to its verse.
verses = [
    {'id': str(verse_id), 'translation': str(translation)}
    for verse_id, translation in zip(verse_rows['ID'], verse_rows['Translation'])
]
texts = [verse['translation'] for verse in verses]

In [7]:
len(verses)

564

In [8]:
verses

[{'id': '78|1', 'translation': 'Concerning what are they disputing?'},
 {'id': '78|2', 'translation': 'Concerning the Great News,'},
 {'id': '78|3', 'translation': 'About which they cannot agree.'},
 {'id': '78|4', 'translation': 'Verily, they shall soon (come to) know!'},
 {'id': '78|5',
  'translation': 'Verily, verily they shall soon (come to) know!'},
 {'id': '78|6',
  'translation': 'Have We not made the earth as a wide expanse,'},
 {'id': '78|7', 'translation': 'And the mountains as pegs?'},
 {'id': '78|8', 'translation': 'And (have We not) created you in pairs,'},
 {'id': '78|9', 'translation': 'And made your sleep for rest,'},
 {'id': '78|10', 'translation': 'And made the night as a covering,'},
 {'id': '78|11', 'translation': 'And made the day as a means of subsistence?'},
 {'id': '78|12',
  'translation': 'And (have We not) built over you the seven firmaments,'},
 {'id': '78|13', 'translation': 'And placed (therein) a Light of Splendour?'},
 {'id': '78|14',
  'translation': '

In [9]:
texts

['Concerning what are they disputing?',
 'Concerning the Great News,',
 'About which they cannot agree.',
 'Verily, they shall soon (come to) know!',
 'Verily, verily they shall soon (come to) know!',
 'Have We not made the earth as a wide expanse,',
 'And the mountains as pegs?',
 'And (have We not) created you in pairs,',
 'And made your sleep for rest,',
 'And made the night as a covering,',
 'And made the day as a means of subsistence?',
 'And (have We not) built over you the seven firmaments,',
 'And placed (therein) a Light of Splendour?',
 'And do We not send down from the clouds water in abundance,',
 'That We may produce therewith corn and vegetables,',
 'And gardens of luxurious growth?',
 'Verily the Day of Sorting out is a thing appointed,',
 'The Day that the Trumpet shall be sounded, and ye shall come forth in crowds;',
 'And the heavens shall be opened as if there were doors,',
 'And the mountains shall vanish, as if they were a mirage.',
 'Truly Hell is as a place of am

In [10]:
from sentence_transformers import SentenceTransformer

# Load a pretrained Sentence Transformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

In [11]:
model.max_seq_length

256

In [12]:
# Generate one embedding per entry of `texts`, returned as a NumPy array.
# normalize_embeddings=True requests unit-length vectors, so the inner-product
# FAISS index built below effectively ranks by cosine similarity.
embeddings = model.encode(texts, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=True)

Batches:   0%|          | 0/18 [00:00<?, ?it/s]

In [13]:
embeddings

array([[-0.12165681,  0.07617427,  0.05425889, ..., -0.02766684,
         0.11036482,  0.06631723],
       [ 0.01452808,  0.0578208 ,  0.05640182, ..., -0.01346717,
         0.05134661,  0.05465131],
       [ 0.00048161,  0.07368334, -0.0711009 , ...,  0.0479711 ,
         0.07384132,  0.0206504 ],
       ...,
       [-0.03843082,  0.05808589,  0.08009238, ...,  0.03959582,
        -0.00853343,  0.00872012],
       [-0.04497285,  0.02105842, -0.00708657, ..., -0.00562495,
         0.04190932, -0.01615296],
       [-0.05401439,  0.08505791,  0.00329416, ..., -0.09199855,
        -0.06999613,  0.02158283]], shape=(564, 384), dtype=float32)

In [14]:
# Embeddings shape
embeddings.shape

(564, 384)

In [15]:
# Embedding dimension
embeddings.shape[1]

384

In [16]:
# Data type
embeddings.dtype

dtype('float32')

In [17]:
import faiss

In [18]:
# Build FAISS Index
# IndexFlatIP scores by inner product; the vector width is taken from the
# embedding matrix so it always matches the loaded model.
# search_verses maps returned ids back to `verses` by position, so the rows of
# `embeddings` must stay in the same order as `verses`.
dimension = embeddings.shape[1]
index=faiss.IndexFlatIP(dimension)
index.add(embeddings)

In [19]:
# Define Search Function
def search_verses(query, k=5):
    """Return up to `k` verses whose embeddings score highest against `query`.

    The query is encoded with the module-level `model` using the same
    normalization option as the corpus embeddings, searched against the
    module-level FAISS `index`, and each hit is mapped back to `verses`
    by position.

    Args:
        query: Free-text search string.
        k: Maximum number of results to return (default 5). Values larger
            than the number of indexed vectors are clamped to that number.

    Returns:
        A list of dicts with keys 'Score' (float), 'ID' and 'Translation',
        in the order returned by the index search. The list is empty when
        the index holds no vectors.

    Raises:
        ValueError: If `k` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    print(f"Query: '{query}'")

    # Encode the query the same way the corpus was encoded (unit-length
    # vectors), so inner-product scores are comparable across both sides.
    query_embedding = model.encode(
        [query], normalize_embeddings=True, convert_to_numpy=True
    )
    print(f"Query embedding shape: {query_embedding.shape}")

    # FAISS expects a C-contiguous float32 matrix.
    query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)

    # Never request more neighbours than exist: FAISS fills missing slots
    # with id -1, and verses[-1] would silently return the last verse.
    k = min(k, index.ntotal)
    if k == 0:
        print("Found 0 results.")
        return []

    scores, indices = index.search(query_embedding, k)

    # Format results, skipping any padding ids defensively.
    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx < 0:
            continue
        verse = verses[idx]
        results.append({
            'Score': float(score),
            'ID': verse['id'],
            'Translation': verse['translation']
        })

    print(f"Found {len(results)} results.")
    return results

In [20]:
# Test Search
query1 = "What happened to the people of the elephant?"
results1 = search_verses(query1, k=3)

Query: 'What happened to the people of the elephant?'
Query embedding shape: (1, 384)
Found 3 results.


In [21]:
# Show each hit for query1: similarity score, verse reference, translation text.
# The loop iterates the results directly; no position counter is needed.
for result in results1:
    print(f"Score: {result['Score']:.4f}")
    print(f"Verse ID: {result['ID']}")
    print(f"Translation: {result['Translation']}")

Score: 0.5995
Verse ID: 105|1
Translation: Seest thou not how thy Lord dealt with the Companions of the Elephant?
Score: 0.4297
Verse ID: 85|18
Translation: Of Pharaoh and the Thamud?
Score: 0.3962
Verse ID: 83|31
Translation: And when they returned to their own people, they would return jesting;


In [22]:
# Test Search
query2 = "What does the Qur’an say about the night and the day?"
results2 = search_verses(query2, k=5)

Query: 'What does the Qur’an say about the night and the day?'
Query embedding shape: (1, 384)
Found 5 results.


In [23]:
# Show each hit for query2: similarity score, verse reference, translation text.
# The loop iterates the results directly; no position counter is needed.
for result in results2:
    print(f"Score: {result['Score']:.4f}")
    print(f"Verse ID: {result['ID']}")
    print(f"Translation: {result['Translation']}")

Score: 0.6368
Verse ID: 86|2
Translation: And what will explain to thee what the Night-Visitant is?-
Score: 0.6265
Verse ID: 97|2
Translation: And what will explain to thee what the night of power is?
Score: 0.5573
Verse ID: 85|21
Translation: Nay, this is a Glorious Qur'an,
Score: 0.5423
Verse ID: 86|1
Translation: By the Sky and the Night-Visitant (therein);-
Score: 0.5368
Verse ID: 84|17
Translation: The Night and its Homing;


In [24]:
# Test Search
query3 = "How is Allah described?"
results3 = search_verses(query3, k=5)

Query: 'How is Allah described?'
Query embedding shape: (1, 384)
Found 5 results.


In [25]:
# Show each hit for query3: similarity score, verse reference, translation text.
# The loop iterates the results directly; no position counter is needed.
for result in results3:
    print(f"Score: {result['Score']:.4f}")
    print(f"Verse ID: {result['ID']}")
    print(f"Translation: {result['Translation']}")

Score: 0.6572
Verse ID: 96|14
Translation: Knoweth he not that Allah doth see?
Score: 0.6393
Verse ID: 112|1
Translation: Say: He is Allah, the One and Only;
Score: 0.6261
Verse ID: 112|2
Translation: Allah, the Eternal, Absolute;
Score: 0.6144
Verse ID: 85|9
Translation: Him to Whom belongs the dominion of the heavens and the earth! And Allah is Witness to all things.
Score: 0.5945
Verse ID: 80|17
Translation: Woe to man! What hath made him reject Allah;
