In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the Arabic/English verse spreadsheet into a DataFrame.
df = pd.read_excel("Arabic_English_Translation.xlsx")

In [3]:
# (rows, columns) of the loaded sheet.
df.shape

(48, 3)

In [4]:
# Column labels of the loaded sheet; later cells read 'ID', 'Arabic' and 'Translation'.
df.columns

Index(['ID', 'Arabic', 'Translation'], dtype='object')

In [5]:
# Preview only the first rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,ID,Arabic,Translation
0,105|1,اَلَمۡ تَرَ کَیۡفَ فَعَلَ رَبُّکَ بِاَصۡحٰبِ ا...,Seest thou not how thy Lord dealt with the Com...
1,105|2,اَلَمۡ یَجۡعَلۡ کَیۡدَہُمۡ فِیۡ تَضۡلِیۡلٍ,Did He not make their treacherous plan go astray?
2,105|3,وَّ اَرۡسَلَ عَلَیۡہِمۡ طَیۡرًا اَبَابِیۡلَ,"And He sent against them Flights of Birds,"
3,105|4,تَرۡمِیۡہِمۡ بِحِجَارَۃٍ مِّنۡ سِجِّیۡلٍ,Striking them with stones of baked clay.
4,105|5,فَجَعَلَہُمۡ کَعَصۡفٍ مَّاۡکُوۡلٍ,Then did He make them like an empty field of s...
5,106|1,لِاِیۡلٰفِ قُرَیۡشٍ,For the covenants (of security and safeguard e...
6,106|2,اٖلٰفِہِمۡ رِحۡلَۃَ الشِّتَآءِ وَ الصَّیۡفِ,Their covenants (covering) journeys by winter ...
7,106|3,فَلۡیَعۡبُدُوۡا رَبَّ ہٰذَا الۡبَیۡتِ,"Let them adore the Lord of this House,"
8,106|4,الَّذِیۡۤ اَطۡعَمَہُمۡ مِّنۡ جُوۡعٍ ۬ ۙ وَّ اٰ...,"Who provides them with food against hunger, an..."
9,107|1,اَرَءَیۡتَ الَّذِیۡ یُکَذِّبُ بِالدِّیۡنِ,Seest thou one who denies the Judgment (to come)?


In [6]:
# Build one record per verse, plus the parallel list of strings to embed.
#
# The columns are walked directly rather than through DataFrame.iterrows():
# iterrows() materialises a Series for every row (slow, and it can upcast
# values when a row mixes dtypes), and its index value was never used here.
#
# NOTE(review): str() turns a missing cell into the literal text "nan";
# confirm the sheet has no empty cells before relying on these strings.
verses = [
    {
        'id': verse_id,
        'arabic': arabic_text,
        'translation': translation,
        # Combine Arabic and English for embedding
        'combined_text': f"{arabic_text} {translation}",
    }
    for verse_id, arabic_text, translation in zip(
        map(str, df['ID']),
        map(str, df['Arabic']),
        map(str, df['Translation']),
    )
]

# Same order as `verses`, so position i in `texts` describes verses[i].
texts = [verse['combined_text'] for verse in verses]

In [7]:
# Spot-check the first few combined strings rather than dumping the whole list.
texts[:10]

['اَلَمۡ تَرَ کَیۡفَ فَعَلَ رَبُّکَ بِاَصۡحٰبِ الۡفِیۡلِ Seest thou not how thy Lord dealt with the Companions of the Elephant?',
 'اَلَمۡ یَجۡعَلۡ کَیۡدَہُمۡ فِیۡ تَضۡلِیۡلٍ Did He not make their treacherous plan go astray?',
 'وَّ اَرۡسَلَ عَلَیۡہِمۡ طَیۡرًا اَبَابِیۡلَ And He sent against them Flights of Birds,',
 'تَرۡمِیۡہِمۡ بِحِجَارَۃٍ مِّنۡ سِجِّیۡلٍ Striking them with stones of baked clay.',
 'فَجَعَلَہُمۡ کَعَصۡفٍ مَّاۡکُوۡلٍ Then did He make them like an empty field of stalks and straw, (of which the corn) has been eaten up.',
 'لِاِیۡلٰفِ قُرَیۡشٍ For the covenants (of security and safeguard enjoyed) by the Quraish,',
 'اٖلٰفِہِمۡ رِحۡلَۃَ الشِّتَآءِ وَ الصَّیۡفِ Their covenants (covering) journeys by winter and summer,-',
 'فَلۡیَعۡبُدُوۡا رَبَّ ہٰذَا الۡبَیۡتِ Let them adore the Lord of this House,',
 'الَّذِیۡۤ اَطۡعَمَہُمۡ مِّنۡ جُوۡعٍ ۬ ۙ وَّ اٰمَنَہُمۡ مِّنۡ خَوۡفٍ Who provides them with food against hunger, and with security against fear (of danger).',
 'اَرَءَیۡتَ 

In [8]:
from sentence_transformers import SentenceTransformer

# Load a pretrained Sentence Transformer model
# "BAAI/bge-m3" is the model identifier handed to the constructor
# (presumably resolved/downloaded from the Hugging Face Hub on first use - confirm).
# The resulting `model` encodes both the verse texts and the search queries below.
model = SentenceTransformer("BAAI/bge-m3")

In [9]:
# Maximum input sequence length the model accepts; longer inputs are truncated.
model.max_seq_length

8192

In [10]:
# Encode every combined verse string into one vector per text.
# Vectors are returned as a NumPy array and L2-normalised by the encoder.
embeddings = model.encode(
    texts,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,
)

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Embeddings shape: (number of encoded texts, embedding dimension)
embeddings.shape

(48, 1024)

In [12]:
# Element data type of the embedding array
embeddings.dtype

dtype('float32')

In [13]:
import faiss

# Flat (exhaustive) inner-product index over the verse embeddings.
# The embeddings were normalised at encode time, so the inner product
# of two vectors is their cosine similarity.
dimension = embeddings.shape[-1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

In [14]:
# Define Search Function
def search_verses(query, k):
    """Return the verses most similar to ``query``.

    The query is encoded with the notebook-level ``model`` using the same
    options as the indexed verse texts (NumPy output, L2-normalised), then
    looked up in the notebook-level FAISS ``index``.

    Args:
        query: Free-text search string.
        k: Maximum number of verses to return.

    Returns:
        A list of at most ``k`` dicts, best match first, each with keys
        ``'score'`` (inner-product similarity as a Python float), ``'id'``,
        ``'arabic'`` and ``'translation'`` taken from ``verses``.
    """
    print(f"Query: '{query}'")

    # Encode the query exactly like the corpus so the scores are comparable.
    query_embedding = model.encode(
        [query],
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    print(f"Query embedding shape: {query_embedding.shape}")

    # FAISS expects a C-contiguous float32 matrix.
    query_matrix = np.ascontiguousarray(query_embedding, dtype=np.float32)
    scores, indices = index.search(query_matrix, k)

    # When k exceeds the number of indexed vectors FAISS pads the labels
    # with -1; indexing `verses` with -1 would silently return the last verse
    # as a bogus hit, so those padding slots are dropped.
    results = [
        {
            'score': float(score),
            'id': verses[int(idx)]['id'],
            'arabic': verses[int(idx)]['arabic'],
            'translation': verses[int(idx)]['translation'],
        }
        for score, idx in zip(scores[0], indices[0])
        if idx >= 0
    ]
    print(f"Found {len(results)} results.")

    return results

In [15]:
# Smoke-test the search with a question about the first indexed sura.
query1 = "What happened to the people of the elephant?"
results1 = search_verses(query=query1, k=3)

Query: 'What happened to the people of the elephant?'
Query embedding shape: (1, 1024)
Found 3 results.


In [16]:
# Print each hit as four lines: score, verse id, Arabic text, translation.
for i, result in enumerate(results1, 1):
    print(
        f"Score: {result['score']:.4f}",
        f"Verse ID: {result['id']}",
        f"Arabic: {result['arabic']}",
        f"Translation: {result['translation']}",
        sep="\n",
    )

Score: 0.5454
Verse ID: 105|1
Arabic: اَلَمۡ تَرَ کَیۡفَ فَعَلَ رَبُّکَ بِاَصۡحٰبِ الۡفِیۡلِ
Translation: Seest thou not how thy Lord dealt with the Companions of the Elephant?
Score: 0.4465
Verse ID: 107|4
Arabic: فَوَیۡلٌ لِّلۡمُصَلِّیۡنَ
Translation: So woe to the worshippers
Score: 0.4187
Verse ID: 105|4
Arabic: تَرۡمِیۡہِمۡ بِحِجَارَۃٍ مِّنۡ سِجِّیۡلٍ
Translation: Striking them with stones of baked clay.
