# SBERT Embeddings for Quran Verses
This notebook loads Quran verses from a CSV file, generates an SBERT sentence embedding for each verse, binarizes the multi-label targets, and saves the embeddings and labels to a pickle file.

In [None]:
# Install required libraries
!pip install sentence-transformers pandas scikit-learn

In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import MultiLabelBinarizer
import pickle

## Load and Prepare Dataset

In [None]:
# Load the dataset (update path as needed)
df = pd.read_csv("QuranDS.csv", encoding='ISO-8859-1')

# The label columns lb1 ... lb12 are merged into a single list per row.
label_columns = [f'lb{i}' for i in range(1, 13)]


def _clean_labels(raw_labels):
    """Return the usable labels of one row as a list of stripped strings.

    Missing cells (NaN/None) are skipped. Cells that are empty or contain only
    whitespace are skipped as well, so they do not later turn into a spurious
    empty-string class when the labels are binarized.
    """
    stripped = (str(label).strip() for label in raw_labels if pd.notna(label))
    return [label for label in stripped if label]


# One list of cleaned labels per row, in the original row order.
df['labels'] = [_clean_labels(row) for row in df[label_columns].values.tolist()]

# Preview
df[['verse', 'labels']].head()

## Generate SBERT Embeddings

In [None]:
# Instantiate the pretrained SBERT checkpoint used for encoding.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every verse (in DataFrame row order) and keep the result as a NumPy
# array in X, the feature matrix.
verse_texts = df['verse'].tolist()
embeddings = model.encode(verse_texts, show_progress_bar=True)
X = np.array(embeddings)

## Encode Labels (MultiLabel)

In [None]:
# Turn the per-row label lists into a binary indicator matrix: one row per
# verse, one column per distinct label.
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(df['labels'])
label_classes = mlb.classes_  # label name for each column of y

# Quick sanity check: overall matrix shape plus the first row before/after encoding.
first_row_labels = df['labels'].iloc[0]
first_row_encoded = y[0]
print("Label shape:", y.shape)
print("Example labels:", first_row_labels, "=>", first_row_encoded)

## Save Embeddings and Labels

In [None]:
# Bundle the embeddings, the binarized labels and the label names into one
# tuple and pickle it, so later steps can load all three together.
artifacts = (X, y, label_classes)
with open("sbert_embeddings_quran.pkl", "wb") as out_file:
    pickle.dump(artifacts, out_file)

print("Embeddings and labels saved to sbert_embeddings_quran.pkl")