# GPT-Based Multi-Label Classification for Quran Verses
This notebook uses SBERT embeddings and the OpenAI GPT API to predict multiple labels for Quranic verses.

In [None]:
# Install necessary packages
!pip install openai 
!pip install jsonlines

## Load Data and Imports

In [None]:
import pickle
import numpy as np
import openai
import pandas as pd
from sklearn.model_selection import train_test_split

# Load SBERT embeddings (X), label indicators (y) and label names (label_classes).
# NOTE: pickle.load can execute arbitrary code; only open a pickle file you created yourself.
with open("sbert_embeddings_quran.pkl", "rb") as f:
    X, y, label_classes = pickle.load(f)

# Load the original verse texts. They are split together with X and y below and
# are sent to the models in later cells, so they are required, not optional.
# NOTE(review): this presumes the CSV rows are in the same order as the rows of X — confirm.
df = pd.read_csv("QuranDS.csv", encoding='ISO-8859-1')
verses = df['verse'].tolist()

# Prepare train/test split (10% held out, fixed seed for reproducibility).
# train_test_split raises ValueError if X, y and verses differ in length.
X_train, X_test, y_train, y_test, verses_train, verses_test = train_test_split(
    X, y, verses, test_size=0.1, random_state=42)

## Configure OpenAI API

In [None]:
# Set your OpenAI API key without storing it in the notebook:
# read it from the OPENAI_API_KEY environment variable, and fall back to a
# hidden interactive prompt when the variable is unset or empty.
import os
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [None]:
# Print the id, status and resulting model name of every fine-tune job
# returned by openai.FineTune.list()
fine_tunes = openai.FineTune.list()

for job in fine_tunes['data']:
    print(job['id'], job['status'], job['fine_tuned_model'], sep="  :  ")

## GPT Label Prediction Function

In [None]:
def predict_labels_with_gpt(verse, label_options, model="gpt-4", temperature=0):
    """Ask a chat model which of ``label_options`` apply to ``verse``.

    Args:
        verse: Text of the verse to classify.
        label_options: Iterable of candidate label names; each is converted
            with ``str`` before being listed in the prompt.
        model: Chat model name passed to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature. Defaults to 0 so repeated runs
            give reproducible labels.

    Returns:
        A list of lower-cased label strings parsed from the model's reply, in
        the order they appear. Labels are NOT checked against
        ``label_options``, so the model may return names outside the list.
    """
    prompt = (
        f"Quran Verse: \"{verse}\"\n"
        f"Available Labels: {', '.join(map(str, label_options))}\n"
        f"Predict relevant labels from the list above that apply to this verse."
        f" Return them as a comma-separated list.\n"
    )

    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": "You are a helpful assistant for classifying Quranic verses."},
            {"role": "user", "content": prompt},
        ],
    )

    # Guard against an empty/None message body so parsing never raises.
    reply = response['choices'][0]['message']['content'] or ""

    # The model does not always honour the "comma-separated" instruction: it may
    # put labels on separate lines, quote them, bullet them, or end with a period.
    # Treat newlines as separators and trim that decoration from each candidate.
    decoration = " \t\r\n\"'`.-*\u2022"
    candidates = reply.replace("\n", ",").split(",")
    cleaned = (candidate.strip(decoration).lower() for candidate in candidates)
    return [label for label in cleaned if label]

## Predict and Evaluate

In [None]:
from sklearn.metrics import classification_report

# Number of test verses to send to GPT, plus the result accumulators
n_samples = 283
predicted_labels, true_labels, sample_title = [], [], []

# Query the fine-tuned completion model once for every verse in df and
# collect the stripped completion text.
# NOTE(review): this loops over the whole dataset, not the test split, and
# sample_title is not read again in the visible cells — confirm it is still needed.
for verse_text in df['verse']:
    completion = openai.Completion.create(
        model="davinci:ft-launch-wild-2024-10-08-11-01-21",
        prompt=f"String:{verse_text} \n\n###\n\n",
        temperature=0.7,
        max_tokens=50,
        n=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=["END"],
    )

    generated = completion['choices'][0]['text'].strip()
    sample_title.append(generated)
    print("Predicted Titles: ", generated)
# Clamp to the size of the test split so a split shorter than n_samples
# cannot raise IndexError.
n_eval = min(n_samples, len(verses_test))

for i in range(n_eval):
    verse = verses_test[i]
    gpt_labels = predict_labels_with_gpt(verse, label_classes)
    predicted_labels.append(gpt_labels)

    # True labels are the label names whose indicator in y_test[i] equals 1
    true = [name for name, flag in zip(label_classes, y_test[i]) if flag == 1]
    true_labels.append(true)

    print(f"\nVerse: {verse}\nTrue Labels: {true}\nGPT Predicted: {gpt_labels}")

# Evaluate: turn the predicted label names into an indicator matrix with the
# same columns as y_test and print per-label precision / recall / F1.
if n_eval:
    label_names = [str(name) for name in label_classes]
    # predict_labels_with_gpt lower-cases its output, so match case-insensitively
    column_of = {name.lower(): col for col, name in enumerate(label_names)}

    y_pred = np.zeros((n_eval, len(label_names)), dtype=int)
    for row, labels in enumerate(predicted_labels[-n_eval:]):
        for label in labels:
            col = column_of.get(label)
            if col is not None:  # ignore names GPT invented outside the label list
                y_pred[row, col] = 1

    y_true = np.asarray(y_test[:n_eval]).astype(int)
    print(classification_report(y_true, y_pred, target_names=label_names, zero_division=0))

---
**Note:** GPT predictions are not directly usable for training, but they are useful for bootstrapping or validating label schemes.