In [None]:
import os
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import DashScopeEmbeddings  
from langchain_core.prompts import ChatPromptTemplate
import pickle


In [None]:

dashscope_api_key = "<put_your_dashscope_api_key_here>"
dashscope_base_url = "<put_your_dashscope_base_url_here>"


with open("./Data/all_splits.pkl", "rb") as f:
    all_splits = pickle.load(f)

In [3]:

# Build your own system prompt in Arabic or adjusted tone
prompt = ChatPromptTemplate.from_messages([
    ("system",  " أنت عالم فقه مسلم فى غايه الذكاء مقدم لك بيانات لتستمد منها اجاباتك بناء على سؤال مقدم لك واعلم انه يوجد ايات قرءانية مكتوبه بطريقه خطأ لا تستمد منها أيضا تجنب الحروف غير العربية"),
    ("human", "استخدم السياق التالي للإجابة على السؤال:\n\n{context}\n\nالسؤال: {question}\n\nالإجابة:")
])



In [None]:


embeddings = DashScopeEmbeddings(
    dashscope_api_key=dashscope_api_key,
    model="text-embedding-v1"  
)

# Create vectorstore
vector_store = Chroma(embedding_function=embeddings)
_ = vector_store.add_documents(documents=all_splits)

retriever = vector_store.as_retriever(   
    search_type="similarity",    # or "mmr", "similarity_score_threshold"
    search_kwargs={"k": 3}       # how many results to retrieve
)


In [None]:
# Configure LLM for DashScope
llm = ChatOpenAI(
    model_name="qwen3-8b", 
    openai_api_key=dashscope_api_key,
    openai_api_base=dashscope_base_url,
    temperature=0,
)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


### Run evaluation

In [None]:
import pandas as pd
from tqdm import tqdm

Data = pd.read_csv(".\evaluation_dataset_plus_164.csv")

In [42]:
model = "deepseek"
Data[model] = ''
for i in tqdm(range(Data.shape[0]), desc="Processing questions"):
# Re-run the specific questions that had errors
    question = Data['question'][i]
    try:
        Data.loc[i, model] = rag_chain.invoke(question)
    except :
        try:
            # Fallback: Try getting response without context
            prompt_without_context = ChatPromptTemplate.from_messages([
                ("system", " أنت عالم فقه مسلم فى غايه الذكاء"),
                ("human", "{question}")
            ])
            chain_without_context = prompt_without_context | llm | StrOutputParser()
            out = chain_without_context.invoke({"question": question})
            Data.loc[i, model] = out
            print(f"Got fallback response for question {i}")
        except Exception as e2:
            print(f"Both attempts failed for question {i}: {e2}")
            Data.loc[i, model] = "No Response"


Processing questions:   1%|          | 1/163 [00:55<2:28:56, 55.16s/it]

Got fallback response for question 0


Processing questions:   3%|▎         | 5/163 [04:40<2:15:03, 51.29s/it]

Got fallback response for question 4


Processing questions:   9%|▉         | 15/163 [11:12<1:32:23, 37.46s/it]

Got fallback response for question 14


Processing questions:  11%|█         | 18/163 [14:04<2:00:25, 49.83s/it]

Got fallback response for question 17


Processing questions:  15%|█▌        | 25/163 [18:36<1:49:56, 47.80s/it]

Got fallback response for question 24


Processing questions:  18%|█▊        | 29/163 [21:56<1:45:45, 47.35s/it]

Got fallback response for question 28


Processing questions:  27%|██▋       | 44/163 [33:31<1:19:13, 39.94s/it]

Got fallback response for question 43


Processing questions:  29%|██▉       | 48/163 [38:00<2:14:07, 69.98s/it]

Got fallback response for question 47


Processing questions:  37%|███▋      | 61/163 [47:32<1:26:38, 50.97s/it]

Got fallback response for question 60


Processing questions:  38%|███▊      | 62/163 [49:14<1:51:50, 66.44s/it]

Got fallback response for question 61


Processing questions:  42%|████▏     | 69/163 [54:29<1:06:32, 42.47s/it]

Both attempts failed for question 68: Error code: 400 - {'error': {'code': 'data_inspection_failed', 'param': None, 'message': 'Input data may contain inappropriate content.', 'type': 'data_inspection_failed'}, 'id': 'chatcmpl-1e9a90b0-f2ba-958c-bb8d-94fef518e33f', 'request_id': '1e9a90b0-f2ba-958c-bb8d-94fef518e33f'}


Processing questions:  44%|████▎     | 71/163 [57:21<1:39:10, 64.68s/it]

Got fallback response for question 70


Processing questions:  48%|████▊     | 79/163 [1:02:53<1:06:46, 47.69s/it]

Got fallback response for question 78


Processing questions:  52%|█████▏    | 84/163 [2:00:27<20:36:06, 938.81s/it]

Both attempts failed for question 83: Connection error.


Processing questions:  52%|█████▏    | 85/163 [2:00:28<14:14:48, 657.54s/it]

Both attempts failed for question 84: Connection error.


Processing questions:  53%|█████▎    | 86/163 [2:00:29<9:51:12, 460.68s/it] 

Both attempts failed for question 85: Connection error.


Processing questions:  53%|█████▎    | 87/163 [2:00:30<6:48:59, 322.88s/it]

Both attempts failed for question 86: Connection error.


Processing questions:  54%|█████▍    | 88/163 [2:00:32<4:43:01, 226.42s/it]

Both attempts failed for question 87: Connection error.


Processing questions:  55%|█████▍    | 89/163 [2:00:33<3:15:56, 158.87s/it]

Both attempts failed for question 88: Connection error.


Processing questions:  55%|█████▌    | 90/163 [2:00:34<2:15:49, 111.64s/it]

Both attempts failed for question 89: Connection error.


Processing questions:  56%|█████▌    | 91/163 [2:00:36<1:34:13, 78.52s/it] 

Both attempts failed for question 90: Connection error.


Processing questions:  56%|█████▋    | 92/163 [2:02:01<1:35:09, 80.42s/it]

Got fallback response for question 91


Processing questions:  58%|█████▊    | 95/163 [2:03:44<56:39, 50.00s/it]  

Got fallback response for question 94


Processing questions:  63%|██████▎   | 102/163 [2:08:47<38:29, 37.86s/it]

Got fallback response for question 101


Processing questions:  68%|██████▊   | 111/163 [2:13:13<28:27, 32.84s/it]

Got fallback response for question 110


Processing questions:  85%|████████▍ | 138/163 [2:30:34<14:56, 35.88s/it]

Got fallback response for question 137


Processing questions:  88%|████████▊ | 143/163 [2:34:05<14:48, 44.42s/it]

Got fallback response for question 142


Processing questions:  93%|█████████▎| 152/163 [2:40:31<09:45, 53.27s/it]

Got fallback response for question 151


Processing questions: 100%|██████████| 163/163 [2:47:27<00:00, 61.64s/it]
