In [5]:
import pandas as pd
import sys
import os
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

from sqlalchemy import create_engine

In [None]:
#! pip install -U langchain openai chromadb langchain-experimental # (newest versions required for multi-modal)

In [None]:
# lock to 0.10.19 due to a persistent bug in more recent versions
#! pip install "unstructured[all-docs]==0.10.19" pillow pydantic lxml pillow matplotlib tiktoken open_clip_torch torch

In [9]:
# Add the parent directory to sys.path so the `utils` imports below can resolve
# (presumably the `utils` package lives one level above the working directory).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so that re-running this cell does not pile up duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils import preprocessing # functions for preprocessing the pdf texts
from utils import txt_gen # functions to generate ai based texts
from utils import vectors # functions for creating vector database


from dotenv import find_dotenv, load_dotenv

# Load the variables of the located .env file into the process environment.
_ = load_dotenv(find_dotenv())
# os.getenv returns None when the variable is not set.
OPENAI_API_KEY  = os.getenv('OPENAI_API_KEY')
# NOTE(review): the Python name says LANGCHAIN_API_KEY but the value is read from
# the LANGSMITH_API_KEY environment variable — confirm which name is intended.
LANGCHAIN_API_KEY = os.getenv("LANGSMITH_API_KEY")

In [17]:
# Base folder of the scraped notifications (relative path). get_doc looks for
# "<law code with '-' replaced by '_'>/notification" underneath it.
root_dir = "../../1_notification_scraper/NOTIFICATIONS"
# Default law code; the loops further down rebind `law_code` while iterating.
law_code = "2020-0791-F"

In [18]:
# Define the path to the SQLite database
# (relative path, so it is resolved against the current working directory)
db_path = os.path.join("..", "db", "notifications.db")
engine = create_engine(f"sqlite:///{db_path}")

# Load only the required columns into a DataFrame
# `tris_id` is the key later cells filter on; `executive_summary_draft_law` is the
# text later cells use as the reference summary.
query = """
    SELECT tris_id, title, year, country_labels, url, 
           executive_summary_draft_law, category_labels_broad, category_labels
    FROM notifications
"""
notifications_md = pd.read_sql_query(query, con=engine)

In [19]:
# Disabled snippet: the persistent Chroma setup is wrapped in a bare string literal,
# so nothing is created here — the cell only echoes the string as its output.
"""
vectorstore = Chroma(
        collection_name="test_tris",
        embedding_function=OpenAIEmbeddings(),
        persist_directory=os.path.join("..", "db")
    )

vectorstore.persist()
"""

'\nvectorstore = Chroma(\n        collection_name="test_tris",\n        embedding_function=OpenAIEmbeddings(),\n        persist_directory=os.path.join("..", "db")\n    )\n\nvectorstore.persist()\n'

In [29]:
def get_doc(root_dir,law_code,metadata):
    """Build a multi-vector retriever for the English draft of one notification.

    The draft document is looked up in
    ``<root_dir>/<law_code with '-' replaced by '_'>/notification``, split into
    text and table elements, summarised, and handed to
    ``vectors.create_multi_vector_retriever`` together with image summaries.

    Args:
        root_dir: Base folder that contains the notification sub-folders.
        law_code: Notification identifier, e.g. ``"2020-0791-F"``.
        metadata: Metadata forwarded to
            ``preprocessing.extract_elements_with_metadata``.

    Returns:
        Whatever ``vectors.create_multi_vector_retriever`` returns.

    Raises:
        FileNotFoundError: If the notification folder does not exist, or if it
            holds no file whose name contains "DRAFT" and "EN" and ends with
            .pdf, .docx or .doc.
    """
    folder_name = law_code.replace('-', '_')
    notification_path = os.path.join(root_dir, folder_name, 'notification')

    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # reproducible when more than one file matches.
    files_in_notification = sorted(os.listdir(notification_path))

    # Identify the path of the DRAFT file in English
    draft_file_path = next(
        (
            os.path.join(notification_path, file)
            for file in files_in_notification
            if "DRAFT" in file
            and "EN" in file
            and file.lower().endswith(('.pdf', '.docx', '.doc'))
        ),
        None,
    )
    if draft_file_path is None:
        # Fail here with a clear message instead of passing None downstream.
        raise FileNotFoundError(
            f"No English DRAFT document (.pdf/.docx/.doc) found for {law_code} "
            f"in {notification_path}"
        )

    raw_elements_with_md = preprocessing.extract_elements_with_metadata(
        draft_file_path, metadata=metadata
    )

    # Categorize into text and tables
    texts, tables = preprocessing.categorize_elements(raw_elements_with_md)

    # Generate image summaries
    img_base64_list, image_summaries = txt_gen.generate_img_summaries(draft_file_path)

    # Chroma vector store without a persist_directory (persistence is switched off).
    # NOTE(review): every call uses the same collection name "test_tris"; whether
    # documents added by earlier calls stay visible in later ones depends on Chroma's
    # in-memory client — confirm if results for different law codes must not mix.
    vectorstore = Chroma(
        collection_name="test_tris",
        embedding_function=OpenAIEmbeddings(),
    )

    # Summaries that get indexed next to the raw texts and tables
    text_summaries, table_summaries = txt_gen.generate_text_summaries(
        OPENAI_API_KEY, texts, tables, summarize_texts=True
    )

    # Create retriever
    retriever_multi_vector_img = vectors.create_multi_vector_retriever(
        vectorstore,
        text_summaries,
        texts,
        table_summaries,
        tables,
        image_summaries,
        img_base64_list,
    )
    return retriever_multi_vector_img

In [30]:
def chatbot(query,retriever_multi_vector_img):
    """Answer ``query`` with a multimodal RAG chain built on the given retriever.

    A chain is created through ``txt_gen.multi_modal_rag_chain`` on every call and
    the result of its ``invoke`` is returned unchanged.
    """
    return txt_gen.multi_modal_rag_chain(retriever_multi_vector_img).invoke(query)

In [31]:
# Tracing configuration passed through environment variables
# (presumably read by LangChain/LangSmith to enable tracing and to pick the
# endpoint and project — confirm against the installed versions).
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"]="tris project"

In [32]:
# Law codes to summarise in this manual run
law_code_list = [
    "2020-0791-F",
    "2020-0832-F",
    "2023-0503-SE"
]

# One record per law code: question, generated answer and the stored summary
evaluation_data = []

for law_code in law_code_list:

    # First metadata row whose tris_id equals the law code
    # (raises IndexError when the code is not present in notifications_md)
    law_code_md = notifications_md[notifications_md['tris_id']==law_code].to_dict(orient='records')[0]
    multimodal_retriever = get_doc(root_dir,law_code,law_code_md)

    # The question is a fixed, non-empty string, so no emptiness check is needed
    general_question = f"the question addresses {law_code}: "
    user_input = general_question + "please summarize this draft legislation"

    response = chatbot(user_input, multimodal_retriever)
    evaluation_data.append({
        'id': law_code,
        'question': user_input,
        'generated_answer': response,
        'original_summary': law_code_md['executive_summary_draft_law'],
    })

Processing: ../../1_notification_scraper/NOTIFICATIONS/2020_0791_F/notification/2020_0791_FDRAFT_N-2020-0791-000-EN.docx


ImportError: partition_docx is not available. Install the docx dependencies with pip install "unstructured[docx]"

## Evaluations

### Evaluation metrics:

In [27]:
from rouge_score import rouge_scorer

def rouge_summary_evaluation(run, example):
    """
    Evaluates generated output against reference using ROUGE metrics for summarization.
    Returns an average of ROUGE-1, ROUGE-2, and ROUGE-L F1 scores as a single score.

    Args:
        run: Object whose ``outputs["output"]`` holds the generated summary.
        example: Object whose ``outputs["output"]`` holds the reference summary.

    Returns:
        dict with ``"key"`` set to ``"rouge_summary_evaluation"`` and ``"score"``
        set to the mean F1 score. The score is 0.0 when either text is missing
        or empty (``outputs`` is None, the key is absent, or the value is
        None/empty), instead of raising inside the scorer.
    """
    # `outputs` can be None (e.g. when the run failed), so fall back to an empty dict.
    predicted_output = (run.outputs or {}).get("output")
    reference_output = (example.outputs or {}).get("output")

    # Nothing to compare: report zero overlap rather than crashing the evaluation.
    if not predicted_output or not reference_output:
        return {"key": "rouge_summary_evaluation", "score": 0.0}

    # Initialize ROUGE scorer
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference_output, predicted_output)

    # Calculate the average F1 score across ROUGE-1, ROUGE-2, and ROUGE-L
    avg_f1_score = (scores['rouge1'].fmeasure + scores['rouge2'].fmeasure + scores['rougeL'].fmeasure) / 3

    # Return a single score as a dictionary
    return {"key": "rouge_summary_evaluation", "score": avg_f1_score}

#### wrapper function for evaluation

In [15]:
# Evaluation target: builds a retriever for the example's law code, then asks the chatbot
def chatbot_with_retriever(input):
    """Answer one dataset example.

    Args:
        input: Mapping with the keys ``"question"`` and ``"law_code"``.

    Returns:
        dict with the single key ``"output"`` holding the chatbot response.
    """
    question, law_code = input["question"], input["law_code"]
    print('law code',law_code)

    # First metadata record whose tris_id equals the requested law code
    id_matches = notifications_md['tris_id'] == law_code
    metadata_record = notifications_md[id_matches].to_dict(orient='records')[0]

    # A new retriever is built for every example
    retriever = get_doc(root_dir, law_code, metadata_record)

    answer = chatbot(question, retriever)
    return {"output": answer}

In [16]:
from langsmith import Client, evaluate
import os

# Initialize Langsmith client
client = Client()

# Define your test cases
dataset_name = "notification evaluator 1"

# Reuse the dataset when one with this name already exists, so that re-running
# the cell does not try to create the same dataset a second time.
dataset = next(
    (ds for ds in client.list_datasets() if ds.name == dataset_name),
    None,
)
if dataset is None:
    dataset = client.create_dataset(
        dataset_name,
        description="Dataset for testing Q&A bot responses on summarizing draft legislation"
    )

In [None]:
from langsmith import Client

# Create the LangSmith client
client = Client()

# Name of the dataset to look up
dataset_name = "notification evaluator 1"

# Walk through the existing datasets until one with a matching name turns up
existing_datasets = client.list_datasets()
dataset = None
for candidate in existing_datasets:
    if candidate.name == dataset_name:
        dataset = candidate
        break

# Reuse the match when there is one; otherwise create the dataset
if dataset is not None:
    print(f"Using existing dataset: {dataset.name}")
else:
    dataset = client.create_dataset(
        dataset_name,
        description="Dataset for testing Q&A bot responses on summarizing draft legislation"
    )

# Now, you can use the dataset object to add examples or perform other operations.


In [28]:
# Define law codes to evaluate
# This rebinds `law_code_list`, replacing the three-item list defined for the
# manual summary loop earlier in the notebook.
law_code_list = ["2020-0791-F", "2020-0832-F"]

In [29]:
# Populate the dataset with examples
# The dataset is reused across runs, so collect the law codes that already have
# an example and skip them; otherwise every re-run would add duplicates.
existing_law_codes = {
    (example.inputs or {}).get("law_code")
    for example in client.list_examples(dataset_id=dataset.id)
}

for law_code in law_code_list:
    if law_code in existing_law_codes:
        continue

    # Generate law metadata for the current law code
    law_code_md = notifications_md[notifications_md['tris_id'] == law_code].to_dict(orient='records')[0]
    question = f"the question addresses {law_code}: please summarize this draft legislation"
    expected_summary = law_code_md['executive_summary_draft_law']

    # Add the reference summary and the law id to the dataset
    client.create_example(
        inputs={"question": question, "law_code": law_code},
        outputs={"output": expected_summary},
        dataset_id=dataset.id,
    )

In [30]:
# Add the parent directory to sys.path
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append: the same directory is already added by the setup cell near the
# top of the notebook, and re-running would otherwise add it again.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils import preprocessing # functions for preprocessing the pdf texts
from utils import txt_gen # functions to generate ai based texts

In [None]:
# Run the evaluation
# The dataset is referenced by name; chatbot_with_retriever is the target and
# rouge_summary_evaluation the only evaluator (presumably called once per example).
experiment_results = evaluate(
    lambda input: chatbot_with_retriever(input),
    data=dataset_name,
    evaluators=[rouge_summary_evaluation],
    experiment_prefix="qa-bot-experiment",
    metadata={
        "version": "1.0.0",
        "revision_id": "qa-testing"
    },
)

# Print or analyze the experiment results
# NOTE(review): this prints the returned results object itself — confirm that its
# text form actually contains the location of the experiment.
print("On Langsmith check the Experiment results on:", experiment_results)