In [None]:
from dotenv import load_dotenv

load_dotenv(verbose=True)

%load_ext autoreload
%autoreload 2

!export PYTHONPATH=":./python"

In [None]:
# Optional one-time setup: uncomment to install the dependencies into the kernel's environment.
# %pip install -U langchain-community faiss-cpu langchain-openai tiktoken
# %pip install -U giskard

In [None]:
import giskard
import pandas as pd
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Build the vector store that holds the chunked IPCC AR6 Synthesis Report.

from genai_tk.core.embeddings_factory import EmbeddingsFactory
from genai_tk.core.llm_factory import get_llm
from genai_tk.core.prompts import def_prompt
from genai_tk.core.vector_store_registry import VectorStoreRegistry
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Source document: the IPCC AR6 Synthesis Report (longer report), loaded by URL.
DOC = "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_LongerReport.pdf"

# Load the PDF, then cut it into 1000-character chunks that overlap by 100
# characters; add_start_index records each chunk's offset in its metadata.
documents = PyPDFLoader(DOC).load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    add_start_index=True,
)
texts = text_splitter.split_documents(documents)

# The store is selected by its registry id ("Chroma_in_memory") and uses the
# default embeddings factory.
vs_factory = VectorStoreRegistry(
    embeddings_factory=EmbeddingsFactory(),
    table_name_prefix="giskard_test",
    id="Chroma_in_memory",
)
vs_factory.add_documents(texts)


# Prepare the retrieval-augmented QA chain.

# System message: role description plus the retrieved excerpts, which
# `create_stuff_documents_chain` injects through the {context} placeholder.
system_prompt = """You are the Climate Assistant, a helpful AI assistant made by Eviden.
Your task is to answer common questions on climate change.
You will be given a question and relevant excerpts from the IPCC Climate Change Synthesis Report (2023).
Please provide short and clear answers based on the provided context. Be polite and helpful.

Context:
{context}"""

# `create_retrieval_chain` forwards the caller's query under the "input" key,
# so the user message must reference {input}; any other placeholder name is
# left unfilled when the chain is invoked with {"input": ...}.
user_prompt = """
Question:
{input}

Your answer:
"""

llm = get_llm(llm_id="gpt_35_openai")

# NOTE(review): def_prompt is assumed to build a chat prompt from the system
# and user templates - confirm against genai_tk.
prompt = def_prompt(system=system_prompt, user=user_prompt)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
climate_qa_chain = create_retrieval_chain(vs_factory.get().as_retriever(), question_answer_chain)

# Usage: climate_qa_chain.invoke({"input": "<your question>"})["answer"]

In [None]:
def model_predict(df: pd.DataFrame):
    """Answer every question in ``df`` with the climate QA chain.

    This is the prediction function handed to Giskard: it receives a
    pandas.DataFrame holding the model's input variables and must return one
    output per row.

    Args:
        df: Frame with a ``"question"`` column containing the user questions.

    Returns:
        A list with one answer string per row, in row order.
    """
    answers = []
    for question in df["question"]:
        # The retrieval chain reads the query from the "input" key. The same
        # text is also supplied under "question" so that a prompt template
        # written with a {question} placeholder is filled as well.
        result = climate_qa_chain.invoke({"input": question, "question": question})
        # The chain returns a dict (input, context, answer); only the generated
        # text is the model output.
        answers.append(result["answer"])
    return answers

In [None]:
from giskard.llm.client.openai import OpenAIClient

# Configure the LLM client Giskard uses by default: OpenAI API with a GPT-4 Turbo model.
giskard.llm.set_llm_api("openai")
oc = OpenAIClient(model="gpt-4-turbo-preview")
giskard.llm.set_default_client(oc)

# Wrap the prediction function so Giskard can call it. The name and
# description are passed to Giskard along with the single input feature.
giskard_model_settings = {
    "model": model_predict,
    "model_type": "text_generation",
    "name": "Climate Change Question Answering",
    "description": "This model answers any question about climate change based on IPCC reports",
    "feature_names": ["question"],
}
giskard_model = giskard.Model(**giskard_model_settings)

In [None]:
# Run giskard.scan on the wrapped model and keep the report for the cells below.
# NOTE(review): presumably slow and billed, since it calls the LLMs repeatedly - confirm before re-running.
scan_results = giskard.scan(giskard_model)

In [None]:
# Show the scan report inline in the notebook.
display(scan_results)

In [None]:
# Alternatively, export the scan report to an HTML file
# ("scan_results.html", a path relative to the working directory).
scan_results.to_html("scan_results.html")