In [1]:
from devtools import debug
from dotenv import load_dotenv

import os
import sys

project_root = '/mnt/c/Users/a884470/prj/genai-blueprint-main'  # Change this if needed
sys.path.append(os.path.join(project_root, 'python'))

load_dotenv(verbose=True)

%load_ext autoreload
%autoreload 2

#!export PYTHONPATH=":./python"

In [None]:
#!pip3 install -U langchain-community faiss-cpu langchain-openai tiktoken
#!pip3 install -U giskard

In [3]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import giskard
import pandas as pd
from langchain_community.document_loaders import PyPDFLoader

In [6]:
# Prepare vector store (FAISS) with IPPC report

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

from ai_core.embeddings import EmbeddingsFactory
from ai_core.llm import get_llm
from ai_core.prompts import def_prompt
from ai_core.vector_store import VectorStoreFactory

vs_factory = VectorStoreFactory(
    id="Chroma_in_memory",
    collection_name="giskard_test",
    embeddings_factory=EmbeddingsFactory(),
)

DOC = (
    "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_LongerReport.pdf"
)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100, add_start_index=True
)
documents = PyPDFLoader(DOC).load()
texts = text_splitter.split_documents(documents)
vs_factory.add_documents(texts)


# Prepare QA chain
system_prompt = """You are the Climate Assistant, a helpful AI assistant made by Eviden.
Your task is to answer common questions on climate change.
You will be given a question and relevant excerpts from the IPCC Climate Change Synthesis Report (2023).
Please provide short and clear answers based on the provided context. Be polite and helpful.

Context:
{context}"""

user_prompt = """
Question:
{question}

Your answer:
"""

llm = get_llm(llm_id="gpt_35_azure")


prompt = def_prompt(system=system_prompt, user=user_prompt)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
climate_qa_chain = create_retrieval_chain(
    vs_factory.vector_store.as_retriever(), question_answer_chain
)

# chain.invoke({"input": query})



[32m2024-11-15 09:04:09.707[0m | [1mINFO    [0m | [36mai_core.vector_store[0m:[36mvector_store[0m:[36m154[0m - [1mget vector store  : Chroma_in_memory/giskard_test_ada_002_azure[0m
[32m2024-11-15 09:04:35.911[0m | [1mINFO    [0m | [36mai_core.llm[0m:[36mget_llm[0m:[36m409[0m - [1mget LLM:'gpt_35_azure'[0m


In [9]:
def model_predict(df: pd.DataFrame):
    """Wraps the LLM call in a simple Python function.

    The function takes a pandas.DataFrame containing the input variables needed
    by your model, and must return a list of the outputs (one for each row).
    """
    return [climate_qa_chain.invoke({"query": question}) for question in df["question"]]

In [34]:
import giskard
import os
from giskard.llm.client.openai import OpenAIClient


api_key = os.getenv("AZURE_OPENAI_API_KEY")

if not api_key:
    raise ValueError("API key not found. Please set AZURE_OPENAI_API_KEY in your environment variables.")
else:
    print("API key found.")
    
giskard.llm.set_llm_api("azure")

# Set the OPENAI_API_KEY environment variable for Giskard
os.environ["OPENAI_API_KEY"] = api_key

# Initialize OpenAIClient with the API key
oc = OpenAIClient(model="gpt-4o")
oc.api_key = api_key

# Set the client as the default for Giskard
giskard.llm.set_default_client(oc)

giskard_model = giskard.Model(
    model=model_predict,
    model_type="text_generation",
    name="Climate Change Question Answering",
    description="This model answers any question about climate change based on IPCC reports",
    feature_names=["question"],
)

API key found.
2024-11-15 09:53:45,025 pid:1349 MainThread giskard.models.automodel INFO     Your 'prediction_function' is successfully wrapped by Giskard's 'PredictionFunctionModel' wrapper class.


In [35]:
scan_results = giskard.scan(giskard_model)

2024-11-15 09:53:49,236 pid:1349 MainThread httpx        INFO     HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 401 Unauthorized"


LLMConfigurationError: Could not authenticate with OpenAI API. Please make sure you have configured the API key by setting OPENAI_API_KEY in the environment.

In [32]:
display(scan_results)

NameError: name 'scan_results' is not defined

In [None]:
# Or save it to a file
scan_results.to_html("scan_results.html")