<a href="https://colab.research.google.com/github/ndecavel/tdwi-llm/blob/main/%5BMAIN%5D_Lab_4_Giskard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core import Settings

import giskard
import pandas as pd

In [None]:
### Set ENV Variables
# Values already present in the environment are kept; anything missing is
# requested interactively so that no secret has to be pasted into the notebook.
from getpass import getpass


def _ensure_env(name: str, description: str, *, secret: bool = False) -> None:
    """Populate ``os.environ[name]`` interactively when it is unset or empty.

    Args:
        name: Environment variable to populate.
        description: Human-readable label shown in the prompt.
        secret: When true, read the value with ``getpass`` so it is not echoed.
    """
    if os.environ.get(name):
        return
    prompt = f"{description} ({name}): "
    value = getpass(prompt) if secret else input(prompt)
    os.environ[name] = value.strip()


_ensure_env("OPENAI_API_KEY", "OpenAI API key", secret=True)
_ensure_env("GEMINI_API_KEY", "Gemini API key", secret=True)


### Zilliz API Info
_ensure_env("ZILLIZ_API_KEY", "Zilliz API key", secret=True)
_ensure_env("CLOUD_REGION", "Zilliz cloud region")
_ensure_env("CLUSTER_ID", "Zilliz cluster id")
_ensure_env("PROJECT_ID", "Zilliz project id")
_ensure_env("CLUSTER_ENDPOINT", "Zilliz endpoint")


GPT_MODEL = 'gpt-4o-mini'

# Register the OpenAI model as the default LLM for llama_index.
llm = OpenAI(model=GPT_MODEL)

Settings.llm = llm

# Name of the collection that the next cell connects to.
IMPROVED_COLLECTION_NAME = "Improved_RAG"

In [None]:
## 1. Connect to the Zilliz Cloud cluster and attach to our collection
# overwrite=False keeps whatever is already stored in the collection.
vector_store = MilvusVectorStore(
    uri=os.environ.get("CLUSTER_ENDPOINT"),
    token=os.environ.get("ZILLIZ_API_KEY"),
    collection_name=IMPROVED_COLLECTION_NAME,
    overwrite=False,
    enable_sparse=True,
    hybrid_ranker="RRFRanker",
    hybrid_ranker_params={"k": 60},
)

# Build the index on top of the vector store.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Query engine that asks the vector store for "hybrid" query mode.
hybrid_query_engine = index.as_query_engine(vector_store_query_mode="hybrid")

In [None]:
def model_predict(df: pd.DataFrame) -> list:
    """Answer every question in ``df`` with the hybrid query engine.

    Giskard calls this function with a pandas.DataFrame containing the input
    variables needed by the model and expects a list of outputs, one per row.

    Args:
        df: Input frame; must contain a ``"question"`` column.

    Returns:
        One answer string per row, in row order.
    """
    # The query engine hands back a response object, not plain text; convert
    # it with ``str`` so the text-generation wrapper receives the answer itself.
    return [str(hybrid_query_engine.query(question)) for question in df["question"]]

# Wrap the prediction function as a Giskard text-generation model; the name and
# description tell Giskard what the model is meant to do.
giskard_model = giskard.Model(
    model=model_predict,
    model_type="text_generation",
    feature_names=["question"],
    name="BEHR Paint Technical Data Sheet Question Answering",
    description="This model answers any question about BEHR paint technical data sheets and the content that may be on them.",
)

In [None]:
golden_df = pd.read_csv('../data/Golden_Test_Data_DeepEval.csv')

# Presumably leftover index columns from earlier CSV exports; errors="ignore"
# keeps the cell working when the file was saved without them.
golden_df = golden_df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors="ignore")

golden_df = golden_df.rename(columns={'input': 'question', 'expected_output': 'ground_truth'})

# Let's grab a handful of examples.
# - Cap the sample at the number of available rows: sample() raises when asked
#   for more rows than exist.
# - Fix the seed so repeated runs scan the same questions.
n_examples = min(10, len(golden_df))
examples = golden_df.sample(n_examples, random_state=42)["question"].tolist()

# Only the questions are passed on; no target column is set.
giskard_dataset = giskard.Dataset(pd.DataFrame({"question": examples}), target=None)

# Smoke test: run the wrapped model on the sampled questions.
print(giskard_model.predict(giskard_dataset).prediction)

### Scan your model for vulnerabilities with Giskard

We can now run Giskard's `scan` to generate an automatic report about the model's vulnerabilities. This will thoroughly test different classes of model vulnerabilities, such as harmfulness, hallucination, prompt injection, etc.

The scan will use a mixture of tests drawn from a predefined set of examples, heuristics, and LLM-based generations and evaluations.

Since running the whole scan can take a bit of time, let’s start by limiting the analysis to the hallucination category:

In [None]:
# Quick first pass: restrict the scan to the hallucination category.
report = giskard.scan(
    giskard_model,
    giskard_dataset,
    only="hallucination",
)

In [None]:
# Show the hallucination-only scan report as the cell output.
display(report)

In [None]:
# Full scan: no dataset and no category filter are passed this time.
full_report = giskard.scan(giskard_model)

display(full_report)

# Save it to a file
report_path = "../data/scan_report.html"
full_report.to_html(report_path)

In [None]:
# NOTE(review): presumably derives guardrail rules from the issues found by the
# full scan and shows them as the cell output — confirm against the Giskard docs.
full_report.generate_rails()

In [None]:
# Build a test suite from the full scan report, then execute it; the run result
# is the cell's output.
test_suite = full_report.generate_test_suite(
    name="Test suite generated by scan",
)
test_suite.run()