In [8]:
# waring control
import warnings
warnings.filterwarnings('ignore')

In [9]:
import os

# USER_AGENT has to be exported before WebBaseLoader is used
os.environ.update(
    {
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    }
)

In [10]:
# Importing required libraries
import getpass
import os

# Switch LangSmith tracing on via its environment flag.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt for the LangSmith key only when the environment does not already
# provide a non-empty one, so re-running the cell (or running with a
# pre-exported key) does not ask again. This mirrors how the Groq key is
# handled later in the notebook.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for Langsmith: ")

# Chat Model : Groq

In [11]:
# Importing required libraries
import getpass
import os

from langchain.chat_models import init_chat_model

# Ask for the Groq key interactively only when the environment holds none
# (an empty value counts as missing).
if os.environ.get("GROQ_API_KEY", "") == "":
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

# Chat model used by the generation step of the pipeline.
# NOTE(review): confirm this model id is still served by Groq before relying on it.
llm = init_chat_model(
    model="llama3-8b-8192",
    model_provider="groq",
)

## Embedding model : HuggingFace

In [12]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model handed to the vector store below.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Vector Store : In-Memory

In [13]:
# Importing required libraries
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory store that indexes documents with the embedding model defined above.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [14]:
# Importing required libraries
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load and chunk contents of the blog
# The SoupStrainer restricts parsing to elements carrying the listed CSS
# classes, so only those parts of the page end up in the loaded document.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Fail loudly when nothing was extracted (unreachable page, changed CSS class
# names, ...). Otherwise the store would stay empty and every later question
# would be answered without any retrieved context.
if not all_splits:
    raise ValueError(
        "No text chunks were produced from the blog post; "
        "check the URL and the CSS classes passed to SoupStrainer."
    )

# Index chunks
# NOTE(review): re-running this cell calls add_documents again on the same
# store — confirm whether duplicate entries matter for your use.
_ = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    """State dictionary shared by the steps of the question-answering graph.

    `retrieve` reads `question` and fills `context`; `generate` reads
    `question` and `context` and fills `answer`.
    """

    # The user's question; the graph is invoked with this key set.
    question: str
    # Documents returned by the vector-store similarity search in `retrieve`.
    context: List[Document]
    # Content of the chat model's response, set by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up documents similar to the question in the vector store.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": <documents>}`` holding the
        result of ``vector_store.similarity_search``.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}


def generate(state: State):
    """Answer the question with the chat model, using the retrieved context.

    Args:
        state: Current graph state; ``state["question"]`` and
            ``state["context"]`` are read.

    Returns:
        A partial state update ``{"answer": <text>}`` where the text is the
        ``content`` attribute of the model's response.
    """
    # Concatenate the page content of every retrieved document, separated by
    # a blank line, into a single context string.
    passages = [doc.page_content for doc in state["context"]]
    docs_content = "\n\n".join(passages)

    prompt_input = {"question": state["question"], "context": docs_content}
    reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": reply.content}


# Compile application and test
# Build the graph step by step: register retrieve -> generate as a sequence,
# make "retrieve" the entry point, then compile.
graph_builder = StateGraph(State)
graph_builder = graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [15]:
# Executing core logic: run the graph on a question and print the answer
# wrapped to 80 columns.

import textwrap

response = graph.invoke({"question": "what is Gradient based Attacks"})
print(textwrap.fill(response["answer"], width=80))

Gradient-based attacks are a type of attack that relies on gradient descent to
programmatically learn the most effective attacks in the white-box setting,
where the model's parameters and architecture are fully accessible. This type of
attack only works in the white-box setting and is not applicable to black-box
settings.


In [16]:
# Second question through the same graph; the answer is wrapped to 80 columns.
response = graph.invoke({"question": "what is Saddle Point Problem"})
print(textwrap.fill(response["answer"], width=80))

The Saddle Point Problem is a mathematical formulation of a bi-level
optimization process, where the objective of training a robust classifier is to
find the optimal model parameterization that minimizes the loss with the most
effective attacks triggered from the inner maximization process, while the inner
maximization finds the most effective adversarial data point that leads to high
loss.
