## Prerequisites

In [None]:
# Check which PyTorch build the runtime already provides before the install cells below run.
import torch
torch.__version__

In [None]:
!pip install -U transformers sentence-transformers  datasets tqdm

In [None]:
!pip install faiss-cpu

In [None]:
import torch
import transformers
import faiss
import sentence_transformers

# Report the installed version of each core dependency, one per line.
print(
    *(
        f"{label}: {module.__version__}"
        for label, module in (
            ("Torch", torch),
            ("Transformers", transformers),
            ("FAISS", faiss),
            ("Sentence Transformers", sentence_transformers),
        )
    ),
    sep="\n",
)

## Exploratory Data Analysis

In [None]:
import pandas as pd

# Load the dataset from a CSV in the working directory and preview the first rows.
# Later cells read its `intent`, `instruction` and `response` columns.
df = pd.read_csv("data.csv")
df.head()

In [None]:
# Class balance: number of rows per intent label.
# A bare last expression lets the notebook render the result itself instead of printing it.
df["intent"].value_counts()

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Standardise on the `question` / `answer` column names that the rest of the notebook reads
# (adapt the mapping to your dataset's headers as needed).
df = df.rename({"instruction": "question", "response": "answer"}, axis="columns")

In [None]:
# Inspect the column names after the rename in the previous cell.
df.columns

In [None]:
# Remove rows with a missing question or answer *before* casting to str:
# `.astype(str)` would otherwise turn NaN into the literal text "nan", which would
# then be embedded, indexed and potentially served back as a support answer.
# Resetting the index keeps row labels in step with row positions.
df = df.dropna(subset=["question", "answer"]).reset_index(drop=True)

# Normalise both text columns: force string dtype and trim surrounding whitespace.
df["question"] = df["question"].astype(str).str.strip()
df["answer"] = df["answer"].astype(str).str.strip()

In [None]:
# (rows, columns) of the frame; a bare last expression is displayed by the notebook.
df.shape

In [None]:
# Preview the frame after the column rename and text clean-up.
df.head()

## Define Embeddings

In [None]:

from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model; the same model encodes the indexed questions
# and, later, every incoming query.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
# Extract all the questions as a plain Python list
questions = df["question"].tolist()

# Generate one vector embedding per question (all-MiniLM-L6-v2 produces 384-d vectors)
question_embeddings = embedding_model.encode(questions, show_progress_bar=True)

## Define FAISS Database

In [None]:
import faiss
import numpy as np

# FAISS only accepts float32 input, so coerce the embeddings to a float32 ndarray first.
question_embeddings = np.asarray(question_embeddings, dtype="float32")

# Flat index comparing vectors by L2 (Euclidean) distance, sized to the embedding width.
index = faiss.IndexFlatL2(question_embeddings.shape[1])
index.add(question_embeddings)

print(f"FAISS index has {index.ntotal} vectors.")

In [None]:
def retrieve_similar_tickets(query, top_k=3):
    """Return the stored questions and answers most similar to ``query``.

    Args:
        query: Free-text customer question to look up.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A ``(matched_questions, matched_answers)`` tuple of two equally long
        lists, in the order returned by the index search. The lists hold fewer
        than ``top_k`` items when the index contains fewer vectors than that.
    """
    # Step 1: Embed the query (FAISS expects a 2-D float32 array)
    query_embedding = np.asarray(embedding_model.encode([query]), dtype="float32")

    # Step 2: Search FAISS (distances are not needed by callers)
    _, indices = index.search(query_embedding, top_k)

    # Step 3: Fetch the matching rows from the dataframe.
    # FAISS pads the result with -1 when it finds fewer than top_k neighbours;
    # df.iloc[-1] would silently return the *last* row, so skip those slots.
    hit_positions = [int(position) for position in indices[0] if position >= 0]
    matched_rows = [df.iloc[position] for position in hit_positions]

    matched_questions = [row["question"] for row in matched_rows]
    matched_answers = [row["answer"] for row in matched_rows]

    return matched_questions, matched_answers

In [None]:
query = "I want to cancel my recent purchase"

# Use dedicated names for the results: unpacking into `questions` would overwrite
# the full list of dataset questions created in the embedding cell.
matched_questions, matched_answers = retrieve_similar_tickets(query)

for rank, (matched_question, matched_answer) in enumerate(
    zip(matched_questions, matched_answers), start=1
):
    print(f"Matched Q{rank}: {matched_question}")
    print(f"Matched A{rank}: {matched_answer}")
    print("---")

In [None]:
!pip install -U  accelerate bitsandbytes einops

## Import Zephyr Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_name = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit quantisation settings. Passing `load_in_4bit=True` directly to
# `from_pretrained` is deprecated in favour of an explicit BitsAndBytesConfig.
# The compute dtype is set to float16 so it matches `torch_dtype` below; with the
# float32 default, bitsandbytes warns about slow inference on float16 inputs.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# `trust_remote_code` is deliberately left at its default (False): this checkpoint
# uses an architecture that ships with transformers, so there is no reason to allow
# code from the Hub repository to execute locally.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
)

In [None]:
def generate_response_local(query, retrieved_answers):
    """Generate a support reply to ``query`` using past answers as context.

    Args:
        query: The customer's question.
        retrieved_answers: Iterable of past support answers (strings); they are
            joined with blank lines and given to the model as context.

    Returns:
        The model's reply as a whitespace-stripped string. Only the newly
        generated text is returned, never the prompt.
    """
    context = "\n\n".join(retrieved_answers)

    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful, empathetic customer support assistant "
                "who always answers in a kind and professional tone."
            ),
        },
        {
            "role": "user",
            "content": (
                f"Customer's Question:{query}\n\n"
                f"Relevant Past Support Responses:{context}\n\n"
                "Now answer the customer's query in a clear, friendly tone:"
            ),
        },
    ]

    # Let the tokenizer render the model's own chat format. A hand-written
    # triple-quoted prompt picks up the function's source indentation and omits
    # the end-of-turn tokens that the chat template inserts.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Decode only what comes
    # after them instead of splitting the decoded text on "<|assistant|>", which
    # breaks whenever that marker appears in the context or in the reply itself.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

In [None]:
def get_rag_response(query, top_k=3):
    """Answer ``query`` end to end: retrieve similar past tickets, then generate a reply.

    Only the retrieved answers are passed on to the generator; the matched
    questions are not used.
    """
    _, context_answers = retrieve_similar_tickets(query, top_k=top_k)
    return generate_response_local(query, context_answers)

## Sample Test

In [None]:
# End-to-end check: retrieve similar tickets for the query and print the generated reply.
query = "How do I track my recent order?"
print(get_rag_response(query))

## We did it!!