# Load Models and Embeddings

In [1]:
!pip install langchain_groq langchain_huggingface faiss-cpu faiss-gpu

Collecting langchain_groq
  Downloading langchain_groq-0.1.9-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain_huggingface
  Downloading langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting groq<1,>=0.4.1 (from langchain_groq)
  Downloading groq-0.9.0-py3-none-any.whl.metadata (13 kB)
Collecting langchain-core<0.3.0,>=0.2.26 (from langchain_groq)
  Downloading langchain_core-0.2.32-py3-none-any.whl.metadata (6.2 kB)
Collecting sentence-transformers>=2.6.0 (from langchain_huggingface)
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting httpx<1,>=0.23.0 (from groq<1,>=0.4.1->langchain_groq)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting jsonpat

In [3]:
# Load environment variables and models
import os
from getpass import getpass

# Resolve the Groq API key without ever writing it into the notebook.
# Lookup order: Colab secret -> GROQ_API_KEY environment variable -> interactive prompt.
# Never paste a key into a cell (not even in a comment); any key that has been
# committed or shared this way must be revoked and replaced.
try:
    from google.colab import userdata
except ImportError:
    userdata = None  # not running on Colab

groq_token = None
if userdata is not None:
    try:
        groq_token = userdata.get('GROQ_API_KEY')
    except Exception:
        # Secret not defined, or notebook access to it not granted: fall through
        # to the other sources instead of failing here.
        groq_token = None

if not groq_token:
    groq_token = os.environ.get("GROQ_API_KEY") or getpass("Enter your GROQ API key: ")

# Loading the LLM model (Llama 3.1)
from langchain_groq import ChatGroq
groq_model = ChatGroq(api_key=groq_token, model="llama-3.1-8b-instant")

# Loading the HuggingFace embeddings model
from langchain_huggingface import HuggingFaceEmbeddings
embeds = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Define Knowledge Base and Save to File

In [4]:
import os

# Function to save the knowledge base to a file
def save_knowledge_base(data, file_path):
    """Write knowledge base entries to a text file, one entry per line.

    Args:
        data: Iterable of strings; each entry is written followed by a newline.
        file_path: Destination path. A missing parent directory is created.
            A bare file name (no directory part) is written relative to the
            current working directory.
    """
    # Directory part of the path; empty string when file_path is a bare file name
    directory = os.path.dirname(file_path)

    # os.makedirs("") raises FileNotFoundError, so the directory is only
    # checked/created when the path actually has a directory part.
    if directory:
        if not os.path.exists(directory):
            # exist_ok tolerates the directory appearing between the check and the call
            os.makedirs(directory, exist_ok=True)
            print(f"Directory '{directory}' created.")
        else:
            print(f"Directory '{directory}' already exists.")

    # Write the knowledge base data to the file (overwrites any existing file)
    with open(file_path, "w") as file:
        for entry in data:
            file.write(entry + "\n")
        print(f"Knowledge base saved to {file_path}")

# Knowledge base content, kept as (category label, description) pairs.
# Each pair is joined with " - " to form one line of the knowledge base file.
_kb_sections = (
    ("Category 1 - Login Issues",
     "Login issues often occur due to incorrect passwords or account lockouts."),
    ("Category 2 - App Functionality",
     "App crashes can be caused by outdated software or device incompatibility."),
    ("Category 3 - Billing",
     "Billing discrepancies may result from processing errors or duplicate transactions."),
    ("Category 4 - Account Management",
     "Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings."),
    ("Category 5 - Performance Issues",
     "Performance issues can be related to device specifications, network connectivity, or app optimization."),
)
knowledge_base = [" - ".join(section) for section in _kb_sections]

# Where the knowledge base is stored on disk
kb_file_path = "Data/kb_file.txt"

# Persist the entries, one per line
save_knowledge_base(data=knowledge_base, file_path=kb_file_path)


Directory 'Data' created.
Knowledge base saved to Data/kb_file.txt


# Load and Split the Knowledge Base into Chunks

In [5]:
!pip install langchain_community langchain_text_splitters

Collecting langchain_community
  Downloading langchain_community-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain_text_splitters
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting langchain<0.3.0,>=0.2.13 (from langchain_community)
  Downloading langchain-0.2.14-py3-none-any.whl.metadata (7.1 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.21.3-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 

In [27]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Function to load and split documents
def load_and_split_docs(file_path, chunk_size=200, chunk_overlap=None):
    """Load a text file and split it into chunks.

    Args:
        file_path: Path of the text file handed to TextLoader.
        chunk_size: Maximum chunk size passed to RecursiveCharacterTextSplitter.
        chunk_overlap: Overlap between consecutive chunks. When None, the
            splitter's default of 200 is used, capped at chunk_size because the
            splitter rejects an overlap larger than the chunk size (which made
            any chunk_size below 200 fail before this parameter existed).

    Returns:
        The result of splitter.split_documents() on the loaded documents.
    """
    loader = TextLoader(file_path)
    docs = loader.load()  # Loading documents from the file

    if chunk_overlap is None:
        chunk_overlap = min(200, chunk_size)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

# Load and split the knowledge base into chunks
kb_chunks = load_and_split_docs(kb_file_path)

In [28]:
# Display the chunks returned by load_and_split_docs for a visual check
kb_chunks

[Document(metadata={'source': 'Data/kb_file.txt'}, page_content='Category 1 - Login Issues - Login issues often occur due to incorrect passwords or account lockouts.'),
 Document(metadata={'source': 'Data/kb_file.txt'}, page_content='Category 2 - App Functionality - App crashes can be caused by outdated software or device incompatibility.'),
 Document(metadata={'source': 'Data/kb_file.txt'}, page_content='Category 3 - Billing - Billing discrepancies may result from processing errors or duplicate transactions.'),
 Document(metadata={'source': 'Data/kb_file.txt'}, page_content='Category 4 - Account Management - Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings.'),
 Document(metadata={'source': 'Data/kb_file.txt'}, page_content='Category 5 - Performance Issues - Performance issues can be related to device specifications, network connectivity, or app optimization.')]

# Create a Vector Store

In [29]:
from langchain_community.vectorstores import FAISS

# Function to create a vector store
def create_vector_store(documents, embeddings_model):
    """Build a FAISS vector store from documents.

    Args:
        documents: Documents to index.
        embeddings_model: Embedding model passed to FAISS as `embedding`.

    Returns:
        The object returned by FAISS.from_documents for these inputs.
    """
    vector_store = FAISS.from_documents(
        documents=documents,
        embedding=embeddings_model,
    )
    return vector_store

# Index the knowledge base chunks with the embeddings model
kb_vectorstore = create_vector_store(documents=kb_chunks, embeddings_model=embeds)

# Define the System Prompt for Question-Answering

In [30]:
# Importing the ChatPromptTemplate for prompt creation
from langchain_core.prompts import ChatPromptTemplate

# System message text: classification instructions with a {context} template
# variable for the background information.
guidelines_prompt = """
### Guidelines

Background Information:
{context}

Review the provided background information, which includes category labels and their descriptions. Then, follow these steps:

1. Analyze the given input text.
2. Categorize the input into one of these categories:
   - Category 1 - Login Issues
   - Category 2 - App Functionality
   - Category 3 - Billing
   - Category 4 - Account Management
   - Category 5 - Performance Issues

3. If the input is unrelated to the background information or you're unsure of the classification, respond with 'I don't know'.

4. Provide only the category label as your response, without any additional text.
    """

# Two-message chat prompt: the instructions above as the system message,
# then the text to classify ({input}) as the human message.
qa_template = ChatPromptTemplate.from_messages([
    ("system", guidelines_prompt),
    ("human", "{input}"),
])



# Build the Retrieval-Augmented Generation Chain

In [35]:
# Importing required function to create QA chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Create the QA chain from the LLM and the prompt template
qa_chain = create_stuff_documents_chain(groq_model, qa_template)

# Create the retriever for the vector store.
# The number of results has to go through search_kwargs; a bare `k=3` keyword
# is not a retriever setting, so the default result count stayed in effect.
retriever = kb_vectorstore.as_retriever(search_kwargs={"k": 3})

# Create the retrieval chain combining the retriever and the QA chain
rag_chain = create_retrieval_chain(retriever, qa_chain)

# Classify Support Tickets

In [37]:
# Function to classify support tickets using the RAG model
def classify_support_tickets(tickets, rag_chain):
    """Classify each ticket by running its text through the RAG chain.

    Args:
        tickets: Iterable of mappings that each have a 'text' key.
        rag_chain: Object whose invoke({"input": text}) returns a mapping
            with an 'answer' key.

    Returns:
        A list of {"ticket": text, "classification": answer} dicts, in the
        same order as the input tickets.
    """
    def _classify(ticket_text):
        # One chain invocation per ticket; only the 'answer' field is kept
        result = rag_chain.invoke({"input": ticket_text})
        return {"ticket": ticket_text, "classification": result['answer']}

    return [_classify(item['text']) for item in tickets]

# Function to display classified tickets in a formatted way
def display_classified_tickets(classified_tickets):
    """Print the classified tickets as a numbered, human-readable list.

    Args:
        classified_tickets: Iterable of mappings with 'ticket' and
            'classification' keys.
    """
    print("\n--- Classified Support Tickets ---\n")
    for position, entry in enumerate(classified_tickets, start=1):
        print(f"Ticket {position}:")
        # Indented detail lines, printed in a fixed order
        for label, key in (("Text", "ticket"), ("Classification", "classification")):
            print(f"   {label}: {entry[key]}")
        print()  # blank line between tickets

# Example tickets, each wrapped as {"text": ...}
support_tickets = [
    {"text": message}
    for message in (
        "My account login is not working. I've tried resetting my password twice.",
        "The app crashes every time I try to upload a photo.",
        "I was charged twice for my last subscription payment.",
        "I can't find the option to change my profile picture.",
        "The video playback is very laggy on my device.",
    )
]

# Run every ticket through the RAG chain
classified_tickets = classify_support_tickets(tickets=support_tickets, rag_chain=rag_chain)

# Print the results
display_classified_tickets(classified_tickets=classified_tickets)


--- Classified Support Tickets ---

Ticket 1:
   Text: My account login is not working. I've tried resetting my password twice.
   Classification: Category 1 - Login Issues

Ticket 2:
   Text: The app crashes every time I try to upload a photo.
   Classification: Category 2 - App Functionality

Ticket 3:
   Text: I was charged twice for my last subscription payment.
   Classification: Category 3 - Billing

Ticket 4:
   Text: I can't find the option to change my profile picture.
   Classification: Category 4 - Account Management

Ticket 5:
   Text: The video playback is very laggy on my device.
   Classification: Category 5 - Performance Issues

