In [2]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import JSONLoader
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
import csv
import json


In [3]:
"""
- Convert the CSV file into a JSON Object
- Convert each row into a Document
"""
from langchain.schema import Document


file_path = "./datasets/query_support_ticket.csv"

with open(file_path, mode='r', newline='', encoding ='utf-8') as csvfile:
    data = list(csv.DictReader(csvfile))

documents = [Document(page_content=str(row), metadata=row) for row in data]
documents

[Document(metadata={'Query': "I'm unable to generate reports from the dashboard. The button remains greyed out even after selecting all the required filters. This happens on Chrome on macOS Sonoma.", 'Title': 'Dashboard report generation button is greyed out after applying filters', 'Browser': 'Chrome', 'OS': 'macOS Sonoma', 'Customer_Type': 'Individual', 'Issue': 'Dashboard report generation button remains greyed out after applying filters', 'Resolution': 'Clear browser cache and cookies'}, page_content='{\'Query\': "I\'m unable to generate reports from the dashboard. The button remains greyed out even after selecting all the required filters. This happens on Chrome on macOS Sonoma.", \'Title\': \'Dashboard report generation button is greyed out after applying filters\', \'Browser\': \'Chrome\', \'OS\': \'macOS Sonoma\', \'Customer_Type\': \'Individual\', \'Issue\': \'Dashboard report generation button remains greyed out after applying filters\', \'Resolution\': \'Clear browser cache 

In [4]:
"""
- Use a light weight and free Open Source Embedding Model (using SentenceTransformers)
- create a FAISS Vector Store from the Documents
- Persist the FAISS index locally for later use
"""

model_name = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name = model_name)

faiss_db = FAISS.from_documents(documents, embeddings)
faiss_db.save_local("./vectorDB/faiss_index")


In [5]:
# Example query text used to sanity-check the in-memory index built above
query = "I have a login error in chrome"

# Perform a similarity search; no `k` is passed, so the library's default
# number of results is returned
results = faiss_db.similarity_search(query)

# Print each hit in full (its repr shows both page_content and metadata)
for result in results:
    print(result)  # use result.page_content or result.metadata to show only one part


page_content='{'Query': 'I’m using Brave browser and the login page doesn’t load—it’s just blank. Any compatibility issues?', 'Title': 'Login page not loading in Brave browser', 'Browser': 'Brave', 'OS': '', 'Customer_Type': 'Individual', 'Issue': 'Login page fails to load with blank screen', 'Resolution': 'Check for browser updates and disable any conflicting extensions'}' metadata={'Query': 'I’m using Brave browser and the login page doesn’t load—it’s just blank. Any compatibility issues?', 'Title': 'Login page not loading in Brave browser', 'Browser': 'Brave', 'OS': '', 'Customer_Type': 'Individual', 'Issue': 'Login page fails to load with blank screen', 'Resolution': 'Check for browser updates and disable any conflicting extensions'}
page_content='{'Query': 'every time I try to reset my password', 'Title': 'Password reset token expired error', 'Browser': '', 'OS': '', 'Customer_Type': 'Individual', 'Issue': 'Password reset token expired error', 'Resolution': 'Check if the password 

## If the computer (or kernel) is restarted, reload the local FAISS index created above


In [6]:
# Imports are repeated here so this cell can run on a fresh kernel without
# re-running the index-building cells above.
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Same embedding model name that was used when the index was built above.
# NOTE(review): the two should stay in sync — confirm if either one changes.
model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Load the persisted FAISS index with dangerous deserialization allowed.
# NOTE(review): the flag opts in to an unsafe (presumably pickle-based) load;
# only point this at index files written by this notebook — verify against
# the LangChain FAISS documentation.
faiss_db = FAISS.load_local("./vectorDB/faiss_index", embeddings, allow_dangerous_deserialization=True)

# Same kind of check as before, this time against the index reloaded from disk.
query = "Mobile login error"
results = faiss_db.similarity_search(query)

# Print only the text of each hit (the stringified CSV row).
for result in results:
    print(result.page_content)


{'Query': 'the app is stuck on a loading spinner after login. Happens on Android 13. Works fine on iOS. Any updates? – Beta Tester', 'Title': 'App loading spinner stuck after login on Android 13', 'Browser': 'Android 13', 'OS': 'Android', 'Customer_Type': 'Beta Tester', 'Issue': 'App is stuck on loading spinner after login on Android 13', 'Resolution': 'Check for app updates and ensure the device is running the latest Android OS version'}
{'Query': "The mobile app crashes on launch for Android 14 users. It briefly shows the splash screen then closes. We've tried reinstalling. Help? – Dev Team", 'Title': 'Mobile app crashes on launch for Android 14 users', 'Browser': '', 'OS': 'Android 14', 'Customer_Type': 'Developer', 'Issue': 'Mobile app crashes on launch for Android 14 users', 'Resolution': 'Check for app updates and ensure the device is compatible with Android 14. If the issue persists'}
{'Query': 'I’m using Brave browser and the login page doesn’t load—it’s just blank. Any compati

In [7]:

def query_faiss(user_query, index_path="./vectorDB/faiss_index", top_k=5,
                model_name="all-MiniLM-L6-v2"):
    """
    Load the persisted FAISS index and return the documents most similar to a query.

    Both the embedding model and the index are loaded again on every call, so
    each call pays the full load cost; callers that issue many queries may want
    to keep their own loaded index around instead.

    Args:
        user_query (str): The input query from the user.
        index_path (str): The path where the FAISS index is saved.
        top_k (int): Number of similar documents to request.
        model_name (str): Name of the embedding model used to embed the query.
            It should be the model the index at ``index_path`` was built with;
            the default is the model this notebook uses when creating the index.

    Returns:
        list: The matching document objects returned by the similarity search
        (``top_k`` is passed as ``k``).
    """
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    # SECURITY: allow_dangerous_deserialization=True opts in to an unsafe load
    # of the saved index. Only pass an index_path whose files were written by
    # trusted code (e.g. this notebook's own save_local call), never a
    # user-supplied or downloaded path.
    faiss_db = FAISS.load_local(
        index_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    return faiss_db.similarity_search(user_query, k=top_k)

In [8]:
# Interactive demo: prompt for a query, retrieve the closest tickets and
# print the text of each one under a separator line.
if __name__ == "__main__":
    query = input("Enter your query: ")
    similar_docs = query_faiss(query)
    
    print("\nTop matching documents:")
    for doc in similar_docs:
        # One call, newline-separated: same output as printing the separator
        # and the page content on two successive lines.
        print("-----", doc.page_content, sep="\n")



Top matching documents:
-----
{'Query': "we tried importing our user data via CSV but keep getting 'Invalid file format' even though it follows the template.", 'Title': 'CSV import failure with invalid format', 'Browser': 'Edge', 'OS': 'Windows 11', 'Customer_Type': 'Individual', 'Issue': 'CSV file format error despite following the template', 'Resolution': 'Verify file encoding (UTF-8 or UTF-16) and check for missing commas or incorrect delimiters in the CSV'}
-----
{'Query': 'when exporting a CSV report', 'Title': 'CSV export formatting issue with commas in text fields', 'Browser': '', 'OS': '', 'Customer_Type': 'Developer', 'Issue': 'Commas in text fields cause CSV export formatting issues', 'Resolution': 'Wrap fields in quotes when exporting CSV to prevent comma issues'}
-----
{'Query': "I just tried uploading a logo in our profile settings and received 'Unsupported file format'—but it's a PNG.", 'Title': "Logo upload fails with 'Unsupported file format' error", 'Browser': '', 'OS

In [9]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamlitCallbackHandler  # example callback for streaming output
from langchain.schema import HumanMessage, SystemMessage

# Load variables from a local .env file into the process environment.
# NOTE(review): the original note asked for OPENAI_API_KEY to be set, but the
# cells visible here generate answers with a local Qwen model and never call
# ChatOpenAI — confirm whether the key is still needed.
load_dotenv()


def construct_rag_prompt(query, retrieved_docs, verbose=True):
    """
    Build a prompt that includes retrieved document content and the user's query.

    Args:
        query (str): The customer's query; inserted verbatim at the end of the prompt.
        retrieved_docs (iterable): Retrieved documents; only the ``page_content``
            attribute of each one is read. The contents are joined with a blank
            line between them. An empty iterable yields an empty context section.
        verbose (bool): When True (the default, matching the previous
            behaviour) the joined context is also printed to stdout. Pass
            False to build the prompt without any output, e.g. when calling
            this from an application rather than from the notebook.

    Returns:
        str: The complete prompt text.
    """
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    if verbose:
        # Echo what was retrieved so the reader can compare it with the answer.
        print(context)

    # The template text (including its leading indentation) is part of what the
    # model sees, so it is kept exactly as-is.
    prompt = f"""
        You are a customer support AI assistant. Your task is to find the most appropriate resolution for a customer's issue based on their query.

        You are given:
        - A dataset of support tickets consisting of past customer's query that may contain similar issues and their corresponding resolutions

        Example input:
        Customer Query: "Hello team, we can’t delete old projects—clicking delete does nothing. No error message either. Using Edge on Windows."

        Retrieved Support Tickets:
        1. Issue: Project deletion not working
            Browser: Edge
            OS: Windows
            Resolution: Check Edge settings or project status
        
        Expected Response:
        Check Edge settings or project status

        Retrieved Documents : 
        {context}

        Instructions:
        1. Carefully analyze the customer's query.
        2. Review the retrieved support tickets to find one that closely matches the issue described.
        3. If a relevant resolution is found in the support tickets, return the most suitable resolution.
        4. If no matching issue is found, respond with: "No relevant resolution found in the current knowledge base."

        Respond with only the resolution text if found.

        Customer Query : 
        {query}

    """
    return prompt

In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer



def get_response_qwen3_1_7b(
    prompt,
    model_name="Qwen/Qwen3-1.7B",
    max_new_tokens=32768,
    enable_thinking=True,
    think_end_token_id=151668,
    verbose=True,
):
    """
    Generate an answer for ``prompt`` with a Qwen3 chat model and return the final text.

    The prompt is sent as a single user message. The generated tokens are split
    at the last occurrence of the ``</think>`` token: everything up to and
    including it is treated as the model's thinking, everything after it as the
    answer. If the token does not occur, the whole output is the answer.

    The tokenizer and model are loaded on every call.

    Args:
        prompt (str): Full prompt text, used as the content of the user message.
        model_name (str): Model identifier passed to ``from_pretrained`` for
            both the tokenizer and the model.
        max_new_tokens (int): Upper bound on generated tokens passed to
            ``generate``.
        enable_thinking (bool): Passed to the chat template; switches between
            thinking and non-thinking modes.
        think_end_token_id (int): Token id marking the end of the thinking
            section; 151668 is ``</think>`` for the default model.
        verbose (bool): When True (the default, matching the previous
            behaviour) print the thinking content and the answer.

    Returns:
        str: The decoded answer text with leading/trailing newlines stripped.
    """
    # Load the tokenizer and the model.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )

    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Conduct text completion.
    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + completion; drop the prompt tokens.
    prompt_length = len(model_inputs.input_ids[0])
    output_ids = generated_ids[0][prompt_length:].tolist()

    # Find the position just past the LAST end-of-thinking token by searching
    # the reversed list (a "rindex"); fall back to 0 when it is absent.
    try:
        index = len(output_ids) - output_ids[::-1].index(think_end_token_id)
    except ValueError:
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    if verbose:
        print("thinking content:", thinking_content)
        print("content:", content)

    return content

In [11]:
# End-to-end run: retrieve similar tickets, build the prompt, generate an answer.
query = "Tried importing our user data via CSV but keep getting 'Invalid file format' even though it follows the template."
retrieved_docs = query_faiss(query)
    
# construct_rag_prompt also prints the joined ticket text it inserts into the prompt.
prompt = construct_rag_prompt(query, retrieved_docs)
# Last expression of the cell, so the returned answer string is shown as the cell output.
get_response_qwen3_1_7b(prompt)

{'Query': "we tried importing our user data via CSV but keep getting 'Invalid file format' even though it follows the template.", 'Title': 'CSV import failure with invalid format', 'Browser': 'Edge', 'OS': 'Windows 11', 'Customer_Type': 'Individual', 'Issue': 'CSV file format error despite following the template', 'Resolution': 'Verify file encoding (UTF-8 or UTF-16) and check for missing commas or incorrect delimiters in the CSV'}

{'Query': "I just tried uploading a logo in our profile settings and received 'Unsupported file format'—but it's a PNG.", 'Title': "Logo upload fails with 'Unsupported file format' error", 'Browser': '', 'OS': '', 'Customer_Type': 'Individual', 'Issue': "Logo upload fails with 'Unsupported file format' error", 'Resolution': 'Check if the server settings allow PNG file uploads and ensure the file is correctly formatted.'}

{'Query': 'when exporting a CSV report', 'Title': 'CSV export formatting issue with commas in text fields', 'Browser': '', 'OS': '', 'Cus

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

thinking content: <think>
Okay, let's see. The customer's query is about importing user data via CSV but getting an 'Invalid file format' even though the file follows the template. I need to find a matching support ticket.

Looking at the retrieved documents, the first one is about a CSV import failure with invalid format. The customer's query matches that exactly: they tried importing via CSV, got invalid format despite following the template. The resolution there is to check the file encoding (UTF-8 or UTF-16) and check for missing commas or incorrect delimiters. 

The other tickets are about different issues like logo uploads, CSV export formatting, and attachments. The customer's issue is specifically about CSV import format, so the first document is the closest match. The resolution provided there is relevant here. 

No other tickets mention CSV import errors. So the answer should be the resolution from the first document.
</think>
content: Check UTF-8 or UTF-16 encoding and check

'Check UTF-8 or UTF-16 encoding and check for missing commas or incorrect delimiters in the CSV'