In [13]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import logging

In [14]:
# Set up logging to track any issues that might pop up
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [15]:
try:
    # Load our language model and tokenizer
    model_name = "facebook/opt-1.3b"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Loading sentence transformer for embeddings
    sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

    # Preparing knowledge base
    knowledge_base = [
    "Category 1 -Login Issues -Login issues often occur due to incorrect passwords or account lockouts.",
    "Category 2 -App Functionality -App crashes can be caused by outdated software or device incompatibility.",
    "Category 3 -Billing -Billing discrepancies may result from processing errors or duplicate transactions.",
    "Category 4 -Account Management -Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings.",
    "Category 5 -Performance Issues -Performance issues can be related to device specifications, network connectivity, or app optimization."
]


    # Create FAISS index for efficient retrieval
    kb_embeddings = sentence_model.encode(knowledge_base)
    index = faiss.IndexFlatL2(kb_embeddings.shape[1])
    index.add(kb_embeddings)

except Exception as e:
    logging.error(f"Error during initialization: {str(e)}")
    raise

2024-08-06 11:34:53,883 - INFO - Use pytorch device_name: cpu
2024-08-06 11:34:53,972 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [16]:
# This function finds the most relevant info from our knowledge base
def retrieve_relevant_info(query, k=2):
    try:
        query_vector = sentence_model.encode([query])
        _, I = index.search(query_vector, k)
        return [knowledge_base[i] for i in I[0]]
    except Exception as e:
        logging.error(f"Error in retrieve_relevant_info: {str(e)}")
        return []

In [17]:
def classify_ticket(ticket_text):
    try:
        # Retrieve relevant information
        relevant_info = retrieve_relevant_info(ticket_text)
        if not relevant_info:
            return "Classification unclear due to retrieval error"
        
         # We're asking our model to classify the ticket based on the relevant info
        prompt = f"""Classify the following support ticket into one of the given categories. 
Provide only the category number and name in your response, like 'Category X - Category Name'.

Support Ticket: {ticket_text}

Relevant Information:
{relevant_info[0]}
{relevant_info[1]}

Only provide the category number and name in the format 'Category X - Category Name'.
Classification:"""
        
        # Generate classification using LLM
        inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=20, num_return_sequences=1, temperature=0.7)
        classification = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Extract the category from the generated text
        lines = classification.split('\n')
        for line in lines:
            if line.startswith("Category"):
                return line.strip()
        
        return "Classification unclear"
    except Exception as e:
        logging.error(f"Error in classify_ticket: {str(e)}")
        return "Classification failed due to an error"

In [18]:
support_tickets = [
    {"text": "My account login is not working. I've tried resetting my password twice."},
    {"text": "The app crashes every time I try to upload a photo."},
    {"text": "I was charged twice for my last subscription payment."},
    {"text": "I can't find the option to change my profile picture."},
    {"text": "The video playback is very laggy on my device."}
]

In [19]:
# Process all tickets
for i, ticket in enumerate(support_tickets, 1):
    try:
        result = classify_ticket(ticket["text"])
        print(f"Ticket {i}:")
        print(f"Text: {ticket['text']}")
        print(f"Classification: {result}")
        print()
    except Exception as e:
        logging.error(f"Error processing ticket {i}: {str(e)}")
        print(f"Ticket {i}: Error during processing")
        print()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Ticket 1:
Text: My account login is not working. I've tried resetting my password twice.
Classification: Category 1 -Login Issues -Login issues often occur due to incorrect passwords or account lockouts.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Ticket 2:
Text: The app crashes every time I try to upload a photo.
Classification: Category 2 -App Functionality -App crashes can be caused by outdated software or device incompatibility.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Ticket 3:
Text: I was charged twice for my last subscription payment.
Classification: Category 3 -Billing -Billing discrepancies may result from processing errors or duplicate transactions.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Ticket 4:
Text: I can't find the option to change my profile picture.
Classification: Category 4 -Account Management -Account management includes tasks such as changing profile information, linking social media accounts, and managing privacy settings.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Ticket 5:
Text: The video playback is very laggy on my device.
Classification: Category 5 -Performance Issues -Performance issues can be related to device specifications, network connectivity, or app optimization.

