In [None]:
import base64
import json
import os
import re

import requests
from fastapi import HTTPException, status
from pyTigerGraph import TigerGraphConnection
# from app.common.config import db_config

# Define roles with full access
# check_user_role grants access unconditionally to users holding one of these
# role names; the comparison is an exact, case-sensitive string match.
ALLOWED_ROLES = {'superuser', 'globaldesigner', 'admin'}

# Regex run by get_user_role over the text returned by GSQL `SHOW USER`.
# Group 1 captures the text after "- Name:", group 2 the text after
# "- Global Roles:". Both groups are lazy and a further "-" must follow the
# roles, so a user entry is only matched when another "- ..." line comes after it.
user_role_pattern = r'- Name:\s+(.+?)\s+- Global Roles:\s+(.+?)\s+-'

def get_user_role(username: str, password: str, conn=None) -> "tuple[dict[str, str], TigerGraphConnection]":
    """Look up the global roles of every user reported by GSQL ``SHOW USER``.

    Args:
        username: TigerGraph username used to open a connection when ``conn``
            is not supplied.
        password: Password for ``username``.
        conn: Optional existing connection to reuse. When ``None`` a new
            ``TigerGraphConnection`` is created with the given credentials.

    Returns:
        ``(user_roles, conn)``. ``user_roles`` maps each user name to the raw
        "Global Roles" text captured by ``user_role_pattern``; ``conn`` is the
        connection that was used, so callers can reuse it.

    Raises:
        HTTPException: 401 when the GSQL call fails with
            ``requests.exceptions.HTTPError``. Any other exception propagates
            unchanged.
    """
    if conn is None:
        conn = TigerGraphConnection(
            # host=db_config["hostname"], graphname="", username=username, password=password
            host="tigergraph_host",
            graphname="", username=username, password=password
        )

    try:
        info = conn.gsql("SHOW USER")
    except requests.exceptions.HTTPError as e:
        # Chain the original error so the underlying HTTP failure stays visible
        # in the traceback.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
        ) from e

    # Parse the "- Name: ... - Global Roles: ..." entries into {name: roles}.
    user_roles = {
        match.group(1).strip(): match.group(2).strip()
        for match in re.finditer(user_role_pattern, info)
    }
    return user_roles, conn

In [None]:
# Fetch the role mapping (keeping the connection for reuse) and list it,
# one "name: roles" line per user.
user_roles, conn = get_user_role(password="PASSWORD", username="USERNAME")

for user_name, role_text in user_roles.items():
    print (f"{user_name}: {role_text}")

In [None]:
def create_headers(username, password):
    """Build request headers carrying HTTP Basic credentials.

    The ``username:password`` pair is UTF-8 encoded, Base64 encoded, and placed
    in the ``Authorization`` header next to a JSON ``accept`` header.
    """
    token = base64.b64encode(f"{username}:{password}".encode("utf-8")).decode("utf-8")
    headers = {'accept': 'application/json'}
    headers['Authorization'] = f'Basic {token}'
    return headers

In [None]:
def get_user_conversation_ids(username, password, base_url="http://0.0.0.0:8000", timeout=10):
    """Fetch the conversation IDs that belong to a user.

    Args:
        username: User whose conversations are listed; also used for Basic auth.
        password: Password for ``username``.
        base_url: Scheme, host and port of the service. Defaults to the address
            this notebook has always used.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without one the call could block forever.

    Returns:
        A list with the ``conversation_id`` of every item in the JSON response,
        or ``None`` (after printing the status code and body) when the server
        does not answer with HTTP 200.
    """
    headers = create_headers(username, password)
    user_url = f'{base_url}/ui/user/{username}'

    response = requests.get(user_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        print(response.text)
        return None

    return [item['conversation_id'] for item in response.json()]

In [None]:
# Smoke-test the lookup; as the last expression of the cell, the returned value is displayed.
get_user_conversation_ids(username="supportai", password="supportai")

In [None]:
def _pair_questions_with_answers(messages):
    """Match each user message with the system message whose parent_id points at it."""
    questions = {m["message_id"]: m for m in messages if m["role"] == "user"}
    answers = {m["parent_id"]: m for m in messages if m["role"] == "system"}

    qa_pairs = []
    for q_id, question in questions.items():
        answer = answers.get(q_id)
        if answer is None:
            # Questions without a recorded answer are left out.
            continue
        qa_pairs.append({
            # .get() keeps one message with a missing field from aborting the
            # whole conversation with a KeyError.
            "question": question.get("content", ""),
            "answer": answer.get("content", ""),
            # 0 is the value get_feedback_stats counts as "No Feedback".
            "feedback": answer.get("feedback", 0),
        })
    return qa_pairs


def get_conversation_data(username, password, conversation_id, base_url="http://0.0.0.0:8000", timeout=10):
    """Fetch one conversation and return it as question/answer pairs.

    Args:
        username: User name for Basic auth.
        password: Password for ``username``.
        conversation_id: ID of the conversation to fetch.
        base_url: Scheme, host and port of the service. Defaults to the address
            this notebook has always used.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error.

    Returns:
        A list of ``{"question", "answer", "feedback"}`` dicts, one per user
        message that has a system reply, or ``None`` (after printing the status
        code and body) when the server does not answer with HTTP 200.
    """
    headers = create_headers(username, password)
    conversation_url = f'{base_url}/ui/conversation/{conversation_id}'

    response = requests.get(conversation_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        print(response.text)
        return None

    return _pair_questions_with_answers(response.json())

In [None]:
def check_user_role(username, password, user_roles, conversation_id=None):
    """Check whether a user may access conversations.

    Args:
        username: User to check; must be a key of ``user_roles``.
        password: Password, only used to look up the user's own conversations.
        user_roles: Mapping of user name to global-roles text (or an iterable
            of role names).
        conversation_id: Optional conversation the user wants to read.

    Returns:
        * The string ``"User does not exist in the database."`` when
          ``username`` is not in ``user_roles``.
        * ``True`` when the user holds one of ``ALLOWED_ROLES``, or when
          ``conversation_id`` is one of the user's own conversations.
        * ``False`` otherwise, including when the user's conversation list
          could not be retrieved.
    """
    if username not in user_roles:
        return "User does not exist in the database."

    raw_roles = user_roles.get(username)
    if isinstance(raw_roles, str):
        # NOTE(review): SHOW USER presumably lists several roles comma-separated
        # - confirm. A single role splits into a one-element list, so that case
        # behaves as before.
        raw_roles = raw_roles.split(",")
    granted_roles = {str(role).strip() for role in (raw_roles or ())}

    if not granted_roles.isdisjoint(ALLOWED_ROLES):
        return True  # Allow access for superusers, global designers, and admins

    if not conversation_id:
        return False

    # For non-privileged roles, the conversation must belong to the user.
    conversation_ids = get_user_conversation_ids(username, password)
    if conversation_ids is None:
        # The lookup failed (it returns None on a non-200 response); deny
        # rather than crash on `in None`.
        return False
    return conversation_id in conversation_ids

In [None]:
def fetch_user_conversations(username, password, user_roles, conversation_id=None):
    """Fetch conversations based on user roles and permissions.

    Args:
        username: User making the request.
        password: Password for ``username``.
        user_roles: Mapping passed through to ``check_user_role``.
        conversation_id: When given, only this conversation is fetched;
            otherwise every conversation listed for the user is fetched.

    Returns:
        * Whatever ``check_user_role`` returned when it is not ``True``
          (an error string or ``False``).
        * For a single conversation: its Q&A list, or the string
          ``"Conversation not found or could not be retrieved."`` when nothing
          came back.
        * Otherwise a dict mapping conversation ID to its Q&A list. It is empty
          when the ID list could not be retrieved.
    """
    # Pass the requested conversation along so that a non-privileged user can
    # still be granted access to a conversation they own.
    permission_check = check_user_role(
        username, password, user_roles, conversation_id=conversation_id
    )
    if permission_check is not True:
        return permission_check

    if conversation_id:
        # Fetch a specific conversation
        data = get_conversation_data(username, password, conversation_id)
        if data:
            return data
        return "Conversation not found or could not be retrieved."

    # Fetch all conversations. The ID lookup returns None on failure; treat
    # that as "no conversations" instead of iterating over None.
    conversation_ids = get_user_conversation_ids(username, password) or []
    conversations = {}
    for conv_id in conversation_ids:
        data = get_conversation_data(username, password, conv_id)
        if data:
            conversations[conv_id] = data

    return conversations

In [None]:
# Load the conversation that the analysis cells below work on.
conversation_data = fetch_user_conversations(
    username="USERNAME",
    password="PASSWORD",
    user_roles=user_roles,
    conversation_id="CONVO_ID",
)

In [None]:
def get_feedback_stats(conversation_data):
    """Count feedback values and express them as percentages.

    Args:
        conversation_data: Iterable of entries with a ``'feedback'`` key, where
            0 means no feedback, 1 thumbs up and 2 thumbs down.

    Returns:
        ``(feedback_counts, feedback_percentages)``, both keyed by
        ``'No Feedback'``, ``'Thumbs Up'`` and ``'Thumbs Down'``. Percentages
        are relative to the total number of entries, so entries with any other
        feedback value lower the sum below 100. With no entries every
        percentage is ``0.0``.
    """
    feedback_labels = {0: 'No Feedback', 1: 'Thumbs Up', 2: 'Thumbs Down'}
    feedback_counts = {label: 0 for label in feedback_labels.values()}
    total_entries = len(conversation_data)

    # Count feedback occurrences; unrecognised values are not counted.
    for entry in conversation_data:
        label = feedback_labels.get(entry['feedback'])
        if label is not None:
            feedback_counts[label] += 1

    # Calculate percentages, guarding against division by zero on empty input.
    if total_entries:
        feedback_percentages = {k: (v / total_entries) * 100 for k, v in feedback_counts.items()}
    else:
        feedback_percentages = {k: 0.0 for k in feedback_counts}

    return feedback_counts, feedback_percentages

In [None]:
# Compute the feedback tallies for the loaded conversation and show both views.
feedback_counts, feedback_percentages = get_feedback_stats(conversation_data)

print(f"Feedback Counts: {feedback_counts}")
print(f"Feedback Percentages: {feedback_percentages}")

In [None]:
import openai

# Set your OpenAI API key.
# Read it from the OPENAI_API_KEY environment variable so the secret never has
# to be pasted into (and saved with) the notebook. The original placeholder is
# kept as the fallback so behaviour is unchanged when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "APIKEY")

# def summarize_text(text):
#     response = openai.chat.completions.create(
#         model="gpt-4", 
#         messages=[
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
#         ],
#         max_tokens=100,  # Adjust the number of tokens as needed
#         temperature=0.1
#     )
#     return response.choices[0].message.content.strip()

def classify_text(text, labels, model="gpt-4"):
    """Ask an OpenAI chat model to assign ``text`` to one of ``labels``.

    Args:
        text: The text to classify.
        labels: Sequence of category names the model may choose from.
        model: Chat model name; defaults to the model this notebook has
            always used.

    Returns:
        The matching entry of ``labels`` when the reply equals one of them
        after trimming whitespace, surrounding quotes or punctuation, and
        ignoring case. Otherwise the stripped raw reply, so the caller can
        decide how to handle it.
    """
    # State the instruction before the text. Appending it after the text, as
    # before, makes it read as part of the material to classify.
    prompt = (
        f"Classify the following text into exactly one of these categories: {', '.join(labels)}\n"
        "Reply with the category name only.\n\n"
        f"Text:\n{text}"
    )
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an assistant that classifies text into categories."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=50,  # Adjust the number of tokens as needed
        temperature=0.1
    )
    reply = (response.choices[0].message.content or "").strip()

    # Callers compare the result against the label list with exact equality,
    # so map harmless variations ('"incorrect information."') back onto the
    # canonical label.
    normalized = reply.strip(" \t\r\n\"'`.").casefold()
    for label in labels:
        if label.casefold() == normalized:
            return label
    return reply

In [None]:
# The question of every Q&A pair is the text that gets classified.
feedback_messages = [item['question'] for item in conversation_data]

# Define labels for classification
labels = ["Missing Information", "Incorrect Information", "Irrelevant Information"]

# Bucket for replies that match none of the labels. It has to exist up front:
# appending to a missing 'Other' key raised KeyError.
OTHER_CATEGORY = 'Other'
classified_issues = {category: [] for category in [*labels, OTHER_CATEGORY]}

# Classify every message. Results are keyed by message text, so identical
# messages share one entry.
results = {}
for message in feedback_messages:
    results[message] = classify_text(message, labels)

# Categorize each piece of text
for text, category in results.items():
    bucket = category if category in classified_issues else OTHER_CATEGORY
    classified_issues[bucket].append(text)

# Print the results in the desired format (non-empty categories only,
# including the 'Other' bucket)
print("\nCategorized Issues:\n")
for category, issues in classified_issues.items():
    if issues:
        print(f"{category}:")
        for issue in issues:
            print(f" - {issue}")
        print()  # Add a newline for better readability

In [None]:
from transformers import pipeline
from tqdm import tqdm

# Load summarization and zero-shot classification pipelines.
# Both are created once at module level; analyze_negative_feedback_with_llm
# reads them as globals (`summarizer` for the summaries, `classifier` for the
# category labels).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def analyze_negative_feedback_with_llm(conversation_data):
    """Summarize and categorize the entries that received a thumbs-down.

    Uses the module-level ``summarizer`` and ``classifier`` pipelines.

    Args:
        conversation_data: Iterable of dict entries. Only entries whose
            ``'feedback'`` equals 2 are analysed. The analysed text is the
            entry's ``'content'`` if present, otherwise its ``'answer'``.

    Returns:
        ``(summarized_issues, categorized_issues)``: a list with one summary
        per analysed entry, and a dict mapping each category name to the list
        of texts whose top zero-shot label was that category.
    """
    # Define categories for classification
    categories = ["Missing Information", "Incorrect Information", "Irrelevant Information", "Other"]

    summarized_issues = []
    categorized_issues = {category: [] for category in categories}

    for entry in conversation_data:
        if entry.get('feedback') != 2:
            continue

        # The Q&A pairs built by get_conversation_data have no 'content' key,
        # so reading entry['content'] raised KeyError on them.
        # NOTE(review): falling back to the rated answer assumes the thumbs-down
        # refers to the system reply - confirm this is the text to analyse.
        content = entry.get('content') or entry.get('answer') or ''
        if not content.strip():
            # Nothing to summarize or classify.
            continue

        # Dynamically set max_length based on input length
        input_length = len(content.split())
        max_length = max(10, int(input_length * 0.8))

        # Summarize the content
        summary = summarizer(content, max_length=max_length, min_length=5, do_sample=False)[0]['summary_text']
        summarized_issues.append(summary)

        # Classify the content into categories; the first label is the top one
        classification = classifier(content, candidate_labels=categories)
        top_category = classification['labels'][0]
        categorized_issues[top_category].append(content)

    return summarized_issues, categorized_issues


In [None]:
# Run the summarization / zero-shot analysis over the loaded conversation.
summarized_issues, categorized_issues = analyze_negative_feedback_with_llm(conversation_data)

# The summaries stay available in `summarized_issues`; only the per-category
# view is printed here.
print("\nCategorized Issues:")
for category_name in categorized_issues:
    print(f"\n{category_name}:")
    for flagged_text in categorized_issues[category_name]:
        print(f" - {flagged_text}")

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Collect the question text of every Q&A pair (all pairs, not only those with
# negative feedback) as the documents for topic modelling.
feedback_messages = [qa_pair['question'] for qa_pair in conversation_data]

# Custom tokenizer to handle specific patterns
def custom_tokenizer(text):
    """Split ``text`` into alphanumeric tokens, dropping numbers and short tokens.

    Args:
        text: The document to tokenize.

    Returns:
        The runs of letters/digits found in ``text``, excluding tokens made
        only of digits and tokens shorter than 3 characters.
    """
    # Split by non-alphanumeric characters. A plain whitespace split left
    # punctuation attached ("12345?", "v2,"), which let numbers and short
    # tokens slip past the filter below.
    tokens = re.findall(r"[^\W_]+", text)
    # Remove numbers and tokens less than 3 characters long
    return [token for token in tokens if not token.isdigit() and len(token) > 2]

# Build the bag-of-words matrix using the custom tokenizer and English stop words.
vectorizer = CountVectorizer(tokenizer=custom_tokenizer, stop_words='english')
X = vectorizer.fit_transform(feedback_messages)

# Fit a two-topic LDA model with a fixed seed (fit() returns the estimator itself).
lda = LatentDirichletAllocation(random_state=42, n_components=2).fit(X)

# Display topics
def display_topics(model, feature_names, num_top_words):
    """Print, for every topic in ``model.components_``, its highest-weighted terms.

    Each topic produces a ``Topic <index>:`` line followed by a line with the
    ``num_top_words`` terms of largest weight, heaviest first, separated by
    spaces.
    """
    for topic_number, weights in enumerate(model.components_):
        # Indices sorted by descending weight, truncated to the requested count.
        ranked_indices = weights.argsort()[::-1][:num_top_words]
        top_terms = [feature_names[index] for index in ranked_indices]
        print(f"Topic {topic_number}:")
        print(" ".join(top_terms))

# Show the five heaviest terms of each fitted topic.
num_top_words = 5
feature_names = vectorizer.get_feature_names_out()
display_topics(model=lda, feature_names=feature_names, num_top_words=num_top_words)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Extract feedback messages (the question text of every Q&A pair)
feedback_messages = [item['question'] for item in conversation_data]

# Vectorize the feedback messages
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(feedback_messages)

# Apply K-means clustering.
# KMeans raises ValueError when asked for more clusters than there are samples,
# so cap the requested number at the number of messages.
MAX_CLUSTERS = 4  # Upper bound on the number of clusters
num_clusters = min(MAX_CLUSTERS, X.shape[0])
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X)

# Print clustering results
print("Clustering results:")
for i in range(num_clusters):
    print(f"Cluster {i}:")
    for message, label in zip(feedback_messages, kmeans.labels_):
        if label == i:
            print(f"- {message}")

# Print cluster centroids (important words for each cluster)
print("\nCluster centroids (top words per cluster):")
order_centroids = kmeans.cluster_centers_.argsort()[:, ::-1]
terms = vectorizer.get_feature_names_out()
for i in range(num_clusters):
    # Top 10 words per cluster, each preceded by a space
    top_terms = ''.join(f' {terms[ind]}' for ind in order_centroids[i, :10])
    print(f"Cluster {i} words:{top_terms}")

