In [2]:
import tkinter as tk
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration
from PIL import Image, ImageTk  # Import for handling images
import os

print("Loading models... This may take a minute.")

# Sentence encoder used to embed both the pages and the user queries.
# It is loaded by model name ('paraphrase-MiniLM-L6-v2').
embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Prefer a locally fine-tuned T5 checkpoint; when the directory is absent,
# fall back to the stock 't5-small' weights so the app still starts.
model_path = './trained_t5_model'
if not os.path.exists(model_path):
    print("Fine-tuned model not found. Using pretrained T5 model...")
    print("To use a fine-tuned model, please run T5_QnA_FineTuning.ipynb first.")
    tokenizer_t5 = T5Tokenizer.from_pretrained('t5-small')
    qa_model = T5ForConditionalGeneration.from_pretrained('t5-small')
else:
    print("Loading fine-tuned T5 model...")
    # local_files_only=True: read the checkpoint from disk only.
    tokenizer_t5 = T5Tokenizer.from_pretrained(model_path, local_files_only=True)
    qa_model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True)

print("Loading dataset...")

# The CSV is expected to already exist next to this script; the code below
# reads its 'title' and 'content' columns.
df = pd.read_csv('confluence_pages.csv')

# Build one "title: content" string per row; these are the texts that get
# embedded and later compared against the user's query.
pages_data = [
    f"{title}: {content}"
    for title, content in df[['title', 'content']].itertuples(index=False, name=None)
]

print("Computing embeddings...")
# Row i of page_embeddings corresponds to row i of df.
page_embeddings = embedding_model.encode(pages_data)

print("Ready! Starting application...")

# Answer a question from a context passage using the loaded T5 model
def qa_pipeline(query, context):
    """Generate an answer to ``query`` from ``context`` with the T5 model.

    The two strings are combined into a single
    ``"question: ... context: ..."`` prompt, tokenized (truncated to at most
    512 tokens), passed to ``qa_model.generate`` with ``max_length=150``,
    and the first generated sequence is decoded back to text.

    Args:
        query: The user's question.
        context: The passage the answer should be drawn from.

    Returns:
        The decoded answer string, with special tokens removed.
    """
    encoded = tokenizer_t5(
        f"question: {query} context: {context}",
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    generated_ids = qa_model.generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=150,
    )
    return tokenizer_t5.decode(generated_ids[0], skip_special_tokens=True)

# Function to process user queries and return the most relevant Confluence page response
def bot_response(query):
    """Find the page most similar to ``query`` and generate an answer from it.

    The query is embedded with ``embedding_model`` and compared against the
    precomputed ``page_embeddings`` by cosine similarity. The best-scoring
    row of ``df`` supplies the context handed to ``qa_pipeline``.

    Args:
        query: The user's question.

    Returns:
        A ``(answer, response, similarity_score)`` tuple: the generated
        answer text, the matched ``df`` row as a dict, and the cosine
        similarity between the query and that row.
    """
    scores = cosine_similarity(embedding_model.encode([query]), page_embeddings)

    # Only one query is encoded, so `scores` has a single row and the
    # flattened argmax is also the column index of the best page.
    best_index = np.argmax(scores)
    best_score = scores[0][best_index]
    matched_page = df.iloc[best_index].to_dict()

    generated_answer = qa_pipeline(query=query, context=matched_page['content'])
    return generated_answer, matched_page, best_score

# Create the main application window
window = tk.Tk()
window.title("WileyGPT - Confluence Chatbot")

# Fixed initial size and a light background shared by most widgets below.
window.geometry("1000x800")
window.configure(bg="#F7F9FC")

# Green banner across the top of the window.
header_frame = tk.Frame(window, bg="#4CAF50", padx=20, pady=15)
header_frame.pack(fill="x", pady=20)

# Banner text.
welcome_label = tk.Label(
    header_frame,
    text="Welcome to WileyGPT!",
    font=("Arial", 22, "bold"),
    fg="white",
    bg="#4CAF50",
)
welcome_label.pack(pady=5)

# Bot avatar shown below the header. The path is machine-specific, so the
# image is loaded on a best-effort basis: if it is missing or unreadable the
# application still starts, just without the picture.
image_path = r'C:\Users\locperera\OneDrive - Wiley\Documents\AI Hackathon\bot.jpg'  # Provide the correct path to the generated image

try:
    # The context manager closes the underlying file once the resized copy
    # (an independent in-memory image) has been produced.
    with Image.open(image_path) as source_image:
        image = source_image.resize((150, 150), Image.LANCZOS)
except OSError as exc:
    # Covers a missing file as well as a file Pillow cannot decode.
    print(f"Could not load bot image from {image_path!r} ({exc}); continuing without it.")
    image = None

if image is not None:
    # Keep `photo` bound at module level: the label only displays the image
    # while a Python reference to the PhotoImage is still alive.
    photo = ImageTk.PhotoImage(image)
    image_label = tk.Label(window, image=photo, bg="#F7F9FC")
    image_label.pack(pady=10)
else:
    photo = None
    image_label = None

# Instructions label (ask user to ask a question)
prompt_label = tk.Label(
    window,
    text="Hi there! How can I assist you today?",
    font=("Arial", 14),
    wraplength=650,
    fg="#333333",
    bg="#F7F9FC",
)
prompt_label.pack(pady=10)

# Row holding the text entry (left) and the Submit button (right).
input_frame = tk.Frame(window, bg="#F7F9FC")
input_frame.pack(pady=15)

# Single-line entry where the user types the question.
user_input = tk.Entry(
    input_frame,
    font=("Arial", 16),
    width=100,
    borderwidth=4,
    relief="solid",
    justify="center",
)
user_input.pack(side="left", padx=10)

# on_submit is defined further down the file, so the callback goes through a
# lambda that looks the name up only when the button is clicked.
submit_button = tk.Button(
    input_frame,
    text="Submit",
    font=("Arial", 16),
    bg="#4CAF50",
    fg="white",
    width=15,
    height=2,
    command=lambda: on_submit(),
    relief="flat",
    bd=0,
)
submit_button.pack(side="right")

# Container for the conversation history; it is packed right away and fills
# the remaining space of the window.
conversation_frame = tk.Frame(window, bg="#F7F9FC", width=900, height=500)
conversation_frame.pack(fill="both", expand=True)

# A canvas plus vertical scrollbar provide scrolling; the actual question and
# answer widgets are added to `scrollable_frame`, which lives on the canvas.
canvas = tk.Canvas(conversation_frame, bg="#F7F9FC")
scrollbar = tk.Scrollbar(conversation_frame, orient="vertical", command=canvas.yview)
scrollable_frame = tk.Frame(canvas)

# Embed the frame at the canvas origin and let the canvas drive the scrollbar.
canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
canvas.configure(yscrollcommand=scrollbar.set)

# Scrollbar on the right edge, canvas taking everything to its left.
scrollbar.pack(side="right", fill="y")
canvas.pack(side="left", fill="both", expand=True)

# Keys of responses already rendered, used by on_submit to skip duplicates.
seen_responses = set()

# Function to handle submit button click
def on_submit():
    """Handle a click on the Submit button.

    Reads the query from ``user_input``, asks ``bot_response`` for the best
    matching page and generated answer, appends the question (and, unless
    the identical response was already shown, the formatted answer) to the
    conversation area, and refreshes the canvas scroll region.

    Empty or whitespace-only input is ignored. If ``bot_response`` raises,
    the exception propagates, but the Submit button is restored first so the
    UI remains usable.
    """
    query = user_input.get()

    if not query.strip():
        return

    # Show a busy state; window.update() forces the redraw before the
    # blocking model call below.
    submit_button.config(text="Thinking...", state="disabled")
    window.update()

    try:
        answer, response, similarity_score = bot_response(query)
    finally:
        # Always restore the button, even when the lookup fails; otherwise
        # it would stay disabled on "Thinking..." with no way to retry.
        submit_button.config(text="Submit", state="normal")

    # Clear the entry so the next question can be typed immediately.
    user_input.delete(0, tk.END)

    # Append the user's question to the conversation.
    question_label_left = tk.Label(scrollable_frame, text=f"User: {query}", font=("Arial", 12, "bold"),
                                   fg="#333333", bg="#F7F9FC", anchor="w", justify="left")
    question_label_left.pack(fill="both", padx=10, pady=5)

    # A response is identified by its page title, page content and score;
    # an identical response is rendered only once per session.
    response_key = (response['title'], response['content'], similarity_score)
    if response_key not in seen_responses:
        seen_responses.add(response_key)
        _render_response(answer, response, similarity_score)

    # Recompute the scrollable area now that new widgets have been added.
    scrollable_frame.update_idletasks()
    canvas.config(scrollregion=canvas.bbox("all"))


def _render_response(answer, response, similarity_score):
    """Append a read-only text box describing the matched page.

    Args:
        answer: Generated answer text, shown under "Content:".
        response: Dict for the matched page; the keys read here are
            'title', 'resources', 'video_links', 'author',
            'people_tagged' and 'modified_date'.
        similarity_score: Similarity value shown with four decimals.
    """
    response_text_frame = tk.Frame(scrollable_frame)
    response_text_frame.pack(fill="both", expand=True, padx=10, pady=5)

    # Created editable so it can be filled in; locked at the end.
    response_text = tk.Text(response_text_frame, font=("Arial", 12), width=150, height=35, wrap="word",
                            padx=15, pady=15, bg="#FFFFFF", fg="#333333", borderwidth=2, relief="solid")
    response_text.tag_configure("bold", font=("Arial", 12, "bold"))

    response_text.insert(tk.END, "\n\n=====================\n")
    response_text.insert(tk.END, f"AskWiley: Great news! I've found the best match for your query. (Similarity score: {similarity_score:.4f}) Let's dive into the details:\n\n")

    response_text.insert(tk.END, "Title: ", "bold")
    response_text.insert(tk.END, f"{response['title']}\n\n")

    # "Content" shows the model-generated answer, not the raw page content.
    response_text.insert(tk.END, "Content:\n", "bold")
    response_text.insert(tk.END, f"{answer}\n\n")

    # Metadata sections: bold heading followed by the raw field value.
    # NOTE: a "Web Links" section (response['web_links']) is currently not displayed.
    metadata_sections = (
        ("Resources", "resources"),
        ("Video Links", "video_links"),
        ("Author", "author"),
        ("People Tagged", "people_tagged"),
    )
    for heading, key in metadata_sections:
        response_text.insert(tk.END, f"{heading}:\n", "bold")
        response_text.insert(tk.END, f"{response[key]}\n\n")

    response_text.insert(tk.END, "Last Modified:\n", "bold")
    response_text.insert(tk.END, f"{response['modified_date']}\n")

    response_text.insert(tk.END, "\n=====================\n")

    # Lock the widget so the user cannot edit the rendered answer.
    response_text.config(state=tk.DISABLED)
    response_text.pack(fill="both", expand=True)

# Start the Tk event loop; this call blocks until the window is closed.
window.mainloop()

Loading models... This may take a minute.


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 257.07it/s, Materializing param=pooler.dense.weight]                             
BertModel LOAD REPORT from: sentence-transformers/paraphrase-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Fine-tuned model not found. Using pretrained T5 model...
To use a fine-tuned model, please run T5_QnA_FineTuning.ipynb first.


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Loading weights: 100%|██████████| 131/131 [00:00<00:00, 356.85it/s, Materializing param=shared.weight]                                                      


Loading dataset...
Computing embeddings...
Ready! Starting application...
