# **NEW DATASET**

# RAG system

In [None]:
import pandas as pd
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
import torch
import faiss
import numpy as np
import re

# Load the new dataset
df_books = pd.read_csv('FullDataSetGenreAndDescription.csv')

# Keep only relevant columns
df_books = df_books[['title', 'authors', 'description', 'average_rating', 'num_pages', 'ratings_count', 'isbn13', 'genre']]

# Drop rows with missing descriptions
df_books.dropna(subset=['description'], inplace=True)

# Reset index after cleaning
df_books.reset_index(drop=True, inplace=True)

# Load pre-trained DPR Context Encoder and Tokenizer
model_name = 'facebook/dpr-ctx_encoder-single-nq-base'
context_encoder = DPRContextEncoder.from_pretrained(model_name)
tokenizer = DPRContextEncoderTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to device
context_encoder.to(device)

from tqdm import tqdm

def encode_descriptions(descriptions, batch_size=16):
    embeddings = []
    for i in tqdm(range(0, len(descriptions), batch_size)):
        batch = descriptions[i:i + batch_size].tolist()
        inputs = tokenizer(
            batch,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=512
        ).to(device)
        with torch.no_grad():
            outputs = context_encoder(**inputs).pooler_output
        embeddings.append(outputs.cpu().numpy())
    return np.vstack(embeddings)

# Encode descriptions
embeddings = encode_descriptions(df_books["description"])

# Initialize a FAISS index (L2 similarity)
dimension = embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)

# Add embeddings to the index
faiss_index.add(embeddings)

# Save the FAISS index for later use
faiss.write_index(faiss_index, "book_retrieval_index.faiss")

# Function to parse the user query for author, description preferences, and genre
def parse_user_query(query):
    author = None
    genre = None
    
    # Extract author if mentioned
    if 'by' in query:
        author_match = re.search(r'by ([\w\s]+)', query)
        if author_match:
            author = author_match.group(1).strip()

    # Extract genre if mentioned
    if 'genre' in query:
        genre_match = re.search(r'genre ([\w\s]+)', query)
        if genre_match:
            genre = genre_match.group(1).strip()

    # Remaining query as description keywords
    description_keywords = query.split()

    return author, genre, description_keywords

# Function to retrieve books based on user query
def retrieve_books(query, top_n=5):
    author, genre, description_keywords = parse_user_query(query)
    
    # Formulate a query based on parsed description keywords
    description_query = " ".join(description_keywords)
    
    # Encode the description query
    query_embedding = context_encoder(**tokenizer([description_query], return_tensors='pt', padding=True, truncation=True).to(device)).pooler_output
    query_embedding = query_embedding.cpu().detach().numpy()
    
    # Search the FAISS index for the top_n most similar books
    distances, indices = faiss_index.search(query_embedding, top_n)
    
    # Retrieve the details of the top books
    results = df_books.iloc[indices[0]]

    # Filter results by author if mentioned
    if author:
        results = results[results['authors'].str.contains(author, case=False, na=False)]

    # Filter results by genre if mentioned
    if genre:
        results = results[results['genre'].str.contains(genre, case=False, na=False)]

    return results

# Test the retriever with a sample query
sample_query = "Give me a thiller book"
top_books = retrieve_books(sample_query, top_n=5)
print(top_books[['title', 'authors', 'average_rating', 'genre', 'description']])


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this fu

                                                  title  \
78                                La danza de la muerte   
1298  At the Mountains of Madness and Other Novels o...   
2095                                   The Untouchables   
2396   Aeneid: Selections from Books 1  2  4  6  10  12   
3844                  Diablo II Ultimate Strategy Guide   

                                              authors  average_rating  \
78                    Stephen King/Eduardo Goligorsky            4.34   
1298  H.P. Lovecraft/August Derleth/E. Hoffmann Price            4.44   
2095                          Eliot Ness/Oscar Fraley            3.89   
2396                       Virgil/Barbara Weiden Boyd            4.35   
3844                                   Bart G. Farkas            3.81   

                                                  genre  \
78    Based on the title "La danza de la muerte," a ...   
1298  The genre for the book "At the Mountains of Ma...   
2095  Based on the title "The

# Generator

In [None]:
!pip install openai

  pid, fd = os.forkpty()


Collecting openai
  Downloading openai-1.57.4-py3-none-any.whl.metadata (24 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading openai-1.57.4-py3-none-any.whl (390 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.3/390.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading jiter-0.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (345 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.0/345.0 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd
import re
import torch
from openai import OpenAI
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
#from kaggle_secrets import UserSecretsClient
import faiss
import numpy as np

# Initialize the OpenAI client
client = OpenAI(api_key="XXX")

# Load the dataset
df_books = pd.read_csv("FullDataSetGenreAndDescription.csv")

# Drop rows with missing descriptions
df_books.dropna(subset=['description'], inplace=True)
df_books.reset_index(drop=True, inplace=True)

# Load the pre-trained DPR Context Encoder and Tokenizer
model_name = 'facebook/dpr-ctx_encoder-single-nq-base'
context_encoder = DPRContextEncoder.from_pretrained(model_name)
tokenizer = DPRContextEncoderTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
context_encoder.to(device)

# Function to encode descriptions using DPR
def encode_descriptions(descriptions, batch_size=16):
    embeddings = []
    for i in range(0, len(descriptions), batch_size):
        batch = descriptions[i:i + batch_size].tolist()
        inputs = tokenizer(batch, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
        with torch.no_grad():
            outputs = context_encoder(**inputs).pooler_output
        embeddings.append(outputs.cpu().numpy())
    return np.vstack(embeddings)

# Encode the book descriptions and create FAISS index
embeddings = encode_descriptions(df_books["description"])
faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
faiss_index.add(embeddings)

# Function to parse the user query for preferences like author, genre, and rating
def parse_user_query(query):
    preferences = {
        "genre": None,
        "author": None,
        "rating_min": None,
        "rating_max": None,
        "keywords": []
    }
    
    # Extract genre, author, rating, and keywords from the query
    genres = ["romance", "mystery", "thriller", "fantasy", "science fiction", "non-fiction", "historical", "adventure"]
    for genre in genres:
        if genre.lower() in query.lower():
            preferences["genre"] = genre
    
    author_pattern = r"by\s([A-Za-z\s]+)"
    author_match = re.search(author_pattern, query)
    if author_match:
        preferences["author"] = author_match.group(1).strip()

    rating_min_pattern = r"(above|at least|greater than)\s(\d(\.\d+)?)"
    rating_max_pattern = r"(below|under)\s(\d(\.\d+)?)"
    
    min_rating_match = re.search(rating_min_pattern, query)
    max_rating_match = re.search(rating_max_pattern, query)
    
    if min_rating_match:
        preferences["rating_min"] = float(min_rating_match.group(2))
    if max_rating_match:
        preferences["rating_max"] = float(max_rating_match.group(2))

    keyword_list = ["happy ending", "fast-paced", "slow burn", "adventure", "unexpected twists", "magical realism"]
    for keyword in keyword_list:
        if keyword.lower() in query.lower():
            preferences["keywords"].append(keyword)
    
    return preferences

# Function to retrieve top N books using DPR
def retrieve_books(query, top_n=5):
    query_embedding = context_encoder(**tokenizer([query], return_tensors='pt', padding=True, truncation=True).to(device)).pooler_output
    query_embedding = query_embedding.cpu().detach().numpy()
    
    # Search the FAISS index
    distances, indices = faiss_index.search(query_embedding, top_n)
    
    # Retrieve book details
    results = df_books.iloc[indices[0]]
    return results

# Function to format the prompt for GPT to generate a recommendation
def format_prompt(user_query, filtered_books):
    prompt = f"User query: \"{user_query}\"\n\nBased on the following book descriptions, provide a personalized book recommendation:\n"
    for i, book in enumerate(filtered_books, start=1):
        prompt += f"\nBook {i}: \nTitle: {book['title']}\nAuthor(s): {book['authors']}\nRating: {book['average_rating']}\nDescription: {book['description']}\n"
    prompt += "\nProvide your recommendation and explain why it suits the user's preferences."
    return prompt

# Function to get GPT-3.5 response
def get_gpt_response(prompt):
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant specializing in book recommendations."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=300
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {e}"

# Sample user query looking for a "thriller" book
#sample_user_query = "Can you recommend a thriller book?"

# Parse the user query to extract preferences
#preferences = parse_user_query(sample_user_query)

# Retrieve top 5 books based on preferences
#top_books = retrieve_books(sample_user_query, top_n=5)

# Print the retrieved top books
#print("Top 5 Books Suggested:")
#print(top_books[['title', 'authors', 'average_rating', 'description']])

# Format the prompt for GPT to generate a personalized recommendation
#prompt = format_prompt(sample_user_query, top_books.to_dict(orient='records'))

# Get the recommendation from GPT
#recommendation = get_gpt_response(prompt)

# Print the recommendation
#print("\nGenerated Recommendation:")
#print(recommendation)


Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokeniz

In [3]:

# Sample user query looking for a "thriller" book
sample_user_query = "Can you recommend a thriller book?"

# Parse the user query to extract preferences
preferences = parse_user_query(sample_user_query)

# Retrieve top 5 books based on preferences
top_books = retrieve_books(sample_user_query, top_n=5)

# Print the retrieved top books
print("Top 5 Books Suggested:")
print(top_books[['title', 'authors', 'average_rating', 'description']])

# Format the prompt for GPT to generate a personalized recommendation
prompt = format_prompt(sample_user_query, top_books.to_dict(orient='records'))

# Get the recommendation from GPT
recommendation = get_gpt_response(prompt)

# Print the recommendation
print("\nGenerated Recommendation:")
print(recommendation)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Top 5 Books Suggested:
                                                   title  \
548                           The Private Parts of Women   
5965                       The Complete Dream Dictionary   
2888   Waiting for the Party: The Life of Frances Hod...   
9013                           Fire Ice (NUMA Files  #3)   
10460                                             Tinsel   

                            authors  average_rating  \
548                 Lesley Glaister            3.80   
5965                    Pamela Ball            3.68   
2888                    Ann Thwaite            3.80   
9013   Clive Cussler/Paul Kemprecos            3.92   
10460               William Goldman            3.19   

                                             description  
548    A handsome new cover edition of Lesley Glaiste...  
5965           A practical guide to interpreting dreams.  
2888         A biography of the author of Secret Garden.  
9013                            A Kurt Austin adv

# Quiz

In [3]:
import pandas as pd
import re
import torch
import faiss
import numpy as np
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
from tqdm import tqdm

# Load the dataset (make sure it's uploaded to Kaggle environment)
df_books = pd.read_csv('FullDataSetGenreAndDescription.csv')

# Keep only relevant columns and drop rows with missing descriptions
df_books = df_books[['title', 'authors', 'description', 'average_rating', 'num_pages', 'ratings_count', 'isbn13', 'genre']]
df_books.dropna(subset=['description'], inplace=True)
df_books.reset_index(drop=True, inplace=True)

# Load pre-trained DPR Context Encoder and Tokenizer
model_name = 'facebook/dpr-ctx_encoder-single-nq-base'
context_encoder = DPRContextEncoder.from_pretrained(model_name)
tokenizer = DPRContextEncoderTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to device
context_encoder.to(device)

# Function to encode book descriptions using DPR
def encode_descriptions(descriptions, batch_size=16):
    embeddings = []
    for i in tqdm(range(0, len(descriptions), batch_size)):
        batch = descriptions[i:i + batch_size].tolist()
        inputs = tokenizer(
            batch,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=512
        ).to(device)
        with torch.no_grad():
            outputs = context_encoder(**inputs).pooler_output
        embeddings.append(outputs.cpu().numpy())
    return np.vstack(embeddings)

# Encode descriptions and create FAISS index
embeddings = encode_descriptions(df_books["description"])
dimension = embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(embeddings)

# Function to parse the user query (for personalized quiz answers)
def parse_user_query(query):
    author = None
    genre = None
    
    # Extract author if mentioned
    if 'by' in query:
        author_match = re.search(r'by ([\w\s]+)', query)
        if author_match:
            author = author_match.group(1).strip()

    # Extract genre if mentioned
    if 'genre' in query:
        genre_match = re.search(r'genre ([\w\s]+)', query)
        if genre_match:
            genre = genre_match.group(1).strip()

    # Remaining query as description keywords
    description_keywords = query.split()

    return author, genre, description_keywords

# Function to retrieve books based on the query
def retrieve_books(query, top_n=5):
    author, genre, description_keywords = parse_user_query(query)
    description_query = " ".join(description_keywords)
    
    # Encode the description query
    query_embedding = context_encoder(**tokenizer([description_query], return_tensors='pt', padding=True, truncation=True).to(device)).pooler_output
    query_embedding = query_embedding.cpu().detach().numpy()
    
    # Search the FAISS index for the top_n most similar books
    distances, indices = faiss_index.search(query_embedding, top_n)
    
    # Retrieve the details of the top books
    results = df_books.iloc[indices[0]]

    # Filter by author if mentioned
    if author:
        results = results[results['authors'].str.contains(author, case=False, na=False)]

    # Filter by genre if mentioned
    if genre:
        results = results[results['genre'].str.contains(genre, case=False, na=False)]

    return results

# Quiz function to collect user preferences
def personality_quiz():
    print("Welcome to the Book Personality Quiz! Let's find the perfect book for you.\n")
    
    # Question 1: Genre Preference
    genre = input("What genre do you prefer? (e.g., Fantasy, Mystery, Romance, Thriller, Sci-Fi): ")
    
    # Question 2: Book Length Preference
    book_length = input("Do you prefer shorter books (under 300 pages) or longer ones? (Short/Long): ")
    
    # Question 3: Reading Pace
    reading_pace = input("Do you prefer fast-paced books or slow, reflective ones? (Fast-paced/Slow-paced): ")
    
    # Formulate query based on user inputs
    query = genre + " " + book_length + " " + reading_pace
    
    # Retrieve books based on the query
    print("\nBased on your preferences, here are some book recommendations:\n")
    recommended_books = retrieve_books(query, top_n=5)
    
    # Display recommended books
    if not recommended_books.empty:
        for i, row in recommended_books.iterrows():
            print(f"Title: {row['title']}")
            print(f"Author: {row['authors']}")
            print(f"Average Rating: {row['average_rating']}")
            print(f"Number of Pages: {row['num_pages']}")
            print(f"Description: {row['description'][:200]}...")  # Displaying first 200 characters of the description
            print("-" * 50)
    else:
        print("Sorry, no recommendations found based on your preferences.")

# Run the personality quiz
#personality_quiz()


Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokeniz

# Interface

In [36]:
import tkinter as tk
from tkinter import messagebox

def personality_quiz():
    def submit_quiz():
        genre = genre_var.get()
        length = length_var.get()
        pace = pace_var.get()

        # Create a query based on user input
        query = f"{genre} {length} {pace}"
        
        # Retrieve books based on the query
        recommended_books = retrieve_books(query, top_n=5)
        
        # Format and display the recommended books
        result = "\n".join([f"Title: {book['title']}\nAuthor: {book['authors']}\nRating: {book['average_rating']}\nPages: {book['num_pages']}\nDescription: {book['description'][:200]}...\n{'-'*50}" for book in recommended_books.to_dict(orient='records')])
        
        if not result:
            result = "Sorry, no recommendations found based on your preferences."

        messagebox.showinfo("Quiz Recommendations", f"Based on your preferences, here are some books:\n{result}")

    # Create a new window for the quiz
    quiz_window = tk.Toplevel(root)
    quiz_window.title("Book Personality Quiz")

    tk.Label(quiz_window, text="Welcome to the Book Personality Quiz! Let's find the perfect book for you.", font=("Arial", 50)).pack(pady=20)

    # Genre question
    tk.Label(quiz_window, text="What genre do you prefer? (e.g., Fantasy, Mystery, Romance, Thriller, Sci-Fi)").pack()
    genre_var = tk.StringVar()
    tk.Entry(quiz_window, textvariable=genre_var).pack(pady=50)

    # Length question
    tk.Label(quiz_window, text="Do you prefer shorter books (under 300 pages) or longer ones? (Short/Long)").pack()
    length_var = tk.StringVar()
    tk.Entry(quiz_window, textvariable=length_var).pack(pady=50)

    # Pace question
    tk.Label(quiz_window, text="Do you prefer fast-paced books or slow, reflective ones? (Fast-paced/Slow-paced)").pack()
    pace_var = tk.StringVar()
    tk.Entry(quiz_window, textvariable=pace_var).pack(pady=50)

    # Submit button to process the answers
    tk.Button(quiz_window, text="Submit", command=submit_quiz).pack(pady=20)


def send_query():
    user_query = str(user_input.get())

    if not user_query.strip():
        messagebox.showwarning("Input Error", "Please enter a query before sending.")
        return

    # Display user query in chat log
    chat_log.config(state=tk.NORMAL)  # Allow editing of chat_log
    chat_log.insert(tk.END, f"You: {user_query}\n", "user")
    chat_log.yview(tk.END)  # Scroll to the bottom
    chat_log.config(state=tk.DISABLED)  # Disable editing after inserting

    # Process user query and get response
    preferences = parse_user_query(user_query)
    books = retrieve_books(user_query, top_n=5)
    prompt = format_prompt(user_query, books.to_dict(orient='records'))
    response = get_gpt_response(prompt)

    # Display bot's response in chat log
    chat_log.config(state=tk.NORMAL)  # Allow editing of chat_log
    chat_log.insert(tk.END, f"Bot: {response}\n\n", "bot")
    chat_log.yview(tk.END)  # Scroll to the bottom
    chat_log.config(state=tk.DISABLED)  # Disable editing after inserting

    # Clear the user input field
    user_input.delete(0, tk.END)


# Main application window
root = tk.Tk()
root.title("Book Recommendation Chatbot")
root.geometry("1000x900")  # Increased window size

# Welcome message
welcome_label = tk.Label(root, text="Thank you for using our Book Recommendation Chatbot!", font=("Arial", 700), pady=20)
welcome_label.pack()

# Chat log
chat_frame = tk.Frame(root)
chat_frame.pack(pady=20, fill=tk.BOTH, expand=True)

chat_log = tk.Text(chat_frame, wrap=tk.WORD, state=tk.DISABLED, font=("Arial", 60))  # Larger font size
chat_log.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

scrollbar = tk.Scrollbar(chat_frame, command=chat_log.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
chat_log["yscrollcommand"] = scrollbar.set

# Apply tags for formatting user and bot messages
chat_log.tag_configure("user", foreground="blue", font=("Arial", 60, "bold"))
chat_log.tag_configure("bot", foreground="green", font=("Arial", 60))

# User input
input_frame = tk.Frame(root)
input_frame.pack(pady=40, fill=tk.X)

user_input = tk.Entry(input_frame, font=("Arial", 60))  # Larger font size
user_input.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10, pady=20)

send_button = tk.Button(input_frame, text="Send", command=send_query, font=("Arial", 60))  # Larger button font
send_button.pack(side=tk.RIGHT, padx=10)

# Quiz button
quiz_button = tk.Button(root, text="Take Our Quiz", command=personality_quiz, font=("Arial", 60), pady=30)  # Larger font for button
quiz_button.pack(pady=30)

root.mainloop()


In [39]:
import tkinter as tk
from tkinter import messagebox

def personality_quiz():
    def submit_quiz():
        genre = genre_var.get()
        length = length_var.get()
        pace = pace_var.get()

        # Create a query based on user input
        query = f"{genre} {length} {pace}"
        
        # Retrieve books based on the query
        recommended_books = retrieve_books(query, top_n=5)
        
        # Format and display the recommended books
        result = "\n".join([f"Title: {book['title']}\nAuthor: {book['authors']}\nRating: {book['average_rating']}\nPages: {book['num_pages']}\nDescription: {book['description'][:200]}...\n{'-'*50}" for book in recommended_books.to_dict(orient='records')])
        
        if not result:
            result = "Sorry, no recommendations found based on your preferences."

        messagebox.showinfo("Quiz Recommendations", f"Based on your preferences, here are some books:\n{result}")

    # Create a new window for the quiz
    quiz_window = tk.Toplevel(root)
    quiz_window.title("Book Personality Quiz")

    tk.Label(quiz_window, text="Welcome to the Book Personality Quiz! Let's find the perfect book for you.", font=("Arial", 40)).pack(pady=20)

    # Genre question
    tk.Label(quiz_window, text="What genre do you prefer? (e.g., Fantasy, Mystery, Romance, Thriller, Sci-Fi)", font=("Arial", 30)).pack()
    genre_var = tk.StringVar()
    tk.Entry(quiz_window, textvariable=genre_var, font=("Arial", 30)).pack(pady=10)

    # Length question
    tk.Label(quiz_window, text="Do you prefer shorter books (under 300 pages) or longer ones? (Short/Long)", font=("Arial", 30)).pack()
    length_var = tk.StringVar()
    tk.Entry(quiz_window, textvariable=length_var, font=("Arial", 30)).pack(pady=10)

    # Pace question
    tk.Label(quiz_window, text="Do you prefer fast-paced books or slow, reflective ones? (Fast-paced/Slow-paced)", font=("Arial", 30)).pack()
    pace_var = tk.StringVar()
    tk.Entry(quiz_window, textvariable=pace_var, font=("Arial", 30)).pack(pady=10)

    # Submit button to process the answers
    tk.Button(quiz_window, text="Submit", command=submit_quiz, font=("Arial", 30)).pack(pady=20)


def send_query():
    user_query = str(user_input.get())

    if not user_query.strip():
        messagebox.showwarning("Input Error", "Please enter a query before sending.")
        return

    # Display user query in chat log
    chat_log.config(state=tk.NORMAL)  # Allow editing of chat_log
    chat_log.insert(tk.END, f"You: {user_query}\n", "user")
    chat_log.yview(tk.END)  # Scroll to the bottom
    chat_log.config(state=tk.DISABLED)  # Disable editing after inserting

    # Process user query and get response
    preferences = parse_user_query(user_query)
    books = retrieve_books(user_query, top_n=5)
    prompt = format_prompt(user_query, books.to_dict(orient='records'))
    response = get_gpt_response(prompt)

    # Display bot's response in chat log
    chat_log.config(state=tk.NORMAL)  # Allow editing of chat_log
    chat_log.insert(tk.END, f"Bot: {response}\n\n", "bot")
    chat_log.yview(tk.END)  # Scroll to the bottom
    chat_log.config(state=tk.DISABLED)  # Disable editing after inserting

    # Clear the user input field
    user_input.delete(0, tk.END)


# Main application window
root = tk.Tk()
root.title("Book Recommendation Chatbot")
root.geometry("1400x1200")  # Increased window size

# Welcome message
welcome_label = tk.Label(root, text="Thank you for using our Book Recommendation Chatbot!", font=("Arial", 36), pady=20)
welcome_label.pack()

# Chat log
chat_frame = tk.Frame(root)
chat_frame.pack(pady=20, fill=tk.BOTH, expand=True)

chat_log = tk.Text(chat_frame, wrap=tk.WORD, state=tk.DISABLED, font=("Arial", 28))  # Larger font size
chat_log.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

scrollbar = tk.Scrollbar(chat_frame, command=chat_log.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
chat_log["yscrollcommand"] = scrollbar.set

# Apply tags for formatting user and bot messages
chat_log.tag_configure("user", foreground="blue", font=("Arial", 28, "bold"))
chat_log.tag_configure("bot", foreground="green", font=("Arial", 28))

# User input
input_frame = tk.Frame(root)
input_frame.pack(pady=20, fill=tk.X)

user_input = tk.Entry(input_frame, font=("Arial", 28))  # Larger font size
user_input.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10, pady=10)

send_button = tk.Button(input_frame, text="Send", command=send_query, font=("Arial", 28))  # Larger button font
send_button.pack(side=tk.RIGHT, padx=10)

# Quiz button
quiz_button = tk.Button(root, text="Take Our Quiz", command=personality_quiz, font=("Arial", 28), pady=10)  # Larger font for button
quiz_button.pack(pady=20)

root.mainloop()
