In [None]:
!pip install langchain

In [None]:
!pip install sentence_transformers

In [None]:
pip install faiss-cpu

In [4]:
import torch
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from langchain.embeddings import HuggingFaceEmbeddings

In [5]:
# Load the book records from the JSON file stored on Google Drive
from google.colab import drive
drive.mount('/content/drive')

# JSON text is UTF-8; state it explicitly instead of relying on the platform's
# locale-dependent default encoding.
with open("/content/drive/MyDrive/document/book.json", "r", encoding="utf-8") as file:
    books_data = json.load(file)

# One description per book, in the same order as books_data, so a position in
# `documents` always identifies the matching record in `books_data`.
documents = [book["description"] for book in books_data]

Mounted at /content/drive


In [6]:
# Sentence-embedding model that turns text into fixed-size vectors
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Embed every book description (one row of the result per document)
encoded_documents = embeddings_model.encode(documents)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
# Store embeddings in Faiss index
# IndexFlatIP ranks by raw inner product, which equals cosine similarity only
# for unit-length vectors. Normalise a float32 copy before indexing (a copy, so
# encoded_documents itself is left untouched and this cell can be re-run).
document_vectors = np.array(encoded_documents, dtype=np.float32, order="C")
faiss.normalize_L2(document_vectors)

dimension = document_vectors.shape[1]
index = faiss.IndexFlatIP(dimension)  # inner product on unit vectors == cosine similarity
index.add(document_vectors)

# Row i of the index must describe books_data[i]; lookups by position rely on it.
assert index.ntotal == len(books_data), "FAISS index and books_data are out of sync"

# Function to retrieve books using Faiss index
def retrieve_book(query, index, books_data, k=5):
    """Return up to ``k`` book records whose embeddings best match ``query``.

    Args:
        query: Free-text search string; embedded with the module-level
            ``embeddings_model``.
        index: FAISS index whose row ``i`` holds the embedding of
            ``books_data[i]``.
        books_data: Sequence of book records aligned with ``index``.
        k: Maximum number of books to return.

    Returns:
        A list of book records in the order FAISS ranked them. It can be
        shorter than ``k`` and is empty when the index is empty or ``k <= 0``.
    """
    # Never ask for more neighbours than the index actually holds.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    # encode() on a one-element list yields a single-row matrix, which is the
    # shape index.search() expects; FAISS also requires contiguous float32.
    query_embedding = np.ascontiguousarray(embeddings_model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_embedding, k)

    # FAISS pads missing results with -1. Indexing books_data with -1 would
    # silently return the LAST book, so drop anything outside the valid range.
    return [books_data[i] for i in indices[0] if 0 <= i < len(books_data)]

In [8]:
# GPT-2 language model and its tokenizer, both taken from the same checkpoint
GPT2_CHECKPOINT = "gpt2"
model = GPT2LMHeadModel.from_pretrained(GPT2_CHECKPOINT)
tokenizer = GPT2Tokenizer.from_pretrained(GPT2_CHECKPOINT)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [9]:
# Function to generate response using GPT-2 model
def generate_responses(prompt, max_new_tokens=100):
    """Sample a GPT-2 continuation of ``prompt`` and return only the new text.

    Args:
        prompt: Text the model should continue.
        max_new_tokens: Upper bound on the number of generated tokens. Must
            leave room for at least one prompt token in the 1024-token window.

    Returns:
        The generated continuation as a string, without the prompt and with
        surrounding whitespace removed.

    Raises:
        ValueError: If ``max_new_tokens`` is not between 1 and 1023.
    """
    context_window = 1024  # GPT-2's maximum sequence length (prompt + reply)
    if not 0 < max_new_tokens < context_window:
        raise ValueError("max_new_tokens must be between 1 and 1023")

    # Tokenise without truncation, then keep the END of an over-long prompt:
    # the most recent conversation turns matter most, and the reply still has
    # to fit inside the context window. (The tokenizer may warn about the
    # length of a long prompt; the slice below makes that harmless.)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    input_ids = input_ids[:, -(context_window - max_new_tokens):]
    if input_ids.shape[-1] == 0:
        # An empty prompt gives generate() nothing to condition on; start from
        # the beginning-of-text token instead.
        input_ids = torch.tensor([[tokenizer.bos_token_id]])
    attention_mask = torch.ones_like(input_ids)  # single unpadded sequence

    # max_new_tokens bounds only the reply. max_length would also count the
    # prompt and leave no room for a reply once the prompt is long.
    with torch.no_grad():
        outputs = model.generate(input_ids, attention_mask=attention_mask,
                                 pad_token_id=tokenizer.eos_token_id,
                                 max_new_tokens=max_new_tokens, num_return_sequences=1,
                                 temperature=0.7, do_sample=True)

    # generate() returns prompt + continuation; decode only the continuation
    # so callers do not get their own prompt echoed back.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In [16]:
# Chatbot Logic and User Interaction
def chatbot_response(user_input, context):
    """Produce the bot's reply to one user message.

    Args:
        user_input: The text the user typed.
        context: Conversation history that is prepended to the GPT-2 prompt.

    Returns:
        A fixed message for the ``exit`` and ``help`` commands
        (case-insensitive) and for blank input. Otherwise the GPT-2 reply
        followed by either a list of matching books or a note that none
        were found.
    """
    command = user_input.lower()
    if command == "exit":
        return "Goodbye!"
    if command == "help":
        return "I can help you find book recommendations. Just tell me what you're interested in!"
    if not user_input.strip():
        # A blank message carries nothing to generate from or search for.
        return "Please tell me what kind of books you're interested in."

    # Using GPT-2 to generate response based on context and user input
    prompt = f"{context}\nUser: {user_input}\nBot:"
    gpt_response = generate_responses(prompt)

    # Retrieve books using Faiss index
    relevant_books = retrieve_book(user_input, index, books_data)
    if not relevant_books:
        return f"{gpt_response}\n\nI couldn't find any relevant books. Could you please try rephrasing your query?"

    listings = "".join(
        f"Title: {book['title']}\nAuthor: {book['author']}\nDescription: {book['description']}\n\n"
        for book in relevant_books
    )
    return f"{gpt_response}\n\nHere are some books you might like:\n{listings}"

In [17]:
# User Interaction
print("Welcome to the Book Recommendation Chatbot!")
print("Type 'help' for assistance or 'exit' to end the conversation")

# The history is fed back into every prompt, so it has to stay bounded or it
# crowds the newest message out of the model's input. Keeping only the tail
# preserves the most recent exchanges.
# NOTE(review): 2000 characters is a rough budget, not a measured token count.
MAX_CONTEXT_CHARS = 2000
context = ""

while True:
    try:
        # Strip so that inputs such as "exit " are still recognised as commands.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted cell: leave cleanly, no traceback.
        print("\nBot: Goodbye!")
        break
    if not user_input:
        continue  # nothing typed; ask again
    bot_response = chatbot_response(user_input, context)
    print("Bot:", bot_response)
    if user_input.lower() == "exit":
        break
    context = (context + f"\nUser: {user_input}\nBot: {bot_response}\n")[-MAX_CONTEXT_CHARS:]

Welcome to the Book Recommendation Chatbot!
Type 'help' for assistance or 'exit' to end the conversation
You: help
Bot: I can help you find book recommendations. Just tell me what you're interested in!
You: I am interested in Science Fiction
Bot: 

Here are some books you might like:
Title: The Forge of God
Author: Greg Bear
Description:  The novel features scenes and events including the discovery of a near-dead alien in the desert, who clearly says in English, "I'm sorry, but there is bad news," and this alien's subsequent interrogation and autopsy; the discovery of an artificial geological formation and its subsequent nuclear destruction by a desperate military; and the Earth's eventual destruction by the mutual annihilation of a piece of neutronium and a piece of antineutronium dropped into Earth's core. There is another alien faction at work, however, represented on Earth by small spider-like robots that recruit human agents through some form of mind control. They frantically coll