In [None]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
# The dataset path used later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from transformers import LlamaForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import torch
import pandas as pd
import numpy as np
from huggingface_hub import login


In [None]:
# Read the Hugging Face access token from the Colab secret named 'HF_token'
# and authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably required to download the Llama checkpoint that is
# loaded further down — confirm the checkpoint is gated.
from google.colab import userdata
hf_token = userdata.get('HF_token')
login(token=hf_token)


In [None]:
# Book catalogue CSV stored on the mounted Google Drive.
file_path = "/content/drive/MyDrive/AI Lab/books_1.Best_Books_Ever.csv"

# Text columns the rest of the notebook relies on; force them to be read as strings.
text_columns = ['description', 'title', 'genres']
books_df = pd.read_csv(file_path, dtype={column: str for column in text_columns})

# Replace missing values with empty strings in the text columns only.
# Filling the whole frame with '' would also write strings into numeric
# columns that contain gaps (e.g. 'price'), turning them into mixed-type columns.
books_df[text_columns] = books_df[text_columns].fillna('')

# Work on a reproducible random subset of at most 2000 books. Capping n at
# len(books_df) keeps sample() from raising when the file has fewer rows.
books_df = books_df.sample(n=min(2000, len(books_df)), random_state=42)
books_df.head()


  books_df.fillna('', inplace=True)


Unnamed: 0,bookId,title,series,author,rating,description,language,isbn,genres,characters,...,firstPublishDate,awards,numRatings,ratingsByStars,likedPercent,setting,coverImg,bbeScore,bbeVotes,price
23058,989097.Discworld_Companion,Discworld Companion,Discworld Companion Books,"Terry Pratchett, Stephen Briggs",3.92,For the newcomer and old hand alike the Discwo...,English,9780575600300,"['Fantasy', 'Fiction', 'Reference', 'Humor', '...",[],...,10/30/94,[],2494,"['855', '808', '649', '134', '48']",93.0,[],https://i.gr-assets.com/images/S/compressed.ph...,99,1,0.85
19809,6238142-ludivine,Ludivine,Les gens de Mogador #2,Élisabeth Barbier,3.84,Du haut de ses seize ans Ludivine Peyrissac ne...,French,9782266143622,[],[],...,10/29/47,[],31,"['8', '11', '11', '1', '0']",97.0,[],https://i.gr-assets.com/images/S/compressed.ph...,100,1,6.67
29248,722192.The_Power_of_Two,The Power of Two,T*Witches #1,"H.B. Gilmour, Randi Reisfeld (Goodreads Author)",3.6,Camryn and Alex are twin witches that were rai...,English,9780613366106,"['Fantasy', 'Young Adult', 'Witches', 'Paranor...",[],...,05/01/01,[],1863,"['446', '519', '654', '190', '54']",87.0,[],https://i.gr-assets.com/images/S/compressed.ph...,95,2,
43511,6773700-the-midnight-hunt,The Midnight Hunt,Midnight Hunters #1,L.L. Raand,3.98,Medic Ryon Drake has never been good at follow...,English,9781602821408,"['Lesbian', 'LGBT', 'Fantasy', 'Paranormal', '...",[],...,,[],2438,"['988', '758', '445', '159', '88']",90.0,[],https://i.gr-assets.com/images/S/compressed.ph...,73,1,17.27
19002,13419938-beechi,"Beechi: Bulletsu, Bombsu, Bhagavadgeete",,Keshava Rao B.S.,4.13,"Collection of Beechi's speeches, articles and ...",Kannada,9999999999999,['Humor'],[],...,,[],104,"['45', '39', '12', '5', '3']",92.0,[],https://i.gr-assets.com/images/S/compressed.ph...,100,1,


In [None]:
# Sentence-embedding model; the same instance is later used to embed the user's query.
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device="cuda")

# Keep only books that actually have a description. Missing descriptions were
# replaced by empty strings when the data was loaded, so a plain isinstance
# check would let them through and blank texts would end up being recommended.
has_description = books_df['description'].apply(
    lambda desc: isinstance(desc, str) and bool(desc.strip())
)
books_df = books_df[has_description].copy()

# Encode every description in one batched call instead of one model call per row.
# The result is a 2-D tensor (one row per book) on the model's device.
description_embeddings = embed_model.encode(
    books_df['description'].tolist(), convert_to_tensor=True
)

# Store one 1-D tensor per book. The object array is filled element by element
# so that neither numpy nor pandas tries to convert the tensors themselves.
embedding_column = np.empty(len(books_df), dtype=object)
for position, row_embedding in enumerate(description_embeddings):
    embedding_column[position] = row_embedding
books_df['embedding'] = embedding_column



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
class BookRecommender:
    """Recommend books whose description embedding is closest to a query text.

    The constructor loads a Llama tokenizer and causal-LM checkpoint; note that
    ``recommend_books`` itself only uses the sentence-embedding model passed to
    it and the precomputed ``embedding`` column of ``books_df``.

    Attributes:
        tokenizer: Tokenizer loaded from ``llama_model_name``.
        llama_model: Llama causal LM in float16, placed on the GPU, in eval mode.
        books_df: DataFrame with at least the columns ``title``, ``genres`` and
            ``embedding`` (one 1-D tensor per row). May be assigned after
            construction.
    """

    def __init__(self, llama_model_name="meta-llama/Llama-3.2-1B", books_df=None):
        """Load the Llama tokenizer/model and optionally attach the catalogue.

        Args:
            llama_model_name: Hugging Face model id or path of the checkpoint.
            books_df: Optional catalogue DataFrame; defaults to ``None`` so it
                can still be assigned to ``self.books_df`` afterwards.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(llama_model_name)
        # device_map="cuda" already places the weights on the GPU, so no
        # additional .to("cuda") is needed after loading.
        self.llama_model = LlamaForCausalLM.from_pretrained(
            llama_model_name, torch_dtype=torch.float16, device_map="cuda"
        ).eval()
        self.books_df = books_df

    def recommend_books(self, input_desc, embed_model, top_n=5):
        """Return the ``top_n`` books most similar to ``input_desc``.

        Side effect: writes the cosine similarity of every book to the
        ``similarity`` column of ``self.books_df``.

        Args:
            input_desc: Free-text description of the desired book.
            embed_model: Object with an ``encode(text, convert_to_tensor=True)``
                method returning a 1-D tensor of the same size as the stored
                embeddings.
            top_n: Number of rows to return.

        Returns:
            DataFrame with the columns ``title``, ``genres`` and ``similarity``,
            sorted by descending similarity.

        Raises:
            ValueError: If ``self.books_df`` has not been set.
        """
        if self.books_df is None:
            raise ValueError(
                "books_df is not set; assign a DataFrame with an 'embedding' column "
                "before requesting recommendations."
            )

        if self.books_df.empty:
            # Nothing to score; torch.stack would fail on an empty list.
            similarities = []
        else:
            input_embedding = embed_model.encode(input_desc, convert_to_tensor=True)
            # Stack the per-book vectors into one (n_books, dim) matrix on the
            # query's device and score all books in a single call, instead of
            # one cosine_similarity call plus one .item() sync per row.
            embedding_matrix = torch.stack(list(self.books_df['embedding'])).to(input_embedding.device)
            similarities = torch.nn.functional.cosine_similarity(
                embedding_matrix, input_embedding.unsqueeze(0), dim=1
            ).tolist()

        self.books_df['similarity'] = pd.Series(
            similarities, index=self.books_df.index, dtype='float64'
        )

        return self.books_df.nlargest(top_n, 'similarity')[['title', 'genres', 'similarity']]




In [None]:
# Build the recommender and attach the embedded book catalogue to it.
recommender = BookRecommender()
recommender.books_df = books_df

# Ask what the reader is in the mood for and rank the catalogue against it.
user_input_description = input("Describe the book you'd like to read today: ")
recommended_books = recommender.recommend_books(
    input_desc=user_input_description,
    embed_model=embed_model,
)
# Header line followed by the result table, each on its own line.
print("Recommended Books:", recommended_books, sep="\n")




Describe the book you'd like to read today: an epic fantasy
Recommended Books:
                      title  \
41797    The Secret Country   
26635  The Tower of Sorcery   
44488               Beowulf   
14363     The Book of Atrus   
3780    In the Night Garden   

                                                  genres  similarity  
41797  ['Fantasy', 'Young Adult', 'Fiction', 'Magic',...    0.524227  
26635  ['Fantasy', 'Magic', 'Adventure', 'High Fantas...    0.500091  
44488  ['Classics', 'Graphic Novels', 'Fantasy', 'Fic...    0.475703  
14363  ['Fantasy', 'Fiction', 'Science Fiction', 'Adv...    0.472252  
3780   ['Fantasy', 'Fiction', 'Fairy Tales', 'Short S...    0.459665  
