In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy catalogue: one row per book as [book_id, title, summary, author].
data = [
    [1, "The Lost Kingdom", "A thrilling adventure of a young explorer who discovers a hidden kingdom", "John Carter"],
    [2, "Whispers of the Night", "A mystery novel about strange whispers haunting a small town", "Mary Adams"],
    [3, "Ocean's Secret", "A marine biologist uncovers secrets deep beneath the ocean", "Peter White"],
    [4, "The Time Traveler's Diary", "The journey of a man traveling across centuries through a magical diary", "Linda Grey"],
    [5, "Shadows in the Forest", "A suspenseful story of hikers trapped in a haunted forest", "David King"],
    [6, "Journey to Mars", "An astronaut’s first human mission to Mars turns into a survival challenge", "Susan Lee"],
    [7, "The Hidden Truth", "An investigative journalist uncovers political corruption", "Michael Brown"],
    [8, "Under the Crimson Sky", "A love story set in the midst of a civil war", "Emily Stone"],
    [9, "The Cursed Painting", "A cursed painting brings misfortune to its owners", "Anna Green"],
    [10, "Code of the Future", "A young programmer creates an AI that changes the world", "Robert Hall"],
    [11, "Desert Storm", "A survival story of a pilot stranded in the desert", "Richard West"],
    [12, "Melody of the Heart", "A gifted pianist navigates love and ambition", "Sophia Taylor"],
    [13, "The Last Heir", "A royal family’s last surviving heir must reclaim the throne", "Thomas Reed"],
    [14, "City of Glass", "A detective investigates crimes in a city made entirely of glass buildings", "Olivia Turner"],
    [15, "The Forgotten Island", "Shipwreck survivors discover an island with dark secrets", "Henry Clarke"],
    [16, "Secrets of the Library", "A librarian finds an ancient manuscript leading to treasure", "Sarah Bell"],
    [17, "The Quantum Code", "A scientist unlocks a code that can alter reality", "Daniel Scott"],
    [18, "Footprints in the Snow", "A cold-case murder investigation in a snowy mountain town", "Laura Parker"],
    [19, "The Eternal Flame", "A fantasy tale of a warrior seeking a legendary flame to save her kingdom", "Nathan Brooks"],
    [20, "Voices from the Past", "An archaeologist hears voices from ancient ruins", "Grace Miller"],
]

df = pd.DataFrame(data, columns=["book_id", "title", "summary", "author"])

# Text fed to the vectorizer: the summary followed by the author name, so
# both contribute tokens to each book's feature vector.
df["content"] = df["summary"].str.cat(df["author"], sep=" ")

# Turn each book's text into a TF-IDF vector, dropping English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df["content"])

# Pairwise book-to-book similarity; with a single argument the matrix is
# compared against itself, so row i / column j relates book i to book j.
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to recommend books
def recommend_books(title, num_recommendations=5):
    """Return the books most similar to ``title``.

    Looks ``title`` up in the module-level ``df`` and ranks every other row
    by its entry in the module-level ``cosine_sim`` matrix.

    Args:
        title: Title to look up; must equal a value in ``df["title"]``
            exactly. If several rows share the title, the first is used.
        num_recommendations: Maximum number of results to return. Values
            below 1 yield an empty list.

    Returns:
        A list of ``(title, score)`` tuples ordered by descending similarity
        score, or a message string when ``title`` is not in the dataset.
    """
    matches = (df["title"] == title).to_numpy().nonzero()[0]
    if matches.size == 0:
        return f"Book '{title}' not found in dataset."

    # Row *position*, not index label: cosine_sim rows follow the row order
    # of df, which only coincides with the labels for a default RangeIndex.
    idx = int(matches[0])

    # Drop the queried book explicitly instead of assuming it sorts first;
    # that assumption breaks when its self-similarity is 0 (empty TF-IDF
    # vector) or when another row has identical content.
    sim_scores = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]
    # list.sort is stable, so equally scored books keep dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    # Clamp so a negative count cannot become a wrap-around slice.
    sim_scores = sim_scores[:max(num_recommendations, 0)]

    titles = df["title"]
    return [(titles.iloc[i], score) for i, score in sim_scores]

# Example usage
print("Recommendations for 'The Lost Kingdom':")
results = recommend_books("The Lost Kingdom")
if isinstance(results, str):
    # recommend_books reports an unknown title as a message string; looping
    # over it would iterate characters and fail on tuple unpacking.
    print(results)
else:
    for rec, score in results:
        print(f"- {rec} (score: {score:.2f})")


Recommendations for 'The Lost Kingdom':
- Code of the Future (score: 0.09)
- The Eternal Flame (score: 0.08)
- Whispers of the Night (score: 0.00)
- Ocean's Secret (score: 0.00)
- The Time Traveler's Diary (score: 0.00)
