In [105]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier

In [106]:
df = pd.read_csv('books_4.csv')
df['language_code'].fillna('eng', inplace=True)
features = ['language_code', 'Author', 'genre']

X = df[features]
Y = df['Book Name']

In [107]:
print("Missing values distribution: ")
print(X.isna().mean())
print('---')
print(Y.isna().mean())
print('---')
print(X.isnull().mean())
print('---')
print(Y.isnull().mean())

Missing values distribution: 
language_code    0.0
Author           0.0
genre            0.0
dtype: float64
---
0.0
---
language_code    0.0
Author           0.0
genre            0.0
dtype: float64
---
0.0


In [108]:
X['genre'] = X['genre'].replace('genre', '', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['genre'] = X['genre'].replace('genre', '', regex=True)


In [109]:
print(X['genre'])

0        fiction
1        fiction
2        fiction
3        fiction
4        fiction
          ...   
1005     fiction
1006     fiction
1007     fiction
1008     fiction
1009     fiction
Name: genre, Length: 1010, dtype: object


In [110]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [111]:
print(X_train.shape)
print(X_test.shape)

(808, 3)
(202, 3)


In [112]:
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train.apply(lambda x: ' '.join(x), axis=1))

In [113]:
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train_vectorized, Y_train)

In [119]:
test_book = X_test.iloc[0]
user_vector = vectorizer.transform([' '.join(test_book)])
print(test_book)

language_code           en-US
Author           Barack Obama
genre                 fiction
Name: 629, dtype: object


In [120]:
#single prediction respect to lang code, author, genere
print("Recommendation -- ")
print(classifier.predict(user_vector))

Recommendation -- 
['Bag of Bones']


In [121]:
#for multiple recommmendations
distances, indices = classifier.kneighbors(user_vector)

for i, index in enumerate(indices[0]):
    book = df.iloc[index]
    print(f"{i+1}. Title: {book['Book Name']}, Author: {book['Author']}, Genre: {book['genre']}")

1. Title: The Awakening, Author: Kelley Armstrong, Genre: genre fiction
2. Title: Equal Rites, Author: Terry Pratchett, Genre: genre fiction
3. Title: Caps for sale: a tale of a peddler, some monkeys and their monkey business, Author: Esphyr Slobodkina, Genre: genre fiction
4. Title: The Last Juror, Author: John Grisham, Genre: nonfiction
5. Title: Band of Brothers: E Company, 506th Regiment, 101st Airborne from Normandy to Hitler's Eagle's Nest, Author: Stephen E. Ambrose, Genre: genre fiction


analyze section


In [117]:
y_pred = classifier.predict(vectorizer.transform(X_test.apply(lambda x: ' '.join(x), axis=1)))

In [118]:
print(y_pred)

['Bag of Bones' 'Bag of Bones' 'Death Comes to Pemberley'
 'A Court of Mist and Fury' 'Bag of Bones' 'Before They Are Hanged'
 'Bag of Bones' 'Absolute Power' 'Contact' 'A Spool of Blue Thread'
 'A Painted House' 'A Bruxa de Portobello' 'Eyes of the Dragon'
 'Death Comes to Pemberley' 'Bag of Bones' ' The Tale of Despereaux'
 'Death Comes to Pemberley' 'A Crown of Swords' 'Change of Heart'
 'A Visit From the Goon Squad' 'Death Comes to Pemberley' 'Bag of Bones'
 'Catherine the Great: Portrait of a Woman' 'Death Comes to Pemberley'
 'Fool Moon ' 'Death Comes to Pemberley' 'A Crown of Swords'
 'Death Comes to Pemberley' 'Death Comes to Pemberley'
 'Death Comes to Pemberley' 'Death Comes to Pemberley'
 'Death Comes to Pemberley' 'Death Comes to Pemberley' 'Bag of Bones'
 'The Merchant of Venice' 'Howl and Other Poems ' 'Howl and Other Poems '
 'Death Masks' 'Death Comes to Pemberley'
 "America (The Book): A Citizen's Guide to Democracy Inaction"
 'Death Comes to Pemberley' 'Cress' 'A Brea