# Libraries

In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# Read Data

In [3]:
# Location of the processed feature table. The space before "/ml" is part of
# the path string and must be kept exactly as written.
# NOTE(review): absolute, machine-specific path — this cell only runs where that file exists.
features_csv_path = r"/home/mohamedelawakey/Desktop/Programming Books Recommendation System /ml/data/processed/v1/book_backend_full_features.csv"

# Load the table into `data`, the frame every later cell works on.
data = pd.read_csv(features_csv_path)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0,Name,Authors,Rating,PublishYear,Publisher,RatingDist5,RatingDist4,RatingDist3,RatingDist2,RatingDist1,...,emb374,emb375,emb376,emb377,emb378,emb379,emb380,emb381,emb382,emb383
0,Between Therapists: The Processing of Transfer...,Arthur Robbins,5.0,1999,Jessica Kingsley Publishers,3,0,0,0,0,...,0.080532,0.01215,0.032719,0.01712,-0.087718,0.072108,0.046356,0.13046,-0.033658,-0.037323
1,Last Word: Media Coverage of the Supreme Court...,Florian Sauvageau,5.0,2005,University of British Columbia Press,1,0,0,0,0,...,0.026014,0.049502,0.051143,0.057057,-0.05134,0.050311,0.005685,0.018655,0.004255,0.060309
2,Autumn Wisdom: A Book of Readings,Richard L. Morgan,5.0,2007,Wipf & Stock Publishers,1,0,0,0,0,...,-0.024516,0.018459,-0.025158,0.031864,-0.049244,-0.088706,0.041197,-0.036828,-0.031374,-0.031394
3,James Denney (1856-1917),James M. Gordon,5.0,2006,Wipf & Stock Publishers,1,0,0,0,0,...,0.02387,0.062438,0.008619,0.072061,-0.063944,0.024584,0.032356,-0.020164,0.008109,-0.042561
4,Cautious Rebel: A Biography of Susan Clay Smitzky,Lindsey Apple,5.0,1997,Kent State University Press,1,0,0,0,0,...,-0.047381,-0.054794,0.028854,0.044863,0.041969,-0.036443,0.000149,-0.049355,0.003883,-0.016954


### Show the dtype of each column in the data to select features for reranking

In [4]:
# Print one "<column> : <dtype>" line per column of `data`, so candidate
# reranking features can be picked by type.
for column_name, column_dtype in data.dtypes.items():
    print(column_name, ':', column_dtype)

Name : str
Authors : str
Rating : float64
PublishYear : int64
Publisher : str
RatingDist5 : int64
RatingDist4 : int64
RatingDist3 : int64
RatingDist2 : int64
RatingDist1 : int64
RatingDistTotal : int64
CountsOfReview : float64
Description : str
tech_score : int64
Pages : float64
weighted_rating : float64
average_rating_5 : float64
average_rating_4 : float64
average_rating_2 : float64
average_rating_1 : float64
average_high_rating : float64
average_low_rating : float64
Publisher_frequently : float64
Authors_frequently : float64
Pages_scaled : float64
PublishYear_scaled : float64
RatingDistTotal_scaled : float64
tech_score_scaled : float64
CountsOfReview_log : float64
CountsOfReview_scaled : float64
Name_cleaned : str
Description_cleaned : str
text_for_embedding : str
emb0 : float64
emb1 : float64
emb2 : float64
emb3 : float64
emb4 : float64
emb5 : float64
emb6 : float64
emb7 : float64
emb8 : float64
emb9 : float64
emb10 : float64
emb11 : float64
emb12 : float64
emb13 : float64
emb14 : f

# Load the embedding model

In [5]:
# Name of the sentence-transformers checkpoint used to embed user queries.
# NOTE(review): presumably the same model that produced the emb0..emb383
# columns in the data — confirm, otherwise the similarities are not meaningful.
embedding_model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(embedding_model_name)

### Test the model: return the 30 books nearest to the user query, without reranking

In [6]:
# Ask for a free-text query. input() blocks until something is typed, so this
# cell cannot run unattended.
user_input = input('enter the book')

# Encode the query text with `model`; the result is compared against the
# stored book embeddings in the following cell.
user_embedding = model.encode(user_input)

In [7]:
# Stored book embeddings, taken from the columns emb0 ... emb383 of `data`
# (one row per book, in the same row order as `data`).
embedding_columns = [f'emb{dim}' for dim in range(384)]
books_embeddings = data[embedding_columns].values

# cosine_similarity works on 2-D inputs, so the single query vector is wrapped
# in a list; row 0 of the result holds one score per book.
query_scores = cosine_similarity([user_embedding], books_embeddings)
similarities = query_scores[0]

# Series with a default integer index, so its labels are row positions in `data`.
similarities_series = pd.Series(similarities)

In [8]:
# Columns shown in the printed preview.
preview_columns = ['Name', 'Authors', 'Description']

# Index labels of the 30 highest similarity scores; because the series has a
# default integer index these are also row positions, which is what .iloc needs.
top_indicates = similarities_series.nlargest(30).index

# Pick those rows from `data` and print the readable columns.
recommended_books = data.iloc[top_indicates]
print(recommended_books[preview_columns])

                                                     Name  \
391654                     The Promise of Neural Networks   
155970              Neural Networks and Learning Machines   
267919  Machine Learning: An Artificial Intelligence A...   
14632   Understanding Neural Networks, Vol. 1 (IBM Ver...   
394406  Neural Networks for Vision, Speech and Natural...   
200147                 An Introduction to Neural Networks   
295773                  Machine Learning Proceedings 1990   
342914  Neural Nets: 13th Italian Workshop on Neural N...   
339028           Progress in Neural Networks, Volume Four   
384750             Guide to Neural Computing Applications   
305348            Machine Learning (Inaugural Lecture S.)   
284400                        Neural Networks: A Tutorial   
156260                     The Essence Of Neural Networks   
382494  Artificial Neural Networks - ICANN 2006: 16th ...   
150063                                    Neural Networks   
123220  Machine Learning

In [9]:
# selected features for Reranking
# The bare string below is only a note; it has no effect on any variable.
# NOTE(review): this list names CountsOfReview, tech_score and
# average_high_rating, while the equation further down uses
# CountsOfReview_scaled and tech_score_scaled and has no average_high_rating
# term — confirm which set is intended.
"""
weighted_rating
CountsOfReview
tech_score
PublishYear_scaled
average_high_rating 
average_low_rating
"""

# equation
# This string is the last expression in the cell, so its repr is echoed as the
# cell output.
# NOTE(review): weighted_rating and average_low_rating appear without a
# `_scaled` suffix, unlike the other feature terms — confirm they are on a
# range comparable to similarity and the scaled columns.
"""
rerank_score = 0.5 * similarity + 0.2 * weighted_rating + 0.15 * CountsOfReview_scaled + 0.05 * tech_score_scaled + 0.05 * PublishYear_scaled - 0.05 * average_low_rating
"""

'\nrerank_score = 0.5 * similarity + 0.2 * weighted_rating + 0.15 * CountsOfReview_scaled + 0.05 * tech_score_scaled + 0.05 * PublishYear_scaled - 0.05 * average_low_rating\n'

In [10]:
# Store the per-book similarity to the current query as a new column on `data`.
# This modifies the loaded frame in place, and the values are specific to the
# query entered above.
# NOTE(review): the recorded output shows a warning pointing at this
# assignment; its text is not visible here, so the cause is unconfirmed.
data['similarity'] = similarities
# Preview the frame, now ending with the `similarity` column.
data.head()

  data['similarity'] = similarities


Unnamed: 0,Name,Authors,Rating,PublishYear,Publisher,RatingDist5,RatingDist4,RatingDist3,RatingDist2,RatingDist1,...,emb375,emb376,emb377,emb378,emb379,emb380,emb381,emb382,emb383,similarity
0,Between Therapists: The Processing of Transfer...,Arthur Robbins,5.0,1999,Jessica Kingsley Publishers,3,0,0,0,0,...,0.01215,0.032719,0.01712,-0.087718,0.072108,0.046356,0.13046,-0.033658,-0.037323,0.13111
1,Last Word: Media Coverage of the Supreme Court...,Florian Sauvageau,5.0,2005,University of British Columbia Press,1,0,0,0,0,...,0.049502,0.051143,0.057057,-0.05134,0.050311,0.005685,0.018655,0.004255,0.060309,0.063319
2,Autumn Wisdom: A Book of Readings,Richard L. Morgan,5.0,2007,Wipf & Stock Publishers,1,0,0,0,0,...,0.018459,-0.025158,0.031864,-0.049244,-0.088706,0.041197,-0.036828,-0.031374,-0.031394,0.013263
3,James Denney (1856-1917),James M. Gordon,5.0,2006,Wipf & Stock Publishers,1,0,0,0,0,...,0.062438,0.008619,0.072061,-0.063944,0.024584,0.032356,-0.020164,0.008109,-0.042561,0.04221
4,Cautious Rebel: A Biography of Susan Clay Smitzky,Lindsey Apple,5.0,1997,Kent State University Press,1,0,0,0,0,...,-0.054794,0.028854,0.044863,0.041969,-0.036443,0.000149,-0.049355,0.003883,-0.016954,0.024661


In [11]:
# Number of candidates taken by raw similarity before reranking.
top_n = 50

# Work on a copy so that adding the score column does not touch `data`.
recommended_books = data.nlargest(top_n, 'similarity').copy()

# Weighted contribution of each signal to the rerank score.
similarity_term = 0.5 * recommended_books['similarity']
rating_term = 0.2 * recommended_books['weighted_rating']
review_count_term = 0.15 * recommended_books['CountsOfReview_scaled']
tech_term = 0.05 * recommended_books['tech_score_scaled']
publish_year_term = 0.05 * recommended_books['PublishYear_scaled']
low_rating_penalty = 0.05 * recommended_books['average_low_rating']

# Terms are combined left to right in the order of the written formula; the
# low-rating term is the only one subtracted.
recommended_books['rerank_score'] = (
    similarity_term
    + rating_term
    + review_count_term
    + tech_term
    + publish_year_term
    - low_rating_penalty
)

In [12]:
# Order the candidates from highest to lowest rerank score and keep the first 30.
ranked_books = recommended_books.sort_values(by='rerank_score', ascending=False)
top_30_books = ranked_books.head(30)

# Print the readable columns together with the score.
result_columns = ['Name', 'Authors', 'Description', 'rerank_score']
print(top_30_books[result_columns])

                                                     Name  \
14632   Understanding Neural Networks, Vol. 1 (IBM Ver...   
21336   Backpropagation: Theory, Architectures, and Ap...   
31939                            Building Neural Networks   
46913       Machine Learning: A Guide to Current Research   
123220  Machine Learning: A Multistrategy Approach, Vo...   
103749  Machine Learning: ESML 2006: 17th European Con...   
105466  Machine Learning: Ecml 94: European Conference...   
123647           Machine Learning: A Theoretical Approach   
236055  Machine Learning Techniques for Multimedia: Ca...   
270690                   Introduction to Machine Learning   
295773                  Machine Learning Proceedings 1990   
284400                        Neural Networks: A Tutorial   
293377  Computational Learning Theory and Natural Lear...   
136871  Fundamentals of Neural Networks: Architectures...   
143082            Pattern Recognition and Neural Networks   
88606                Neu

In [13]:
# Show the column labels of `data`; in the recorded output the list ends with
# the added 'similarity' column.
data.columns

Index(['Name', 'Authors', 'Rating', 'PublishYear', 'Publisher', 'RatingDist5',
       'RatingDist4', 'RatingDist3', 'RatingDist2', 'RatingDist1',
       ...
       'emb375', 'emb376', 'emb377', 'emb378', 'emb379', 'emb380', 'emb381',
       'emb382', 'emb383', 'similarity'],
      dtype='str', length=418)

In [14]:
# Destination for the frame including the `similarity` column. The space
# before "/ml" is part of the path string and must be kept exactly as written.
# NOTE(review): absolute, machine-specific path — the target directory must already exist.
similarity_csv_path = r'/home/mohamedelawakey/Desktop/Programming Books Recommendation System /ml/data/similarity/v1/books_with_similarity.csv'

# Write every column of `data` as UTF-8 CSV without the row index, then report.
data.to_csv(similarity_csv_path, index=False, encoding='utf-8')
print('saved is successfully')

saved is successfully
