# Building your own recommender system

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

#### Read the data

In [2]:
# CSV file that is handed to get_data() to build the working DataFrame.
PATH = 'google_books_1299.csv'

def get_data(path_to_data):
    """Read the book catalogue from a CSV file and return a cleaned DataFrame.

    Parameters
    ----------
    path_to_data : str or path-like
        Location of the CSV file. Its first column is used as the index while
        reading and is discarded again by the final renumbering.

    Returns
    -------
    pandas.DataFrame
        The file's rows with the ``title`` column lower-cased, every row that
        contains a missing value removed, and the index renumbered ``0..n-1``.
    """
    books = (
        pd.read_csv(str(path_to_data), index_col=0)
        # Overwriting an existing column through assign keeps its position.
        .assign(title=lambda frame: frame["title"].str.lower())
        .dropna()
    )
    # Renumber so that index labels match row positions after rows were dropped.
    books.index = list(range(len(books)))
    return books

In [3]:
# Load and clean the catalogue once; the following cells all work on this DataFrame.
data = get_data(PATH)

# Create a content-based recommender system using cosine similarity

In [4]:
from gensim.utils import simple_preprocess
def _genre_tokens(genre):
    """Turn one raw genre string into a list of lower-case genre tokens.

    ``simple_preprocess`` already splits on punctuation such as ``&`` and ``,``,
    so the raw text is passed to it unchanged. Stripping the substrings 'amp'
    and 'none' from the text beforehand (as was done previously) also mangled
    real words, e.g. 'vampires' -> 'vires', and removing commas glued together
    genres that were separated by a comma without a space.

    Parameters
    ----------
    genre : str or missing value
        Raw content of the 'generes' column for one book.

    Returns
    -------
    list of str
        Tokens with at least three characters, without the noise tokens
        'amp' and 'none'. An empty list for anything that is not a string.
    """
    if not isinstance(genre, str):
        return []
    # 'amp' is presumably what is left of the HTML entity '&amp;' and 'none' a
    # placeholder for "no genre" -- both are dropped as whole tokens only.
    return [
        token
        for token in simple_preprocess(genre, min_len=3)
        if token not in ('amp', 'none')
    ]


data['generes'] = data['generes'].astype('string')
data['preprocessed_genre'] = data['generes'].apply(_genre_tokens)

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy import spatial

In [6]:
# sentence-transformers provides an easy way to compute dense vector
# representations (embeddings) for sentences. The models are based on
# transformer networks such as BERT. Text is embedded in a vector space such
# that similar texts lie close together and can be found efficiently using
# cosine similarity.
from sentence_transformers import SentenceTransformer 
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [7]:
def recommender(genres, data, threshold=0.7, top_n=5):
    """Recommend the highest-ranked books whose genres resemble ``genres``.

    Every book's genre tokens are joined into one text, embedded with the
    module-level sentence-transformer ``model`` and compared with the embedding
    of ``genres``. Books whose cosine similarity exceeds ``threshold`` are
    ranked by the value in the column at position 2 (highest first) and the
    first ``top_n`` of them are returned.

    Parameters
    ----------
    genres : str
        Free text describing the genre(s) the user is interested in.
    data : pandas.DataFrame
        Book catalogue. Must contain the columns 'ISBN' and 'title'. The
        column at position 2 must be convertible to float and the column at
        position 13 must hold an iterable of genre tokens per row.
    threshold : float, optional
        Minimum cosine similarity (exclusive) for a book to qualify.
    top_n : int, optional
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        A DataFrame with the columns 'ISBN' and 'title' (index ``0..k-1``,
        best-ranked book first), or an apology message when no book qualifies.
        When fewer than ``top_n`` books qualify a notice is printed.
    """
    no_match_message = "Sorry, we do not have any recommentation for you."
    if len(data) == 0:
        return no_match_message

    # NOTE(review): columns are addressed by position, exactly like the previous
    # value[13] / value[2] lookups. Position 13 is presumably
    # 'preprocessed_genre' and position 2 the book rating -- confirm against
    # the CSV layout and switch to column names if so.
    genre_column = data.iloc[:, 13]
    ranking_column = data.iloc[:, 2]

    genre_texts = [', '.join(str(token) for token in tokens) for tokens in genre_column]

    # Many books share the same genre text, so each distinct text is embedded
    # only once, and the query is embedded once instead of once per row.
    unique_texts = list(dict.fromkeys(genre_texts))
    query_embedding = model.encode([genres])
    text_embeddings = model.encode(unique_texts)

    # cdist works on 2-D inputs, unlike spatial.distance.cosine which expects
    # 1-D vectors and rejects the (1, dim) query embedding on recent SciPy.
    distances = spatial.distance.cdist(query_embedding, text_embeddings, metric='cosine')[0]
    similarity_by_text = dict(zip(unique_texts, 1 - distances))

    # Remember row *positions* (not index labels) so the final .iloc lookup is
    # correct even when ``data`` was filtered and labels no longer match.
    matches = [
        (position, float(ranking_value))
        for position, (text, ranking_value) in enumerate(zip(genre_texts, ranking_column))
        if similarity_by_text[text] > threshold
    ]
    if not matches:
        return no_match_message

    if len(matches) < top_n:
        print(f"We can only give you less than {top_n} recommendations")

    # Stable sort: books with equal ranking values keep their catalogue order.
    matches.sort(key=lambda match: match[1], reverse=True)
    top_positions = [position for position, _ in matches[:top_n]]

    # Select all rows at once; DataFrame.append no longer exists in pandas 2.x.
    return data[['ISBN', 'title']].iloc[top_positions].reset_index(drop=True)
        
def Content_based_recommender(data):
    """Ask the user for a genre and return the matching book recommendations.

    Rows without a 'preprocessed_genre' value are ignored. All genre tokens
    found in the remaining rows are shown as a hint, one line is read from
    standard input, lower-cased, and handed to ``recommender`` together with
    the filtered rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Book catalogue with a 'preprocessed_genre' column holding an iterable
        of genre tokens per row.

    Returns
    -------
    Whatever ``recommender`` returns for the entered genre.
    """
    books_with_genres = data.loc[data['preprocessed_genre'].notna()]

    # Collect every distinct genre token to show the user what is available.
    genres_set = {
        token
        for tokens in books_with_genres['preprocessed_genre']
        for token in tokens
    }

    print(f"What type of genre do you like? \n\nYou can choose from the following:\n\n{genres_set}")
    chosen_genre = input().lower()

    return recommender(chosen_genre, books_with_genres)

In [None]:
# Start the interactive prompt: reads a genre from stdin and displays the returned recommendations.
Content_based_recommender(data)