### Baseline model for predicting reccomended book

In [1]:
import pandas as pd
import numpy as np

In [2]:
import sys
sys.path.append('../')

from functions.fun_model import recommend_books, not_recommended_books, prepare_data_assoc_rules, rank_books

### Import dataset, split into train/val/test

In [3]:
df = pd.read_csv('model_data/model_data_wo_eXtern.csv')

print(df.shape)
df.head(5)

(1149780, 13)


Unnamed: 0,User-ID,Age,City,Region/State,Country,Age_Missing,ISBN,Book-Rating,Rating_Categ,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,2,18.0,stockton,california,usa,0,195153448,0,Not rated,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press
1,7,-1.0,washington,dc,usa,1,34542252,0,Not rated,,,,
2,8,-1.0,timmins,ontario,canada,1,2005018,5,Avarege (4-5),Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
3,8,-1.0,timmins,ontario,canada,1,60973129,0,Not rated,Decision in Normandy,Carlo D'Este,1991.0,HarperPerennial
4,8,-1.0,timmins,ontario,canada,1,374157065,0,Not rated,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999.0,Farrar Straus Giroux


### Prepare data for association rules

In [4]:
df_subset = prepare_data_assoc_rules(df)
print(df_subset.shape)
df_subset.head(5)

(110362, 15)


Unnamed: 0,User-ID,Age,City,Region/State,Country,Age_Missing,ISBN,Book-Rating,Rating_Categ,Book-Title,Book-Author,Year-Of-Publication,Publisher,Recommend,Not_Recommend
354,242,37.0,neuffen,badenwuerttemberg,germany,0,971880107,0,Not rated,Wild Animus,Rich Shapero,2004.0,Too Far,0,0
394,243,-1.0,arden hills,minnesota,usa,1,60915544,10,Excellent (8-10),The Bean Trees,Barbara Kingsolver,1989.0,Perennial,1,0
395,243,-1.0,arden hills,minnesota,usa,1,60977493,7,Good (6-7),The God of Small Things,Arundhati Roy,1998.0,Perennial,1,0
397,243,-1.0,arden hills,minnesota,usa,1,156006529,0,Not rated,Where or When : A Novel,Anita Shreve,1999.0,Harvest Books,0,0
400,243,-1.0,arden hills,minnesota,usa,1,316096199,0,Not rated,Lucky : A Memoir,Alice Sebold,2002.0,Back Bay Books,0,0


In [5]:
user_book_matrix = df_subset.pivot(index='User-ID', columns='ISBN', values='Recommend').fillna(0)

### Simple reccomnendation system

In [6]:
# Example: User selects ISBN "12345"
#selected_book = "059035342X" # Harry Potter and the Sorcerer's Stone

selected_book = '0385504209' # The Da Vinci Code

### Look up book name for selected book
selected_book_name = df[df['ISBN'] == selected_book]['Book-Title'].values[0]
print(f"Selected Book: {selected_book_name}")

recommendations = recommend_books(selected_book, df_subset)
not_reccomendations = not_recommended_books(selected_book, df_subset)

### Join the two dataframes by the ISBN
merge_reccomendations = recommendations.merge(not_reccomendations, how='left', left_on='Book-Title', right_on='Book-Title').fillna(0)

## Calculate reccomneded %%!
merge_reccomendations['Recommendation_Percentage'] =100* merge_reccomendations['Count_recommend'] / (merge_reccomendations['Count_recommend'] + merge_reccomendations['Count_not_recommend'])
merge_reccomendations.head(5)

Selected Book: The Da Vinci Code


Unnamed: 0,Book-Title,Count_recommend,Count_not_recommend,Recommendation_Percentage
0,Angels &amp; Demons,34,0.0,100.0
1,The Lovely Bones: A Novel,32,0.0,100.0
2,The Secret Life of Bees,25,1.0,96.153846
3,The Red Tent (Bestselling Backlist),19,1.0,95.0
4,Life of Pi,17,0.0,100.0


### Objective function - find reccomended books

In [7]:
rank_books(merge_reccomendations, count_col="Count_recommend", percent_col="Recommendation_Percentage", top_n=100, w1=0.8, w2=0.2)

Unnamed: 0,Book-Title,Score,Count_recommend,Recommendation_Percentage
0,Angels &amp; Demons,1.000000,34,100.000000
1,The Lovely Bones: A Novel,0.951515,32,100.000000
2,The Secret Life of Bees,0.766434,25,96.153846
3,The Red Tent (Bestselling Backlist),0.616364,19,95.000000
4,Life of Pi,0.587879,17,100.000000
...,...,...,...,...
86,Pay It Forward,0.345455,7,100.000000
72,The Queen of the Damned (Vampire Chronicles (P...,0.345455,7,100.000000
87,Confessions of a Shopaholic (Summer Display Op...,0.345455,7,100.000000
93,The Last Precinct,0.345455,7,100.000000
