In [None]:
# importation des modules
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

from keras.layers import Input, Embedding, Flatten, Dot, Dense
from keras.models import Model

warnings.filterwarnings('ignore')

In [None]:
# Téléchargements des CSV
!wget https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv
!wget https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv
!wget https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/book_tags.csv
!wget https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/tags.csv

--2021-04-27 13:09:51--  https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 72126826 (69M) [text/plain]
Saving to: ‘ratings.csv’


2021-04-27 13:09:52 (94.5 MB/s) - ‘ratings.csv’ saved [72126826/72126826]

--2021-04-27 13:09:52--  https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3286659 (3.1M) [text/plain]
Saving to: ‘books.csv’


2021-04-27 13:09:53 (20.1 MB/s) - ‘books.csv’ saved [3286

In [None]:
# Load the four downloaded CSV files into DataFrames, in the same order as
# the names on the left-hand side.
ratings, books, tags, book_tags = (
    pd.read_csv(csv_name)
    for csv_name in ('ratings.csv', 'books.csv', 'tags.csv', 'book_tags.csv')
)

In [None]:
# Books ranked by average rating, highest first, restricted to the
# identifying columns plus the rating and the number of ratings.
best_ratings_count = (
    books
    .loc[:, ['goodreads_book_id', 'title', 'original_title', 'average_rating', 'ratings_count']]
    .sort_values('average_rating', ascending=False)
)

In [None]:
# Books ranked by number of text reviews, highest first, restricted to the
# identifying columns plus the average rating and the review count.
best_text_reviews_count = (
    books
    .loc[:, ['goodreads_book_id', 'title', 'original_title', 'average_rating', 'work_text_reviews_count']]
    .sort_values('work_text_reviews_count', ascending=False)
)

In [None]:
# Top five books by average rating (head() defaults to 5 rows).
best_ratings_count.head()

Unnamed: 0,goodreads_book_id,title,original_title,average_rating,ratings_count
3627,24812,The Complete Calvin and Hobbes,The Complete Calvin and Hobbes,4.82,28900
3274,8,"Harry Potter Boxed Set, Books 1-5 (Harry Potte...",,4.77,33220
861,17332218,"Words of Radiance (The Stormlight Archive, #2)",Words of Radiance,4.77,73572
8853,95602,Mark of the Lion Trilogy,Mark of the Lion Trilogy,4.76,9081
7946,5031805,ESV Study Bible,,4.76,8953


In [None]:
# Top five books by number of text reviews (head() defaults to 5 rows).
best_text_reviews_count.head()

Unnamed: 0,goodreads_book_id,title,original_title,average_rating,work_text_reviews_count
0,2767052,"The Hunger Games (The Hunger Games, #1)",The Hunger Games,4.34,155254
5,11870085,The Fault in Our Stars,The Fault in Our Stars,4.26,140739
29,8442457,Gone Girl,Gone Girl,4.03,121614
11,13335037,"Divergent (Divergent, #1)",Divergent,4.24,101023
19,7260188,"Mockingjay (The Hunger Games, #3)",Mockingjay,4.03,96274


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the rating rows for evaluation; the fixed random_state
# makes the split reproducible across runs.
train, test = train_test_split(
    ratings,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Upper bounds for the user and book identifiers found in the ratings.
# The Embedding layers built later are sized as `<bound> + 1` and are indexed
# directly by the raw ids, so the bound has to be the largest id, not the
# number of distinct ids. The two only coincide when the ids run contiguously
# from 1; with any gap, a distinct-count bound would be too small and the
# largest ids would index past the end of the embedding table.
user_max = int(ratings.user_id.max())
books_max = int(ratings.book_id.max())

In [None]:
from keras.layers import Concatenate

# Book branch: a single integer book id is looked up in a 5-dimensional
# embedding table and flattened to a plain vector.
book_input = Input(shape=[1], name="Book-Input")
book_embedding = Embedding(
    input_dim=books_max + 1,
    output_dim=5,
    name="Book-Embedding",
)(book_input)
book_vec = Flatten(name="Flatten-Books")(book_embedding)

# User branch: same structure as the book branch, with its own table.
user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(
    input_dim=user_max + 1,
    output_dim=5,
    name="User-Embedding",
)(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)

# Join the two embedding vectors into one feature vector (book first).
conc = Concatenate()([book_vec, user_vec])

# Dense head: two ReLU layers followed by a single linear output unit
# that produces the predicted rating.
fc1 = Dense(units=128, activation='relu')(conc)
fc2 = Dense(units=32, activation='relu')(fc1)
out = Dense(units=1)(fc2)

# Assemble and compile. Note the input order is [user, book]; every call to
# fit / evaluate / predict must pass its arrays in that same order.
model = Model(inputs=[user_input, book_input], outputs=out)
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
from keras.models import load_model

# Train only when no saved model is found on disk: fit for 5 epochs on the
# training split and save the result. Otherwise reuse the saved model.
# `history` is assigned only in the training branch.
if not os.path.exists('regression_model2.h5'):
    history = model.fit(
        x=[train.user_id, train.book_id],
        y=train.rating,
        epochs=5,
        verbose=1,
    )
    model.save('regression_model2.h5')
else:
    model = load_model('regression_model2.h5')

In [None]:
# Loss (mean squared error, as configured at compile time) on the held-out
# test split; inputs are passed in the model's [user, book] order.
model.evaluate(x=[test.user_id, test.book_id], y=test.rating)



0.8166475892066956

In [None]:
# Predicted ratings for the first 10 (user, book) pairs of the test split.
predictions = model.predict(x=[test.user_id.iloc[:10], test.book_id.iloc[:10]])

In [None]:
# Inputs for scoring every rated book for the first user (user id 1).
# np.unique returns the distinct book ids in ascending order, so the position
# of each id in `book_data` is deterministic rather than depending on the
# iteration order of a Python set.
book_data = np.unique(ratings.book_id)
# The same user id repeated once per candidate book, aligned with `book_data`.
user = np.full(len(book_data), 1)

In [None]:
# Predicted rating of every candidate book for the chosen user
# (inputs in the model's [user, book] order).
predictions = model.predict([user, book_data])

# Keep the first (only) column so that predictions[i] is the score of
# book_data[i].
predictions = predictions[:, 0]

# argsort returns *positions* within `predictions` / `book_data`, not book
# ids. Negating the scores sorts them from highest to lowest; the 20 best
# positions are then mapped back to real book ids through `book_data`, so
# that the later lookup against books['book_id'] selects the right books.
top_positions = (-predictions).argsort()[:20]
recommended_book_ids = book_data[top_positions]

In [None]:
# Catalogue rows whose book_id is one of the recommended ids.
recommended_book = books.loc[books.book_id.isin(recommended_book_ids)]

In [None]:
# Order the recommended books by their catalogue average rating, best first.
recommended_book = recommended_book.sort_values("average_rating", ascending=False)

In [None]:
# Show publication years as whole numbers instead of floats.
# The nullable 'Int64' dtype is used rather than plain int: a plain int cast
# raises as soon as one selected book has a missing publication year, whereas
# 'Int64' keeps such entries as <NA> and converts the rest.
recommended_book['original_publication_year'] = (
    recommended_book['original_publication_year'].astype('Int64')
)

In [None]:
# Peek at the first recommended book with all of its catalogue columns.
recommended_book.head(n=1)

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,title,language_code,average_rating,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
3626,3627,7592279,7592279,9985502,37,1741664489,9781742000000.0,John Flanagan,2011,,"The Emperor of Nihon-Ja (Ranger's Apprentice, ...",eng,4.42,29397,33357,1089,226,618,3569,9564,19380,https://images.gr-assets.com/books/1446448563m...,https://images.gr-assets.com/books/1446448563s...


In [None]:
# Keep only the columns worth showing in the final recommendation table.
recommended_book = recommended_book[
    ['title', 'authors', 'original_publication_year', 'average_rating', 'work_text_reviews_count']
]

In [None]:
# Display the final recommendation table (title, authors, publication year,
# average rating and text-review count), in the order set by the earlier sort.
recommended_book

Unnamed: 0,title,authors,original_publication_year,average_rating,work_text_reviews_count
3626,"The Emperor of Nihon-Ja (Ranger's Apprentice, ...",John Flanagan,2011,4.42,1089
190,Watchmen,"Alan Moore, Dave Gibbons, John Higgins",1987,4.35,10851
860,"Walking Disaster (Beautiful, #2)",Jamie McGuire,2013,4.23,11348
8944,"The Next Accident (Quincy & Rainie, #3)",Lisa Gardner,2001,4.16,513
8852,"Sudden Prey (Lucas Davenport, #8)",John Sandford,1996,4.15,363
6918,Napalm & Silly Putty,George Carlin,2001,4.12,316
3751,"Poirot Investiga (Hércules Poirot, #3)",Agatha Christie,1924,4.07,779
9074,Princess of Glass (The Princesses of Westfalin...,Jessica Day George,2010,4.03,1669
3356,"The Collectors (Camel Club, #2)",David Baldacci,2006,4.01,1417
5578,"Vanish (Firelight, #2)",Sophie Jordan,2011,3.95,2028
