In [1]:
import pandas as pd
import numpy as np

In [15]:
ratings = pd.read_csv("ratings.csv")
ratings

Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3
...,...,...,...
5976474,49925,510,5
5976475,49925,528,4
5976476,49925,722,4
5976477,49925,949,5


In [16]:
from scipy.sparse import csr_matrix #Pour représenter les matrices sparses
from scipy.linalg import sqrtm

R_df = ratings.pivot(index='user_id', columns='book_id', values='rating').fillna(0)
R_df.head()

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,5.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,5.0,0.0,4.0,4.0,0.0,4.0,4.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
R = R_df.values #On donne à l'algorithme la matrice, pas le dataframe (np array)

# On normalise les lignes par utilisateur pour se débarasser de leur biais
# On retire leur moyenne de toute leur note, on les rajoutera plus tard aux predictions
user_ratings_mean = np.mean(R, axis=1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

In [18]:
R_demeaned

array([[-0.042 , -0.042 , -0.042 , ..., -0.042 , -0.042 , -0.042 ],
       [-0.0287,  4.9713, -0.0287, ..., -0.0287, -0.0287, -0.0287],
       [-0.0158, -0.0158, -0.0158, ..., -0.0158, -0.0158, -0.0158],
       ...,
       [ 3.9452,  4.9452, -0.0548, ..., -0.0548, -0.0548, -0.0548],
       [ 3.9657,  4.9657, -0.0343, ..., -0.0343, -0.0343, -0.0343],
       [ 3.9414,  4.9414,  3.9414, ..., -0.0586, -0.0586, -0.0586]])

In [19]:
from scipy.sparse.linalg import svds

latent_dimension = 30 #10~50 sont des valeurs communes
U, sigma, Vt = svds(R_demeaned, k=latent_dimension)

print(f"Dimensions de U : {U.shape}")
print(f"Dimensions de sigma : {sigma.shape}")
print(f"Dimensions de Vt : {Vt.shape}")

Dimensions de U : (53424, 30)
Dimensions de sigma : (30,)
Dimensions de Vt : (30, 10000)


In [20]:
print(type(U))
print(type(sigma))
print(type(Vt))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [20]:
sigma = np.diag(sigma) #Transforme en matrice (50, 50)
s_root = sqrtm(sigma)

#Représentations latentes des utilisateurs et des films en moindre dimension
Usk = np.dot(U, s_root)
skV = np.dot(s_root, Vt)

#Le produit matriciel de ces matrices (Qui incluent toutes le sdeux sigma ici) permet de prédire les notes
predicted_rating = np.dot(Usk, skV)

#Auxquelles on rajoute les moyennes soustraites tout à l'heure
predicted_rating = predicted_rating + user_ratings_mean.reshape(-1, 1)

In [25]:
print(type(sigma))

<class 'numpy.ndarray'>


In [23]:
print(type(predicted_rating))
print(type(R_df))

<class 'numpy.ndarray'>
<class 'pandas.core.frame.DataFrame'>


In [27]:
np.savez_compressed("predicted_rating.npz", predicted_rating)

In [26]:
R_df.columns

Int64Index([    1,     2,     3,     4,     5,     6,     7,     8,     9,
               10,
            ...
             9991,  9992,  9993,  9994,  9995,  9996,  9997,  9998,  9999,
            10000],
           dtype='int64', name='book_id', length=10000)

In [28]:
#On met tout ça dans un dataframe
preds_df = pd.DataFrame(predicted_rating, columns=R_df.columns)
preds_df.head()

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
0,0.666901,0.200106,0.814242,2.955004,2.049275,0.532281,0.680036,1.866034,2.107746,3.001596,...,0.04998,0.004429,-0.000205,-0.011982,0.005277,0.003615,0.005006,-0.003034,-0.000109,-0.009757
1,-0.018953,4.478084,0.716063,2.089408,1.784051,0.296253,0.795436,1.517782,1.746117,2.402022,...,0.003091,0.004375,0.012191,-0.008064,0.011408,0.005303,0.036265,0.011772,0.033925,0.038475
2,-0.141233,-0.148606,-0.155345,1.412267,1.047502,-0.087945,0.110216,0.982685,-0.109602,0.581602,...,0.006425,0.005515,0.007869,0.008709,0.016832,0.005822,0.011319,0.008376,0.011497,0.012169
3,-0.08895,4.441176,0.809893,5.656183,4.02903,0.094222,2.558393,4.216998,1.911742,2.5756,...,-0.001589,-0.031924,-0.027654,-0.012523,0.007041,-0.017774,0.001282,-0.004798,-0.004863,0.001096
4,-0.035806,0.152518,-0.192749,0.380092,0.295091,1.227638,-0.090605,0.131068,-0.133621,0.319065,...,0.02782,0.026052,0.026277,0.023461,0.018406,0.029623,0.023337,0.02162,0.032523,0.019107


In [None]:
def recommend_booksSVD(user_id, num_recommendations=100):
    #Ordonner les prédictions pour l'utilisateur donné
    sorted_user_predictions = preds_df.iloc[user_id].sort_values(ascending=False)
    user_data = ratings[ratings["user_id"] == user_id]
    non_read_books = books[~books['book_id'].isin(user_data['book_id'])]
    non_read_books = non_read_books.merge(sorted_user_predictions, how="left", on="book_id")
    non_read_books.rename(columns={user_id : "Predictions"}, inplace=True)
    non_read_books = non_read_books[["book_id", "goodreads_book_id", "original_title", "Predictions"]]
    non_read_books = non_read_books.sort_values("Predictions", ascending=False)
    non_read_books = non_read_books[:num_recommendations]
    
    return non_read_books

test = recommend_booksSVD(1)
test

In [52]:
user_data = ratings[ratings["user_id"] == 1]
user_data


Unnamed: 0,user_id,book_id,rating
0,1,258,5
75,1,268,3
76,1,5556,3
77,1,3638,3
78,1,1796,5
...,...,...,...
5704475,1,142,4
5704476,1,642,4
5704477,1,901,4
5704479,1,212,3


In [63]:
non_read_books = books[~books['book_id'].isin(user_full['book_id'])]

In [64]:
non_read_books.merge(test, how="left", on="book_id")

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,1
0,1,2767052,2767052,2792775,272,439023483,9.780439e+12,Suzanne Collins,2008.0,The Hunger Games,...,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,-0.218795
1,2,3,3,4640799,491,439554934,9.780440e+12,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...,4.815195
2,3,41865,41865,3212258,226,316015849,9.780316e+12,Stephenie Meyer,2005.0,Twilight,...,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...,0.171729
3,5,4671,4671,245494,1356,743273567,9.780743e+12,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...,5.230449
4,6,11870085,11870085,16827462,226,525478817,9.780525e+12,John Green,2012.0,The Fault in Our Stars,...,2478609,140739,47994,92723,327550,698471,1311871,https://images.gr-assets.com/books/1360206420m...,https://images.gr-assets.com/books/1360206420s...,0.363471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9878,9996,7130616,7130616,7392860,19,441019455,9.780441e+12,Ilona Andrews,2010.0,Bayou Moon,...,18856,1180,105,575,3538,7860,6778,https://images.gr-assets.com/books/1307445460m...,https://images.gr-assets.com/books/1307445460s...,0.023715
9879,9997,208324,208324,1084709,19,067973371X,9.780680e+12,Robert A. Caro,1990.0,Means of Ascent,...,12952,395,303,551,1737,3389,6972,https://s.gr-assets.com/assets/nophoto/book/11...,https://s.gr-assets.com/assets/nophoto/book/50...,0.052463
9880,9998,77431,77431,2393986,60,039330762X,9.780393e+12,Patrick O'Brian,1977.0,The Mauritius Command,...,10733,374,11,111,1191,4240,5180,https://images.gr-assets.com/books/1455373531m...,https://images.gr-assets.com/books/1455373531s...,0.010218
9881,9999,8565083,8565083,13433613,7,61711527,9.780062e+12,Peggy Orenstein,2011.0,Cinderella Ate My Daughter: Dispatches from th...,...,11994,1988,275,1002,3765,4577,2375,https://images.gr-assets.com/books/1279214118m...,https://images.gr-assets.com/books/1279214118s...,0.035599


In [53]:
books = pd.read_csv("books.csv")

In [54]:
user_full = (user_data.merge(books, how="left", on="book_id").sort_values(["rating"], ascending = False))[["book_id","original_title"]]

In [55]:
user_full

Unnamed: 0,book_id,original_title
0,258,La sombra del viento
14,11,The Kite Runner
30,70,Ender's Game
29,1521,Ἀντιγόνη
26,4,To Kill a Mockingbird
...,...,...
34,138,The Scarlet Letter
27,492,Speaker for the Dead
12,4614,The Emperor's Children
25,94,Cien años de soledad


In [61]:
recommendations = (books[~books['book_id'].isin(user_full['book_id'])].
      merge(pd.DataFrame(test).reset_index(), how='left',
              on="book_id").
      rename(columns={user_id: 'Predictions'}).
      sort_values('Predictions', ascending=False).
              iloc[:10, :-1]
      )

In [57]:
recommendations

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
12,18,5,5,2402163,376,043965548X,9780440000000.0,"J.K. Rowling, Mary GrandPré, Rufus Beck",1999.0,Harry Potter and the Prisoner of Azkaban,...,1832823,1969375,36099,6716,20413,166129,509447,1266670,https://images.gr-assets.com/books/1499277281m...,https://images.gr-assets.com/books/1499277281s...
3,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...
17,24,6,6,3046572,332,439139600,9780439000000.0,"J.K. Rowling, Mary GrandPré",2000.0,Harry Potter and the Goblet of Fire,...,1753043,1868642,31084,6676,20210,151785,494926,1195045,https://images.gr-assets.com/books/1361482611m...,https://images.gr-assets.com/books/1361482611s...
20,27,1,1,41335427,275,439785960,9780440000000.0,"J.K. Rowling, Mary GrandPré",2005.0,Harry Potter and the Half-Blood Prince,...,1678823,1785676,27520,7308,21516,136333,459028,1161491,https://images.gr-assets.com/books/1361039191m...,https://images.gr-assets.com/books/1361039191s...
18,25,136251,136251,2963218,263,545010225,9780545000000.0,"J.K. Rowling, Mary GrandPré",2007.0,Harry Potter and the Deathly Hallows,...,1746574,1847395,51942,9363,22245,113646,383914,1318227,https://images.gr-assets.com/books/1474171184m...,https://images.gr-assets.com/books/1474171184s...
16,23,15881,15881,6231171,398,439064864,9780439000000.0,"J.K. Rowling, Mary GrandPré",1998.0,Harry Potter and the Chamber of Secrets,...,1779331,1906199,34172,8253,42251,242345,548266,1065084,https://images.gr-assets.com/books/1474169725m...,https://images.gr-assets.com/books/1474169725s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
15,21,2,2,2809203,307,439358078,9780439000000.0,"J.K. Rowling, Mary GrandPré",2003.0,Harry Potter and the Order of the Phoenix,...,1735368,1840548,28685,9528,31577,180210,494427,1124806,https://images.gr-assets.com/books/1387141547m...,https://images.gr-assets.com/books/1387141547s...
10,15,48855,48855,3532896,710,553296981,9780553000000.0,"Anne Frank, Eleanor Roosevelt, B.M. Mooyaart-D...",1947.0,Het Achterhuis: Dagboekbrieven 14 juni 1942 - ...,...,1972666,2024493,20825,45225,91270,355756,656870,875372,https://images.gr-assets.com/books/1358276407m...,https://images.gr-assets.com/books/1358276407s...
88,127,2612,2612,2124255,100,316346624,9780316000000.0,Malcolm Gladwell,2000.0,The Tipping Point: How Little Things Can Make ...,...,490504,499066,10895,15111,25871,106453,189120,162511,https://images.gr-assets.com/books/1473396980m...,https://images.gr-assets.com/books/1473396980s...


In [58]:
recommendations.columns

Index(['book_id', 'goodreads_book_id', 'best_book_id', 'work_id',
       'books_count', 'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url'],
      dtype='object')