In [2]:
# reset directory
%reset -f

# load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity


In [65]:
# read in data
books = pd.read_csv(r"c:\Users\e1002902\Downloads\archive\Books_rating.csv")

In [66]:
books.head(2)

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text
0,1882931173,Its Only Art If Its Well Hung!,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,940636800,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...
1,826414346,Dr. Seuss: American Icon,,A30TK6U7DNS82R,Kevin Killian,10/10,5.0,1095724800,Really Enjoyed It,I don't care much for Dr. Seuss but after read...


# Cleaning Data

1. Remove Duplicate reviews

2. Remove books with less than 10 reviews

3. Remove users with less than 10 reviews

In [67]:
books = books[['User_id', 'Title', 'review/score', 'review/text']]
books 

Unnamed: 0,User_id,Title,review/score,review/text
0,AVCGYZL8FQQTD,Its Only Art If Its Well Hung!,4.0,This is only for Julie Strain fans. It's a col...
1,A30TK6U7DNS82R,Dr. Seuss: American Icon,5.0,I don't care much for Dr. Seuss but after read...
2,A3UH4UZ4RSVO82,Dr. Seuss: American Icon,5.0,"If people become the books they read and if ""t..."
3,A2MVUWT453QH61,Dr. Seuss: American Icon,4.0,"Theodore Seuss Geisel (1904-1991), aka &quot;D..."
4,A22X4XUPKF66MR,Dr. Seuss: American Icon,4.0,Philip Nel - Dr. Seuss: American IconThis is b...
...,...,...,...,...
2999995,,The Idea of History,4.0,"This is an extremely difficult book to digest,..."
2999996,A1SMUB9ASL5L9Y,The Idea of History,4.0,This is pretty interesting. Collingwood seems ...
2999997,A2AQMEKZKK5EE4,The Idea of History,4.0,"This is a good book but very esoteric. ""What i..."
2999998,A18SQGYBKS852K,The Idea of History,5.0,"My daughter, a freshman at Indiana University,..."


In [68]:
# count number of duplicates
print("Number duplicate reviews", books.duplicated().sum())

# see duplicates
print("Duplicates")
display(books[books.duplicated(keep=False)].head(10))

# remove duplicates
books.drop_duplicates(inplace=True)

Number duplicate reviews 380279
Duplicates


Unnamed: 0,User_id,Title,review/score,review/text
253,,King James: Believe the Hype---The LeBron Jame...,4.0,King James by Ryan Jones is a biography of Leb...
256,,King James: Believe the Hype---The LeBron Jame...,4.0,King James by Ryan Jones is a biography of Leb...
389,A3FJAY5LKN0DOM,The book of the lover and the beloved;,5.0,A friend gave me this book a couple of years a...
390,A2SVFGRQB3DHZ,The book of the lover and the beloved;,4.0,If sometimes the meaning is inaccessible the e...
422,,Night World: Daughters Of Darkness,5.0,This book was outstanding! I couldn't put it d...
423,,Night World: Daughters Of Darkness,5.0,This book was outstanding! I couldn't put it d...
428,,Night World: Daughters Of Darkness,5.0,"I was sceptical about this book at first, I ha..."
429,,Night World: Daughters Of Darkness,5.0,"I was sceptical about this book at first, I ha..."
529,,America at 1750: A Social Portrait,5.0,The brilliant historian Richard Hofstadter was...
530,A3DKP67DK28RUB,America at 1750: A Social Portrait,5.0,This is a magnificent work of historical imagi...


In [None]:
# drop rows with missing values in review/text, review/score, Title and User_id
books.dropna(subset=['review/text', 'review/score', 'Title', 'User_id'], inplace=True)

In [71]:
# check data 
print("Shape of data", books.shape)
print("Number of unique users", books.User_id.nunique())
print("Number of unique books", books.Title.nunique())

Shape of data (2132528, 4)
Number of unique users 1008961
Number of unique books 206711


In [77]:
# get count of reviews per book
book_review_count = books.groupby('Title')['review/score'].count().reset_index().rename(columns={'review/score':'review_count'}).sort_values('review_count', ascending=False)

# How many books with less than 10 reviews?
print("How many books with less than 10 reviews:", book_review_count[book_review_count.review_count < 30].shape)

# remove books with less than 10 reviews
books = books[books.Title.isin(book_review_count[book_review_count.review_count > 30].Title)]

# get updated info on data
print("Shape of data", books.shape)
print("Number of unique users", books.User_id.nunique())
print("Number of unique books", books.Title.nunique())

How many books with less than 10 reviews: (13428, 2)
Shape of data (129592, 4)
Number of unique users 4539
Number of unique books 1727


In [78]:
# get count of reviews per user
user_review_count = books.groupby('User_id')['review/score'].count().reset_index().rename(columns={'review/score':'review_count'}).sort_values('review_count', ascending=False)

# How many users with less than 20 reviews?
print("Number users with less than 20 reviews", user_review_count[user_review_count.review_count > 20].shape)

# remove users with less than 20 reviews
books = books[books.User_id.isin(user_review_count[user_review_count.review_count > 20].User_id)]

# get updated info on data
print("Shape of data", books.shape)
print("Number of unique users", books.User_id.nunique())
print("Number of unique books", books.Title.nunique())
display(books.head(3))


Number users with less than 20 reviews (2249, 2)
Shape of data (101149, 4)
Number of unique users 2249
Number of unique books 1727


Unnamed: 0,User_id,Title,review/score,review/text
1222,AF3X7J0XC391L,Economics in one lesson,5.0,"This is, in my opinion, the most important int..."
1249,A321W4SSC0F6AP,Economics in one lesson,5.0,"You know how most economics books are really, ..."
1260,A3JPFWKS83R49V,Economics in one lesson,4.0,Author Henry Hazlitt is definitely a member of...


## Validate Cleaning

Check to see if user review count is above 20

In [79]:
# get count of reviews per user
print("Count of Reviews per user:") 
display(books.groupby('User_id')['review/score'].count().reset_index().rename(columns={'review/score':'review_count'}).sort_values('review_count', ascending=True).head(5))

# get count of reviews per book
print("Count of Reviews per book:")
display(books.groupby('Title')['review/score'].count().reset_index().rename(columns={'review/score':'review_count'}).sort_values('review_count', ascending=True).head(5))

Count of Reviews per user:


Unnamed: 0,User_id,review_count
1529,A3N0E03AQD128O,21
166,A1AXG78TNTPDA6,21
1927,AGZJEUQXB2150,21
1515,A3MCQSIBV7QW8Q,21
1171,A30RI6N2MGFMFK,21


Count of Reviews per book:


Unnamed: 0,Title,review_count
1214,"The Duke and I (Bridgerton Series, Book 1)",6
1688,"Whitney, My Love",7
254,Carolina Moon,12
203,Black Rose,12
204,Black Rose (In the Garden Series),12


In [80]:
# final check on data
print("Shape of data", books.shape)
print("Number of unique users", books.User_id.nunique())
print("Number of unique books", books.Title.nunique())


Shape of data (101149, 4)
Number of unique users 2249
Number of unique books 1727


In [81]:
# check duplicates
print("Number of duplicates:", books.duplicated().sum())

# see duplicates
print("Duplicates")
display(books[books.duplicated(keep=False)].head(10))

Number of duplicates: 0
Duplicates


Unnamed: 0,User_id,Title,review/score,review/text


# Item-User Matrix

In [87]:
# create pivot table
books_pivot = books.pivot_table(index='User_id', columns='Title', values='review/score').fillna(0)
books_pivot.head(3)

Title,"""A"" IS FOR ALIBI","1,000 Places to See Before You Die: A Traveler's Life List",1491: New Revelations of the Americas Before Columbus,1632 (The Assiti Shards),1984,1st to Die: A Novel,"20, 000 Leagues Under the Sea",2001: A Space Odyssey,4 Blondes,48 Laws of Power,...,Year of Wonders (Turtleback School & Library Binding Edition),Zen And The Art of Motorcycle Maintenance,Zen and the Art of Motorcycle Maintenance,Zen and the Art of Motorcycle Maintenance : An Inquiry Into Values,Zen and the Art of Motorcycle Maintenance : An Inquiry into Values,Zorro - A Novel,everything on this page is for Treasure Island,prince caspian: the return to narnia,the Picture of Dorian Gray,the illustrated man
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A106016KSI0YQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10872FHIJAKKD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10A1S5NAQBT21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0


# Item Based Collaborative Filtering

1. Have User Item matrix
2. Calculate similarity (cosine similarity)
3. Calculate weighted average of ratings
4. Fill in missing values with predicted ratings

In [88]:
# SIMILARITY MATRIX ########################################## 

# Calculate item-item similarity matrix
item_similarity_matrix = cosine_similarity(books_pivot.T)  # Transpose the matrix to have items as rows


# PREDICTED RATINGS ########################################## 

# create function to predict ratings
def predict_ratings(user_ratings, item_similarity_matrix):
    
    # Initialize an array to store predicted ratings
    predicted_ratings = np.zeros(user_ratings.shape)

    # Iterate through each user
    for user_id in range(user_ratings.shape[0]):
        
        # Find unrated books (where user_ratings == 0)
        unrated_books = np.where(user_ratings[user_id, :] == 0)[0]

        # Iterate through unrated books
        for book_id in unrated_books:
            # Calculate the predicted rating for the unrated book
            rated_books = np.where(user_ratings[user_id, :] > 0)[0]
            predicted_rating = np.sum(
                user_ratings[user_id, rated_books] * item_similarity_matrix[book_id, rated_books]
            ) / np.sum(np.abs(item_similarity_matrix[book_id, rated_books]))

            # Assign the predicted rating to the user_ratings matrix
            predicted_ratings[user_id, book_id] = predicted_rating

    return predicted_ratings

# apply function to get predicted ratings
predicted_ratings = predict_ratings(books_pivot.values, item_similarity_matrix)

# FILL IN MATRIX ###############################################

# Fill in the matrix with predicted ratings
filled_matrix = books_pivot.copy()
filled_matrix = filled_matrix.to_numpy()
filled_matrix[filled_matrix == 0] = predicted_ratings[filled_matrix == 0]
filled_matrix = pd.DataFrame(filled_matrix, columns=books_pivot.columns, index=books_pivot.index)

In [99]:
filled_matrix

Title,"""A"" IS FOR ALIBI","1,000 Places to See Before You Die: A Traveler's Life List",1491: New Revelations of the Americas Before Columbus,1632 (The Assiti Shards),1984,1st to Die: A Novel,"20, 000 Leagues Under the Sea",2001: A Space Odyssey,4 Blondes,48 Laws of Power,...,Year of Wonders (Turtleback School & Library Binding Edition),Zen And The Art of Motorcycle Maintenance,Zen and the Art of Motorcycle Maintenance,Zen and the Art of Motorcycle Maintenance : An Inquiry Into Values,Zen and the Art of Motorcycle Maintenance : An Inquiry into Values,Zorro - A Novel,everything on this page is for Treasure Island,prince caspian: the return to narnia,the Picture of Dorian Gray,the illustrated man
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A106016KSI0YQ,3.698780,3.807621,3.575590,3.946012,3.974340,3.883530,4.217895,3.956143,3.379133,3.647744,...,3.712372,3.935393,3.935393,3.935393,3.935393,3.726164,4.132121,4.236961,3.973735,3.991655
A10872FHIJAKKD,5.000000,4.785000,4.847160,4.939276,4.889311,4.748068,4.936126,4.930877,5.000000,5.000000,...,4.421572,4.935530,4.935530,4.935530,4.935530,5.000000,4.839874,4.870104,4.852613,4.879402
A10A1S5NAQBT21,4.672861,5.000000,5.000000,5.000000,4.532852,4.303542,4.334120,4.662730,5.000000,4.784756,...,4.698969,4.622180,4.622180,4.622180,4.622180,5.000000,4.302350,4.426234,5.000000,4.416384
A10BZSGALQPS0V,3.667069,4.292849,4.054266,4.167092,4.057489,4.163900,3.960534,4.100171,4.345257,4.116741,...,4.158555,4.079098,4.079098,4.079098,4.079098,4.179283,4.132759,4.390096,3.985937,4.313282
A10ETB3064YV0F,4.288135,3.847626,3.955799,4.515917,4.316896,4.131319,4.140898,4.261409,3.934975,3.595715,...,4.318098,3.752009,3.752009,3.752009,3.752009,4.180216,3.957032,4.332043,4.094625,3.929897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AZJ1N5LS6Q0FD,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,...,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000
AZM09BO1FD418,3.473763,3.041298,3.190770,4.061257,4.121433,3.747529,4.361101,4.201540,2.000000,3.000000,...,3.994209,3.868362,3.868362,3.868362,3.868362,3.214507,4.155484,4.288809,4.075726,4.126608
AZSNQ9FYC5SQO,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,...,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000
AZSV99SDJC242,4.176972,4.458849,4.279156,4.277241,4.416627,4.360988,4.280244,4.390886,3.748792,4.179858,...,4.386042,4.326155,4.326155,4.326155,4.326155,4.586209,4.249465,4.069510,4.309007,4.535623


# Evaluation of Model

1. Split data into train and test
2. Apply steps 1-4 above: Rebuild the filled in matrix
3. Calculate MAE, RMSE, MSE

In [103]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Step 1: Create a copy of the original matrix to store hidden ratings
training_matrix = books_pivot.copy()

# Iterate through each user
for user_id in range(training_matrix.shape[0]):
    # Find the indices of rated books for this user
    rated_books = np.where(training_matrix.iloc[user_id, :] > 0)[0]
    
    # Randomly select 5 ratings to hide (if available)
    num_to_hide = min(5, len(rated_books))
    hidden_indices = np.random.choice(rated_books, num_to_hide, replace=False)
    
    # Set the selected ratings to 0 in the hidden ratings matrix
    training_matrix.iloc[user_id, hidden_indices] = 0

# Step 2: Rebuild the Filled Matrix (use your existing predict_ratings function)
item_similarity_matrix = cosine_similarity(training_matrix.T)
predicted_ratings = predict_ratings(training_matrix.values, item_similarity_matrix)

  predicted_rating = np.sum(


In [105]:
# Step 3: Calculate Evaluation Metrics
# Extract hidden ratings and corresponding predicted ratings
hidden_ratings = training_matrix.values[training_matrix.values != 0]
predicted_hidden_ratings = predicted_ratings[training_matrix.values != 0]

# Calculate MAE, MSE, and RMSE
mae = mean_absolute_error(hidden_ratings, predicted_hidden_ratings)
mse = mean_squared_error(hidden_ratings, predicted_hidden_ratings)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")


Mean Absolute Error (MAE): 4.303012298252018
Mean Squared Error (MSE): 19.43548823977144
Root Mean Squared Error (RMSE): 4.408569863319786


(86497,)

# Testing Area (Ignore)


In [28]:
x = pd.read_csv(r"C:\Users\e1002902\Documents\GitHub Repository\Masters-Dissertation\Code\temp_data.csv", index_col=0)
x

Unnamed: 0,book1,book2,book3,book4,book5,book6,book7,book8,book9,book10
user1,0,0,2,5,4,3,4,4,4,4
user2,4,0,3,5,0,0,0,0,0,4
user3,0,3,4,4,0,2,0,0,0,0
user4,0,0,3,5,4,0,0,0,0,0
user5,3,4,0,4,4,0,5,5,5,5
user6,4,5,0,0,0,0,4,2,2,0
user7,2,2,0,0,0,0,5,3,3,3
user8,0,5,4,0,4,3,0,0,0,0
user9,0,5,4,0,5,2,0,2,2,0
user10,0,0,0,0,5,0,4,4,4,4


In [29]:

np.random.seed(10)  # You can use any integer value as the seed
for user_id in range(x.shape[0]):
    rated_books = np.where(x.iloc[user_id, :] > 0)[0]
    print(user_id)
    print(rated_books)
    hidden_indices = np.random.choice(rated_books, min(2, len(rated_books)), replace=False)
    print(hidden_indices)
    x.iloc[user_id, hidden_indices] = 0


0
[2 3 4 5 6 7 8 9]
[4 5]
1
[0 2 3 9]
[3 9]
2
[1 2 3 5]
[5 1]
3
[2 3 4]
[4 2]
4
[0 1 3 4 6 7 8 9]
[9 1]
5
[0 1 6 7 8]
[8 1]
6
[0 1 6 7 8 9]
[8 9]
7
[1 2 4 5]
[1 5]
8
[1 2 4 5 7 8]
[1 7]
9
[4 6 7 8 9]
[6 4]
10
[0 1 2 4 6 7 8]
[2 0]
11
[0 1 2 4 5 6 7 8]
[1 7]


In [23]:
x

Unnamed: 0,book1,book2,book3,book4,book5,book6,book7,book8,book9,book10
user1,0,0,2,5,4,3,4,4,4,4
user2,0,0,3,0,0,0,0,0,0,4
user3,0,3,4,4,0,2,0,0,0,0
user4,0,0,0,5,0,0,0,0,0,0
user5,3,4,0,4,4,0,5,5,5,5
user6,4,5,0,0,0,0,4,2,2,0
user7,2,2,0,0,0,0,5,3,3,3
user8,0,5,4,0,4,3,0,0,0,0
user9,0,5,4,0,5,2,0,2,2,0
user10,0,0,0,0,5,0,4,4,4,4


In [24]:
# where 'HIDDEN' make 0
testing = testing.replace('HIDDEN', 0)
item_similarity_matrix = cosine_similarity(testing.T)
predicted_ratings = predict_ratings(testing.values, item_similarity_matrix)

NameError: name 'testing' is not defined

In [197]:
testing

Title,"1,000 Places to See Before You Die: A Traveler's Life List",1491: New Revelations of the Americas Before Columbus,1632 (The Assiti Shards),1984,1st to Die: A Novel,"20, 000 Leagues Under the Sea",2001: A Space Odyssey,4 Blondes,48 Laws of Power
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A10872FHIJAKKD,0.0,2.0,2.0,0.0,3.0,0.0,0.0,4.0,0.0
A10A1S5NAQBT21,2.0,3.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0
A10BZSGALQPS0V,0.0,0.0,0.0,0.0,5.0,0.0,1.0,1.0,3.0
A10ETB3064YV0F,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0
A10KKJMBY5L7TK,0.0,2.0,1.0,1.0,0.0,0.0,4.0,0.0,1.0
A10LWBOIZCF2QT,0.0,0.0,3.0,0.0,0.0,0.0,5.0,0.0,0.0
A10T0OW97SFBB,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0
A10X0JN8KTK89H,4.0,0.0,0.0,5.0,0.0,2.0,0.0,0.0,1.0
A10Y3OZWENAQ6W,0.0,0.0,0.0,4.0,0.0,0.0,3.0,0.0,0.0
A1129LM24YWSZV,0.0,0.0,4.0,4.0,0.0,5.0,4.0,0.0,0.0


In [198]:
# Find unrated books (where user_ratings == 0)
unrated_books = np.where(testing.values[1, :] == 0)[0]
unrated_books

array([2, 4, 6, 7, 8], dtype=int64)

In [199]:
rated_books = np.where(testing.values[1, :] > 0)[0]
rated_books

array([0, 1, 3, 5], dtype=int64)

In [200]:
item_similarity_matrix[2, rated_books]

array([0.        , 0.20851441, 0.27512557, 0.43958698])

In [None]:
# Calculate the predicted rating for the unrated book

predicted_rating = np.sum(
    testing[1, rated_books] * item_similarity_matrix[2, rated_books]
) / np.sum(np.abs(item_similarity_matrix[book_id, rated_books]))

predicted_rating

In [195]:
pd.DataFrame(predicted_ratings, columns=testing.columns, index=testing.index).round(2)

Title,"1,000 Places to See Before You Die: A Traveler's Life List",1491: New Revelations of the Americas Before Columbus,1632 (The Assiti Shards),1984,1st to Die: A Novel,"20, 000 Leagues Under the Sea",2001: A Space Odyssey,4 Blondes,48 Laws of Power
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A10872FHIJAKKD,2.0,0.0,0.0,2.0,0.0,2.0,2.25,0.0,2.9
A10A1S5NAQBT21,0.0,0.0,4.07,0.0,3.0,0.0,4.13,3.0,3.28
A10BZSGALQPS0V,3.0,2.12,1.81,1.61,0.0,1.38,0.0,0.0,0.0
A10ETB3064YV0F,1.0,2.55,0.0,2.8,1.53,3.53,3.02,2.55,0.0
A10KKJMBY5L7TK,1.24,0.0,0.0,0.0,1.45,1.7,0.0,1.67,0.0
A10LWBOIZCF2QT,,4.15,0.0,4.2,3.8,3.89,0.0,3.46,4.18
A10T0OW97SFBB,1.0,0.0,1.47,1.4,1.06,1.41,0.0,0.0,0.0
A10X0JN8KTK89H,0.0,3.28,2.68,0.0,1.0,0.0,2.89,1.0,0.0
A10Y3OZWENAQ6W,4.0,3.61,3.31,0.0,3.0,3.7,0.0,3.0,3.38
A1129LM24YWSZV,4.42,4.31,0.0,0.0,4.0,0.0,0.0,4.0,4.11


In [None]:

predicted_hidden_ratings = predicted_ratings[training_matrix.values != 0]
