# Basic user 2 user 

In [1]:
import pandas as pd
import numpy as np

### Init and load the data

In [2]:
# Data paths
user_ratings_path = "../Datasets/Ratings.csv"
user_books_path = "../Datasets/Books.csv"
user_users_path = "../Datasets/Users.csv"

# Load data
user_ratings = pd.read_csv(user_ratings_path)
user_books = pd.read_csv(user_books_path)
user_users = pd.read_csv(user_users_path)


# Add ratings to the user_books
ratings_summary = user_ratings.groupby("ISBN")["Book-Rating"].agg(["count", "mean"])
ratings_summary = ratings_summary.rename(columns={"count": "Rating_Count", "mean": "Average_Rating"})
user_books = pd.merge(user_books, ratings_summary, on="ISBN", how="left")
user_books["Rating_Count"] = user_books["Rating_Count"].fillna(0) 
user_books["Average_Rating"] = user_books["Average_Rating"].fillna(user_books["Average_Rating"].median())

#Remove photos
user_books.drop(inplace=True, axis=1, columns=["Image-URL-S", "Image-URL-M", "Image-URL-L"])


  user_books = pd.read_csv(user_books_path)


In [3]:
model_path = "../models/books_embeddings_new_dataset.npy"

#Load the model
model = np.load(model_path, allow_pickle=True)

In [15]:
user_books.head(10)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Rating_Count,Average_Rating
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,1.0,0.0
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,14.0,4.928571
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,3.0,5.0
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,11.0,4.272727
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,1.0,0.0
5,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,33.0,4.212121
6,0425176428,What If?: The World's Foremost Military Histor...,Robert Cowley,2000,Berkley Publishing Group,5.0,1.6
7,0671870432,PLEADING GUILTY,Scott Turow,1993,Audioworks,3.0,2.666667
8,0679425608,Under the Black Flag: The Romance and the Real...,David Cordingly,1996,Random House,1.0,0.0
9,074322678X,Where You'll Find Me: And Other Stories,Ann Beattie,2002,Scribner,1.0,5.0


In [3]:
user_books.head(1)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...


In [7]:
model["book_embedding"] = model["book_embedding"].apply(
    lambda embedding_str: np.array([float(x) for x in embedding_str.split(',')]))

In [10]:
model.shape

(103063, 2)

In [8]:
model["Book-Title"] = model["name"]
model.drop(columns=["name"], inplace=True, axis=1)
model.head()

Unnamed: 0,book_embedding,Book-Title
0,"[0.012158381752669811, 0.12222578376531601, -0...",Goat Brothers
1,"[0.10339748859405518, 0.01937180384993553, -0....",The Missing Person
2,"[0.5171516537666321, 0.41499367356300354, -0.1...",Don't Eat Your Heart Out Cookbook
3,"[0.1548372060060501, -0.16966335475444794, -0....",When Your Corporate Umbrella Begins to Leak: A...
4,"[0.05578360706567764, -0.17162510752677917, -0...",Amy Spangler's Breastfeeding : A Parent's Guide


In [9]:
books = pd.merge(model, user_books, on="Book-Title", how="inner")
books.drop(axis=1, columns=["Image-URL-S", "Image-URL-M", "Image-URL-L"], inplace=True)

In [10]:
books.head()

Unnamed: 0,book_embedding,Book-Title,ISBN,Book-Author,Year-Of-Publication,Publisher,Rating_Count,Average_Rating
0,"[0.012158381752669811, 0.12222578376531601, -0...",Goat Brothers,038524407X,Larry Colton,1993,Doubleday Books,1.0,0.0
1,"[0.5171516537666321, 0.41499367356300354, -0.1...",Don't Eat Your Heart Out Cookbook,089480488X,Joseph C. Piscatella,1983,Workman Pub Co,1.0,0.0
2,"[0.1548372060060501, -0.16966335475444794, -0....",When Your Corporate Umbrella Begins to Leak: A...,091576590X,Paul D. Davis,1991,National Press Books,1.0,0.0
3,"[0.0917205661535263, 0.13396266102790833, -0.1...",The Foundation of Leadership: Enduring Princip...,096582070X,Strom Thurmond,1997,Excalibur Press,2.0,5.0
4,"[-0.15954221785068512, 0.1715346723794937, 0.0...",Journey Through Heartsongs,0786869429,Mattie J. T. Stepanek,2002,Hyperion Press,8.0,2.25


In [11]:
ratings_and_embeddings = pd.merge(model, user_books, on="Book-Title", how="inner")

In [17]:
ratings_and_embeddings.head()

Unnamed: 0,book_embedding,Book-Title,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Rating_Count,Average_Rating
0,"[0.012158381752669811, 0.12222578376531601, -0...",Goat Brothers,038524407X,Larry Colton,1993,Doubleday Books,http://images.amazon.com/images/P/038524407X.0...,http://images.amazon.com/images/P/038524407X.0...,http://images.amazon.com/images/P/038524407X.0...,1.0,0.0
1,"[0.5171516537666321, 0.41499367356300354, -0.1...",Don't Eat Your Heart Out Cookbook,089480488X,Joseph C. Piscatella,1983,Workman Pub Co,http://images.amazon.com/images/P/089480488X.0...,http://images.amazon.com/images/P/089480488X.0...,http://images.amazon.com/images/P/089480488X.0...,1.0,0.0
2,"[0.1548372060060501, -0.16966335475444794, -0....",When Your Corporate Umbrella Begins to Leak: A...,091576590X,Paul D. Davis,1991,National Press Books,http://images.amazon.com/images/P/091576590X.0...,http://images.amazon.com/images/P/091576590X.0...,http://images.amazon.com/images/P/091576590X.0...,1.0,0.0
3,"[0.0917205661535263, 0.13396266102790833, -0.1...",The Foundation of Leadership: Enduring Princip...,096582070X,Strom Thurmond,1997,Excalibur Press,http://images.amazon.com/images/P/096582070X.0...,http://images.amazon.com/images/P/096582070X.0...,http://images.amazon.com/images/P/096582070X.0...,2.0,5.0
4,"[-0.15954221785068512, 0.1715346723794937, 0.0...",Journey Through Heartsongs,0786869429,Mattie J. T. Stepanek,2002,Hyperion Press,http://images.amazon.com/images/P/0786869429.0...,http://images.amazon.com/images/P/0786869429.0...,http://images.amazon.com/images/P/0786869429.0...,8.0,2.25


In [16]:
user_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [None]:
user_ratings = user_ratings.merge(ratings_and_embeddings, on='ISBN', how='inner')

def weighted_embedding(user_data):
    ratings = user_data['Book-Rating'].to_numpy()
    embeddings = np.vstack(user_data['book_embedding'])

    weighted_sum = np.sum(ratings[:, np.newaxis] * embeddings, axis=0)
    total_weight = np.sum(ratings)

    return weighted_sum / total_weight


weighted_embeddings = user_ratings.groupby('User-ID').apply(weighted_embedding).reset_index()


weighted_embeddings.columns = ['User-ID', 'weighted_embedding']


  return weighted_sum / total_weight
  weighted_embeddings = user_ratings.groupby('User-ID').apply(weighted_embedding).reset_index()


AttributeError: 'function' object has no attribute 'head'

In [20]:
user_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,book_embedding_x,Book-Title_x,Book-Author_x,Year-Of-Publication_x,Publisher_x,Image-URL-S_x,Image-URL-M_x,...,book_embedding_y,Book-Title_y,Book-Author_y,Year-Of-Publication_y,Publisher_y,Image-URL-S_y,Image-URL-M_y,Image-URL-L_y,Rating_Count_y,Average_Rating_y
0,276725,034545104X,0,,,,,,,,...,,,,,,,,,,
1,276726,0155061224,5,,,,,,,,...,,,,,,,,,,
2,276727,0446520802,0,"[-0.11909034103155136, -0.08501839637756348, 0...",The Notebook,Nicholas Sparks,1996.0,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,...,"[-0.11909034103155136, -0.08501839637756348, 0...",The Notebook,Nicholas Sparks,1996.0,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,116.0,4.060345
3,276727,0446520802,0,"[-0.11909034103155136, -0.08501839637756348, 0...",The Notebook,Nicholas Sparks,1996.0,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,...,"[-0.11090793460607529, -0.1434868574142456, 0....",The Notebook,Nicholas Sparks,1996.0,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,116.0,4.060345
4,276727,0446520802,0,"[-0.11090793460607529, -0.1434868574142456, 0....",The Notebook,Nicholas Sparks,1996.0,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,...,"[-0.11909034103155136, -0.08501839637756348, 0...",The Notebook,Nicholas Sparks,1996.0,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,116.0,4.060345


In [14]:
merged = pd.merge(user_ratings, ratings_and_embeddings, on="Book-Title", how="inner")

KeyError: 'Book-Title'

In [None]:
user_ratings.groupby("User-ID")["Book-Rating"].agg(["count", "mean"])

Unnamed: 0_level_0,count,mean
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1
2,1,0.000000
7,1,0.000000
8,18,2.166667
9,3,2.000000
10,2,3.000000
...,...,...
278846,2,4.000000
278849,4,2.250000
278851,23,3.956522
278852,1,8.000000


In [13]:
users = pd.merge(user_ratings, books, on="ISBN", how="inner")
users.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,book_embedding,Book-Title,Book-Author,Year-Of-Publication,Publisher,Rating_Count,Average_Rating
0,276727,0446520802,0,"[-0.11909034103155136, -0.08501839637756348, 0...",The Notebook,Nicholas Sparks,1996,Warner Books,116.0,4.060345
1,276727,0446520802,0,"[-0.11090793460607529, -0.1434868574142456, 0....",The Notebook,Nicholas Sparks,1996,Warner Books,116.0,4.060345
2,276744,038550120X,7,"[0.08054692298173904, 0.2433057427406311, 0.24...",A Painted House,JOHN GRISHAM,2001,Doubleday,184.0,3.336957
3,276746,0425115801,0,"[-0.31400224566459656, 0.030658535659313202, 0...",Lightning,Dean R. Koontz,1996,Berkley Publishing Group,134.0,2.529851
4,276746,0425115801,0,"[-0.22315305471420288, -0.2346639186143875, 0....",Lightning,Dean R. Koontz,1996,Berkley Publishing Group,134.0,2.529851
