In [1]:
import numpy as np

# Print every float inside numpy arrays with two decimals; the space in the
# format spec reserves a sign column so positive values stay aligned.
np.set_printoptions(formatter={'float': '{: 0.2f}'.format})

# Each row is a user and each column is a movie.
# A 1 means the user likes the movie; a 0 means the user doesn't know about it.
matrix = np.array([
    [1, 1, 0, 0, 0],
    [1, 1, 1, 0, 0],
    [0, 0, 1, 1, 1],
    [1, 1, 1, 0, 1],
    [1, 1, 1, 0, 0],
    [1, 1, 0, 0, 0]
])

# Rows count the users, columns count the movies.
numOfUsers = matrix.shape[0]
numOfMovies = matrix.shape[1]

print("matrix %d users x %d movies" % (numOfUsers, numOfMovies))
print(matrix)

matrix 6 users x 5 movies
[[1 1 0 0 0]
 [1 1 1 0 0]
 [0 0 1 1 1]
 [1 1 1 0 1]
 [1 1 1 0 0]
 [1 1 0 0 0]]


In [2]:
from sklearn.metrics.pairwise import pairwise_distances

# Cosine distance between every pair of user rows: the larger the distance,
# the less similar the two users are.
userDistances = pairwise_distances(matrix, metric="cosine")
print("\nuser distances", userDistances, sep="\n")
def printUserDistance(userDistances, userA, userB):
    """Print the entry ``userDistances[userA, userB]`` rounded to two decimals.

    Args:
        userDistances: Matrix indexable as ``[userA, userB]``.
        userA: Index of the first user.
        userB: Index of the second user.
    """
    distance = userDistances[userA, userB]
    message = "distance between users %d and %d is %0.2f" % (userA, userB, distance)
    print(message)
# Distance from user 0 to itself and to users 1 and 2.
for otherUser in (0, 1, 2):
    printUserDistance(userDistances, 0, otherUser)

# Transposing the matrix makes each row a movie, so the same call yields
# movie-to-movie cosine distances.
movieDistances = pairwise_distances(matrix.T, metric="cosine")
print("\nmovie distances", movieDistances, sep="\n")
def printMovieDistance(movieDistances, movieA, movieB):
    """Print the entry ``movieDistances[movieA, movieB]`` rounded to two decimals.

    Args:
        movieDistances: Matrix indexable as ``[movieA, movieB]``.
        movieA: Index of the first movie.
        movieB: Index of the second movie.
    """
    distance = movieDistances[movieA, movieB]
    message = "distance between movies %d and %d is %0.2f" % (movieA, movieB, distance)
    print(message)
# Distance from movie 0 to each of the five movies, itself included.
for otherMovie in range(5):
    printMovieDistance(movieDistances, 0, otherMovie)


user distances
[[ 0.00  0.18  1.00  0.29  0.18  0.00]
 [ 0.18  0.00  0.67  0.13  0.00  0.18]
 [ 1.00  0.67  0.00  0.42  0.67  1.00]
 [ 0.29  0.13  0.42  0.00  0.13  0.29]
 [ 0.18  0.00  0.67  0.13  0.00  0.18]
 [ 0.00  0.18  1.00  0.29  0.18  0.00]]
distance between users 0 and 0 is 0.00
distance between users 0 and 1 is 0.18
distance between users 0 and 2 is 1.00

movie distances
[[ 0.00  0.00  0.33  1.00  0.68]
 [ 0.00  0.00  0.33  1.00  0.68]
 [ 0.33  0.33  0.00  0.50  0.29]
 [ 1.00  1.00  0.50  0.00  0.29]
 [ 0.68  0.68  0.29  0.29  0.00]]
distance between movies 0 and 0 is 0.00
distance between movies 0 and 1 is 0.00
distance between movies 0 and 2 is 0.33
distance between movies 0 and 3 is 1.00
distance between movies 0 and 4 is 0.68


In [9]:
from collections import namedtuple

# Record type for one neighbour. Built once at module level so every call to
# findSimilarUsers returns instances of the same class.
UserAndDistance = namedtuple("UserAndDistance", ["userId", "distance"])

def findSimilarUsers(targetUserId):
    """Return every other user, ordered from most to least similar.

    Reads row ``targetUserId`` of the module-level ``userDistances`` matrix.

    Args:
        targetUserId: Row index of the user to compare the others against.

    Returns:
        A list of ``UserAndDistance(userId, distance)`` tuples, one for each
        user other than ``targetUserId``, sorted by ascending distance. Users
        at equal distance keep their index order because the sort is stable.
    """
    neighbours = [
        UserAndDistance(userId, distance)
        for userId, distance in enumerate(userDistances[targetUserId])
        if userId != targetUserId
    ]
    neighbours.sort(key=lambda neighbour: neighbour.distance)
    return neighbours

# Record type for one liked movie. Built once at module level so every call to
# findMoviesLikedByUser returns instances of the same class.
LikedMovie = namedtuple("LikedMovie", ["movieId", "isLiked"])

def findMoviesLikedByUser(targetUserId):
    """Return the movies whose entry in the user's row of ``matrix`` is truthy.

    Reads row ``targetUserId`` of the module-level ``matrix``.

    Args:
        targetUserId: Row index of the user in ``matrix``.

    Returns:
        A list of ``LikedMovie(movieId, isLiked)`` tuples in ascending
        ``movieId`` order, where ``isLiked`` is the raw matrix entry. The list
        is empty when the row has no truthy entries.
    """
    return [
        LikedMovie(movieId, isLiked)
        for movieId, isLiked in enumerate(matrix[targetUserId])
        if isLiked
    ]
    
def recommendMoviesToUser(targetUserId, maxDistance=0.999):
    """Score the movies the target user has not liked yet, using similar users' likes.

    Every other user whose distance to the target is at most ``maxDistance``
    votes for each movie they like. A vote is worth ``1 - distance``, so closer
    users count for more, and votes for the same movie are summed.

    Args:
        targetUserId: Index of the user to produce recommendations for.
        maxDistance: Users farther away than this are ignored. The default of
            0.999 matches the cutoff this function has always used.

    Returns:
        A dict mapping ``movieId`` to its summed weight. Movies the target
        user already likes are never included.
    """
    alreadyLikedIds = {movie.movieId for movie in findMoviesLikedByUser(targetUserId)}

    weightenedRecommendations = {}
    for similarUser in findSimilarUsers(targetUserId):
        if similarUser.distance > maxDistance:
            continue

        # The vote weight depends only on the user, so compute it once per user.
        weight = 1 - similarUser.distance
        for likedMovie in findMoviesLikedByUser(similarUser.userId):
            likedMovieId = likedMovie.movieId
            # Skip movies the target already likes instead of scoring them
            # and removing them afterwards.
            if likedMovieId in alreadyLikedIds:
                continue
            weightenedRecommendations[likedMovieId] = (
                weightenedRecommendations.get(likedMovieId, 0) + weight
            )

    return weightenedRecommendations
    
print("recommendations")
# Show the recommendation scores for the first three users.
for label, userId in (("user 0", 0), ("user 1", 1), ("user 2", 2)):
    print(label, recommendMoviesToUser(userId))

recommendations
user 0 {2: 2.3400999430419995, 4: 0.70710678118654746}
user 1 {4: 1.1993587371177723, 3: 0.33333333333333348}
user 2 {0: 1.2440169358562927, 1: 1.2440169358562927}
