## Importing Libraries and Loading Data 

In [1]:
import math
import operator

# Building Custom Data for Movie Rating
# Here critics/users are 'Marlon Brando', 'Stephen King', 'Steven Spielberg' and so on...
# Under each critic/user we have some list of movies that they have rated and the corresponding ratings as key value pairs

review = {
'Marlon Brando': {
'The Godfather': 5.00, 
'The Godfather Part II': 4.29,
'Apocalypse Now': 5.00, 
'Jaws': 1.
},
'Stephen King': {
'The Shawshank Redemption': 4.89, 
'The Shining': 4.93 , 
'The Green Mile': 4.87,
'The Godfather': 1.33,
},
'Steven Spielberg': {
'Raiders of the Lost Ark': 5.0, 
'Jaws': 4.89,
'Saving Private Ryan': 4.78, 
'Star Wars Episode IV - A New Hope': 4.33,
'Close Encounters of the Third Kind': 4.77,
'The Godfather':  1.25,
'The Godfather Part II': 1.72
},
'George Lucas':{
'Star Wars Episode IV - A New Hope': 5.00	
},
'Al Pacino': {
'The Godfather': 4.02, 
'The Godfather Part II': 5.00,
},
'Robert DeNiro': {
'The Godfather': 3.07, 
'The Godfather Part II': 4.29, 
'Raging Bull': 5.00, 
'Goodfellas':  4.89
},
'Robert Duvall': {
'The Godfather': 3.80, 
'The Godfather Part II': 3.61,
'Apocalypse Now': 4.26 
},
'Jack Nicholson': {
'The Shining': 5.0,
'One Flew Over The Cuckoos Nest': 5.0,
'The Godfather': 2.22,
'The Godfather Part II': 3.34
},
'Morgan Freeman': {
'The Shawshank Redemption': 4.98,
'The Shining': 4.42,
'Apocalypse Now': 1.63,
'The Godfather': 1.12,
'The Godfather Part II': 2.16
},
'Harrison Ford': {
'Raiders of the Lost Ark': 5.0, 
'Star Wars Episode IV - A New Hope': 4.84,
},
'Tom Hanks': {
'Saving Private Ryan': 3.78, 
'The Green Mile': 4.96,
'The Godfather': 1.04,
'The Godfather Part II': 1.03
},
'Francis Ford Coppola': {
'The Godfather': 5.00, 
'The Godfather Part II': 5.0, 
'Jaws': 1.24,
'One Flew Over The Cuckoos Nest': 2.02
},
'Martin Scorsese': {
'Raging Bull': 5.0, 
'Goodfellas': 4.87,
'Close Encounters of the Third Kind': 1.14,
'The Godfather': 4.00
},
'Diane Keaton': {
'The Godfather': 2.98,
'The Godfather Part II': 3.93,
'Close Encounters of the Third Kind': 1.37
},
'Richard Dreyfuss': {
'Jaws': 5.0, 
'Close Encounters of the Third Kind': 5.0,
'The Godfather': 1.07,
'The Godfather Part II': 0.63
},
'Joe Pesci': {
'Raging Bull': 4.89, 
'Goodfellas': 5.0,
'The Godfather': 4.87,
'Star Wars Episode IV - A New Hope': 1.32
}
}

In [5]:
# getting common listed movies between users

common_movies = list()
criticA, criticB = 'Marlon Brando', 'Robert DeNiro'

for movie in review[criticA]:
    if movie in review[criticB]:
        common_movies.append(movie)

common_movies

['The Godfather', 'The Godfather Part II']

In [6]:
# Function to get common listed movies b/w Users

def get_common_movies(criticA,criticB):
    return [movie for movie in review[criticA] if movie in review[criticB]]

In [3]:
get_common_movies('Marlon Brando','Robert DeNiro')

['The Godfather', 'The Godfather Part II']

In [4]:
get_common_movies('Steven Spielberg','Tom Hanks')

['Saving Private Ryan', 'The Godfather', 'The Godfather Part II']

In [5]:
get_common_movies('Martin Scorsese','Joe Pesci')

['Raging Bull', 'Goodfellas', 'The Godfather']

In [9]:
# Getting the movie ratings corresponding to a user

common_movies_review = list()
criticA, criticB = 'Marlon Brando', 'Robert DeNiro'

# first getting the list of common movies
common_movies = get_common_movies(criticA,criticB)

# extracting the ratings of the movies by selecting each critic
# packing reviews of two users about a single movie in a single tuple
for movie in common_movies:
    common_movies_review.append((review[criticA][movie], review[criticB][movie]))

# here m1=>(u1, u2), m2=>(u1, u2), m3=>(u1, u2) and so on...
# So, more movies means more dimensions of two vectors u1 and u2 
# [(5.0, 3.07), (4.29, 4.29)] => [movie1=>(u1, u2), movie=>(u1, u2)]
common_movies_review

[(5.0, 3.07), (4.29, 4.29)]

In [10]:
# Function to get reviews from the common movies
# We will use `get_common_movies` func to get the movies first and then we will extract the corresponding ratings 

# we have implemented the above function in 2 lines
def get_reviews(criticA,criticB):
    common_movies = get_common_movies(criticA,criticB)
    return [(review[criticA][movie], review[criticB][movie]) for movie in common_movies]

In [7]:
get_reviews('Marlon Brando','Robert DeNiro')

[(5.0, 3.07), (4.29, 4.29)]

In [8]:
get_reviews('Steven Spielberg','Tom Hanks')

[(4.78, 3.78), (1.25, 1.04), (1.72, 1.03)]

In [9]:
get_reviews('Martin Scorsese','Joe Pesci')

[(5.0, 4.89), (4.87, 5.0), (4.0, 4.87)]

## Euclidean Distance Formula for Calculating similarity
$d(x,y)=\sqrt{(x2-x1)^2 + (y2-y1)^2 + (z2-z1)^2 + ...}$

In [None]:
# 'Marlon Brando': {
# 'The Godfather': 5.00, 
# 'The Godfather Part II': 4.29,
# 'Apocalypse Now': 5.00, 
# 'Jaws': 1.
# }

# 'Robert DeNiro': {
# 'The Godfather': 3.07, 
# 'The Godfather Part II': 4.29, 
# 'Raging Bull': 5.00, 
# 'Goodfellas':  4.89
# }

#           m1(x)       m2(y)
# u1(V1)    5(x1)       4.29(y1)
# u2(V2)    3.07(x2)    4.29(y2)

# We are trying to find out similarity between users
# So, here users are vectors and movies are dimensions
# u1 => x1 y1 z1 ... or m1 m2 m3 are dimensions of u1
# u2 => x2 y2 z2 ... or m1 m2 m3 are dimensions of u2

In [19]:
# we will calculate similarity based on Euclidean Distance between users

squared_diffs = list()
points = common_movies_review
print(points) # [(5.0, 3.07), (4.29, 4.29)] => [(x1, x2), (y1, y2)] 

# first we will calculate the squared difference
for point in points:
    squared_diffs.append((point[0] - point[1]) ** 2)
print(squared_diffs)

# then we will sum them up
summed_squared_diffs = sum(squared_diffs)
print(summed_squared_diffs)

# then we will calculate square root
distance = math.sqrt(summed_squared_diffs)
print(distance)

[(5.0, 3.07), (4.29, 4.29)]
[3.7249000000000008, 0.0]
3.7249000000000008
1.9300000000000002


In [20]:
# Function to get Euclidean Distance b/w 2 points or users

def euclidean_distance(points):
    squared_diffs = [(point[0] - point[1]) ** 2 for point in points]
    summed_squared_diffs = sum(squared_diffs)
    distance = math.sqrt(summed_squared_diffs)
    return distance

In [21]:
# Function to calculate similarity
# more similar means less the distance and vice versa
# so we will invert the distance to get the similarity
# Added 1 in the denominator because, highly similar users can make the distance zero in the denominator and it will give Error

def similarity(reviews):
    return 1/ (1 + euclidean_distance(reviews))

In [22]:
# Function to get similarity b/w 2 users
# we will use the above developed functions to calculate similarity b/w 2 users
# Process: (u1, u2) => get movies => get review => get distance => get similarity  

def get_critic_similarity(criticA, criticB):
    reviews = get_reviews(criticA,criticB)
    return similarity(reviews)

In [23]:
get_critic_similarity('Marlon Brando','Robert DeNiro')

0.341296928327645

In [24]:
get_critic_similarity('Steven Spielberg','Tom Hanks')

0.4478352722730117

In [25]:
get_critic_similarity('Martin Scorsese','Joe Pesci')

0.5300793497254199

In [26]:
# lets design the recommendation system based on their reviews

critic = 'Marlon Brando'
similarity_scores = list()

for other in review:
    if other != critic:
        # (user-user similarity score and the similar user name) packing in a tuple
        similarity_scores.append((get_critic_similarity(critic, other), other))

similarity_scores

[(0.21413276231263384, 'Stephen King'),
 (0.143199423486626, 'Steven Spielberg'),
 (1.0, 'George Lucas'),
 (0.4524548503084212, 'Al Pacino'),
 (0.341296928327645, 'Robert DeNiro'),
 (0.3898259201723122, 'Robert Duvall'),
 (0.2539463683316, 'Jack Nicholson'),
 (0.15236677465222687, 'Morgan Freeman'),
 (1.0, 'Harrison Ford'),
 (0.16315211297736404, 'Tom Hanks'),
 (0.57160283618054, 'Francis Ford Coppola'),
 (0.5, 'Martin Scorsese'),
 (0.3276724152237754, 'Diane Keaton'),
 (0.1299324814096532, 'Richard Dreyfuss'),
 (0.8849557522123894, 'Joe Pesci')]

In [28]:
# extracting top similarities

num_suggestions = 5

# sorting the similarity values in descending order
similarity_scores.sort(reverse=True)

# extracting top 5 similar users
similarity_scores = similarity_scores[:num_suggestions]

similarity_scores

[(1.0, 'Harrison Ford'),
 (1.0, 'George Lucas'),
 (0.8849557522123894, 'Joe Pesci'),
 (0.57160283618054, 'Francis Ford Coppola'),
 (0.5, 'Martin Scorsese')]

In [None]:
# 'Marlon Brando': {
# 'The Godfather': 5.00, 
# 'The Godfather Part II': 4.29,
# 'Apocalypse Now': 5.00, 
# 'Jaws': 1.
# }

# 'Harrison Ford': {
# 'Raiders of the Lost Ark': 5.0, 
# 'Star Wars Episode IV - A New Hope': 4.84,
# }

# 'George Lucas':{
# 'Star Wars Episode IV - A New Hope': 5.00	
# }

In [45]:
# based on user-user similary we will recommend movies

# Dictionary to store recommendations
recommendations = {}

# Dictionary to store recommendations
for similarity, other in similarity_scores:

    # capturing the movies and corresponding ratings of similar(other) users
    reviewed = review[other]
    print("1)",other, reviewed)

    # Storing the review
    # looping through the set of movies and its rating given by a similar(other) user of his/her reviewed movies
    for movie in reviewed:
        
        print("2)",movie)

        # we are proceeding only if the movie is not rated by our user already
        # If our user (not the other user) has already rated the movie then there is no point in recommending that 
        if movie not in review[critic]:
            
            # we are setting a weight for the movie we are planning to recommend
            # (similary score between user-other) X (rating given by the other user to that specific movie)
            weight = similarity * reviewed[movie]
            print("3)",similarity,reviewed[movie])
            
            # Weighing similarity with review
            # we are checking whether the movie is already present in the recommendations dictionary
            # Note: if the movie is selected for the first time then it won't be there
            # So, we move to the else part of the loop and create the movie as key and (similarity and corresponding weights as a list) as values
            if movie in recommendations:
                sim, weights = recommendations[movie]
                # we are adding the movie name as key and its value will be based on calculation
                # total similarity = sim + similarity => (u-u1)similarity + (u-u2)similarity and so on...
                # appending the weights in the list => weights + [weight] => weights.extend([weight])
                recommendations[movie] = (sim + similarity, weights + [weight])
                print("4)",recommendations,"\n\n")
            else:
                recommendations[movie] = (similarity, [weight])
                print("5)",recommendations,"\n")

recommendations
# output:
# {'Raiders of the Lost Ark': (1.0, [5.0]),
#  'Star Wars Episode IV - A New Hope': (2.8849557522123894,
#   [4.84, 5.0, 1.168141592920354]),
#  'Raging Bull': (1.3849557522123894, [4.327433628318584, 2.5]),
#  'Goodfellas': (1.3849557522123894, [4.424778761061948, 2.435]),
#  'One Flew Over The Cuckoos Nest': (0.57160283618054, [1.1546377290846908]),
#  'Close Encounters of the Third Kind': (0.5, [0.57])}

1) Harrison Ford {'Raiders of the Lost Ark': 5.0, 'Star Wars Episode IV - A New Hope': 4.84}
2) Raiders of the Lost Ark
3) 1.0 5.0
5) {'Raiders of the Lost Ark': (1.0, [5.0])} 

2) Star Wars Episode IV - A New Hope
3) 1.0 4.84
5) {'Raiders of the Lost Ark': (1.0, [5.0]), 'Star Wars Episode IV - A New Hope': (1.0, [4.84])} 

1) George Lucas {'Star Wars Episode IV - A New Hope': 5.0}
2) Star Wars Episode IV - A New Hope
3) 1.0 5.0
4) {'Raiders of the Lost Ark': (1.0, [5.0]), 'Star Wars Episode IV - A New Hope': (2.0, [4.84, 5.0])} 


1) Joe Pesci {'Raging Bull': 4.89, 'Goodfellas': 5.0, 'The Godfather': 4.87, 'Star Wars Episode IV - A New Hope': 1.32}
2) Raging Bull
3) 0.8849557522123894 4.89
5) {'Raiders of the Lost Ark': (1.0, [5.0]), 'Star Wars Episode IV - A New Hope': (2.0, [4.84, 5.0]), 'Raging Bull': (0.8849557522123894, [4.327433628318584])} 

2) Goodfellas
3) 0.8849557522123894 5.0
5) {'Raiders of the Lost Ark': (1.0, [5.0]), 'Star Wars Episode IV - A New Hope': (2.0, [4.84, 5.0

{'Raiders of the Lost Ark': (1.0, [5.0]),
 'Star Wars Episode IV - A New Hope': (2.8849557522123894,
  [4.84, 5.0, 1.168141592920354]),
 'Raging Bull': (1.3849557522123894, [4.327433628318584, 2.5]),
 'Goodfellas': (1.3849557522123894, [4.424778761061948, 2.435]),
 'One Flew Over The Cuckoos Nest': (0.57160283618054, [1.1546377290846908]),
 'Close Encounters of the Third Kind': (0.5, [0.57])}

In [46]:
# looping through the recommended movies
# we will perform some calculation based on (our_user-other_user) total similarity and total rating
# (sum of (ratings)) / (sum of (similarities) => we have already done before) 

for recommendation in recommendations:
    # capturing sum of similarities and list of ratings for that specific movie
    similarity, movie = recommendations[recommendation]
    # basically, Normalizing weights with similarity
    recommendations[recommendation] = sum(movie) / similarity

recommendations

{'Raiders of the Lost Ark': 5.0,
 'Star Wars Episode IV - A New Hope': 3.8157055214723923,
 'Raging Bull': 4.9297124600638975,
 'Goodfellas': 4.953067092651758,
 'One Flew Over The Cuckoos Nest': 2.02,
 'Close Encounters of the Third Kind': 1.14}

In [47]:
# sorting recommendations

sorted_recommendations = sorted(recommendations.items(), key=operator.itemgetter(1), reverse=True)

sorted_recommendations

[('Raiders of the Lost Ark', 5.0),
 ('Goodfellas', 4.953067092651758),
 ('Raging Bull', 4.9297124600638975),
 ('Star Wars Episode IV - A New Hope', 3.8157055214723923),
 ('One Flew Over The Cuckoos Nest', 2.02),
 ('Close Encounters of the Third Kind', 1.14)]

In [16]:
# Function to give recommendation to users based on their reviews.

def recommend_movies(critic, num_suggestions):
    similarity_scores = [(get_critic_similarity(critic, other), other) for other in review if other != critic]
    # Get similarity Scores for all the critics
    similarity_scores.sort(reverse=True)
    similarity_scores = similarity_scores[0:num_suggestions]

    # Dictionary to store recommendations
    recommendations = {}
    
    # Dictionary to store recommendations
    for similarity, other in similarity_scores:
        reviewed = review[other]
        # Storing the review
        for movie in reviewed:
            if movie not in review[critic]:
                weight = similarity * reviewed[movie]
                # Weighing similarity with review
                if movie in recommendations:
                    sim, weights = recommendations[movie]
                    recommendations[movie] = (sim + similarity, weights + [weight])
                    # Similarity of movie along with weight
                else:
                    recommendations[movie] = (similarity, [weight])
                    

    for recommendation in recommendations:
        similarity, movie = recommendations[recommendation]
        recommendations[recommendation] = sum(movie) / similarity
        # Normalizing weights with similarity

    sorted_recommendations = sorted(recommendations.items(), key=operator.itemgetter(1), reverse=True)
    #Sorting recommendations with weight
    return sorted_recommendations

In [17]:
recommend_movies('Marlon Brando',4)

[('Goodfellas', 5.000000000000001),
 ('Raiders of the Lost Ark', 5.0),
 ('Raging Bull', 4.89),
 ('Star Wars Episode IV - A New Hope', 3.8157055214723923),
 ('One Flew Over The Cuckoos Nest', 2.02)]

In [18]:
recommend_movies('Robert DeNiro',4)

[('Raiders of the Lost Ark', 5.0),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('Close Encounters of the Third Kind', 1.2744773851327365)]

In [19]:
recommend_movies('Steven Spielberg',4)

[('The Shawshank Redemption', 4.928285762244913),
 ('The Green Mile', 4.87),
 ('The Shining', 4.71304734727882),
 ('Apocalypse Now', 1.63)]

In [20]:
recommend_movies('Tom Hanks',4)

[('Raiders of the Lost Ark', 5.0),
 ('Jaws', 5.0),
 ('Close Encounters of the Third Kind', 5.0),
 ('The Shining', 4.93),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('The Shawshank Redemption', 4.89)]

In [21]:
recommend_movies('Martin Scorsese',4)

[('Raiders of the Lost Ark', 5.0),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('The Godfather Part II', 4.3613513513513515),
 ('Apocalypse Now', 4.26)]

In [22]:
recommend_movies('Joe Pesci',4)

[('Apocalypse Now', 5.000000000000001),
 ('The Godfather Part II', 4.7280538302277435),
 ('One Flew Over The Cuckoos Nest', 2.02),
 ('Close Encounters of the Third Kind', 1.14),
 ('Jaws', 1.12)]