In [74]:
import pandas as pd
import numpy as np
import math
import operator

# Collaborative Filtering Example
Create a simple collaborative filtering recommender system. 
Below is the custom data for this system. 

All credits for the tutorial and data belong to: https://nbviewer.org/github/BadreeshShetty/Learnings-to-make-Recommedations/

In [2]:
reviews = {
    'Marlon Brando': {
    'The Godfather': 5.00, 
    'The Godfather Part II': 4.29,
    'Apocalypse Now': 5.00, 
    'Jaws': 1.
    },
    'Stephen King': {
    'The Shawshank Redemption': 4.89, 
    'The Shining': 4.93 , 
    'The Green Mile': 4.87,
    'The Godfather': 1.33,
    },
    'Steven Spielberg': {
    'Raiders of the Lost Ark': 5.0, 
    'Jaws': 4.89,
    'Saving Private Ryan': 4.78, 
    'Star Wars Episode IV - A New Hope': 4.33,
    'Close Encounters of the Third Kind': 4.77,
    'The Godfather':  1.25,
    'The Godfather Part II': 1.72
    },
    'George Lucas':{
    'Star Wars Episode IV - A New Hope': 5.00	
    },
    'Al Pacino': {
    'The Godfather': 4.02, 
    'The Godfather Part II': 5.00,
    },
    'Robert DeNiro': {
    'The Godfather': 3.07, 
    'The Godfather Part II': 4.29, 
    'Raging Bull': 5.00, 
    'Goodfellas':  4.89
    },
    'Robert Duvall': {
    'The Godfather': 3.80, 
    'The Godfather Part II': 3.61,
    'Apocalypse Now': 4.26 
    },
    'Jack Nicholson': {
    'The Shining': 5.0,
    'One Flew Over The Cuckoos Nest': 5.0,
    'The Godfather': 2.22,
    'The Godfather Part II': 3.34
    },
    'Morgan Freeman': {
    'The Shawshank Redemption': 4.98,
    'The Shining': 4.42,
    'Apocalypse Now': 1.63,
    'The Godfather': 1.12,
    'The Godfather Part II': 2.16
    },
    'Harrison Ford': {
    'Raiders of the Lost Ark': 5.0, 
    'Star Wars Episode IV - A New Hope': 4.84,
    },
    'Tom Hanks': {
    'Saving Private Ryan': 3.78, 
    'The Green Mile': 4.96,
    'The Godfather': 1.04,
    'The Godfather Part II': 1.03
    },
    'Francis Ford Coppola': {
    'The Godfather': 5.00, 
    'The Godfather Part II': 5.0, 
    'Jaws': 1.24,
    'One Flew Over The Cuckoos Nest': 2.02
    },
    'Martin Scorsese': {
    'Raging Bull': 5.0, 
    'Goodfellas': 4.87,
    'Close Encounters of the Third Kind': 1.14,
    'The Godfather': 4.00
    },
    'Diane Keaton': {
    'The Godfather': 2.98,
    'The Godfather Part II': 3.93,
    'Close Encounters of the Third Kind': 1.37
    },
    'Richard Dreyfuss': {
    'Jaws': 5.0, 
    'Close Encounters of the Third Kind': 5.0,
    'The Godfather': 1.07,
    'The Godfather Part II': 0.63
    },
    'Joe Pesci': {
    'Raging Bull': 4.89, 
    'Goodfellas': 5.0,
    'The Godfather': 4.87,
    'Star Wars Episode IV - A New Hope': 1.32
    }
}


## Utility Function

Utility function to get common movies between critics and their respective reviews

In [3]:
def get_common_movies(critic_a, critic_b, reviews):
    review_a = reviews[critic_a]
    review_b = reviews[critic_b]
    return list(set(review_a) & set(review_b))

In [4]:
get_common_movies('Marlon Brando','Robert DeNiro', reviews)

['The Godfather', 'The Godfather Part II']

In [5]:
get_common_movies('Steven Spielberg','Tom Hanks', reviews)

['Saving Private Ryan', 'The Godfather', 'The Godfather Part II']

In [6]:
def get_reviews(critic_a, critic_b, reviews):
    common_mov = get_common_movies(critic_a, critic_b, reviews)
    return [(reviews[critic_a][movie], reviews[critic_b][movie]) for movie in common_mov]

In [7]:
ex_reviews = get_reviews('Marlon Brando','Robert DeNiro', reviews)

In [8]:
print(ex_reviews)

[(5.0, 3.07), (4.29, 4.29)]


In [9]:
ex_reviews_2 = get_reviews('Steven Spielberg','Tom Hanks', reviews)

In [10]:
ex_reviews_2

[(4.78, 3.78), (1.25, 1.04), (1.72, 1.03)]

In [18]:
def euclidean_distance(points):
    squared_diffs = [(point[0] - point[1]) ** 2 for point in points]
    summed_squared_diffs = sum(squared_diffs)
    distance = math.sqrt(summed_squared_diffs)
    return distance

In [19]:
euclidean_distance(ex_reviews_2)

1.232963908636421

**Euclidian Distance Similairty:** Closer distance has higher score, equal distance is 1, and longer distance has less similarity.

In [23]:
def similarity(common_reviews):
    return 1/ (1 + euclidean_distance(common_reviews))

In [24]:
def get_critic_similarity(critic_a, critic_b, reviews):
    common_reviews = get_reviews(critic_a, critic_b, reviews)
    return similarity(common_reviews)

In [25]:
get_critic_similarity('Marlon Brando','Robert DeNiro', reviews)

0.341296928327645

In [26]:
get_critic_similarity('Steven Spielberg','Tom Hanks', reviews)

0.4478352722730117

In [27]:
get_critic_similarity('Martin Scorsese','Joe Pesci', reviews)

0.5300793497254199

## Recommendations

In [89]:
def identify_critics_similarity(critic, no_suggestions, reviews):
    similarity_scores = [(get_critic_similarity(critic, other, reviews), other) for other in reviews if other != critic]
    similarity_scores.sort()
    similarity_scores.reverse()
    similarity_scores = similarity_scores[0:no_suggestions] 
    return similarity_scores


def recommendations(similarity_scores, critic, reviews):
    recommend = {}

    for similarity, other_critic in similarity_scores:
        review = reviews[other_critic]
        for movie in review:
            if movie not in reviews[critic]:
                weight = similarity * review[movie]
                if movie in recommend:
                    sim, weights = recommend[movie]
                    recommend[movie] = (sim + similarity, weights + [weight])  # Weights is a list of weights
                else:
                    recommend[movie] = (similarity, [weight])
    return recommend


def sort_recommendations(recommend):
    norm_recommendations = {}
    for rec_mov in recommend.keys():
        similarity, mov_weights = recommend[rec_mov]
        norm_recommendations[rec_mov] = sum(mov_weights) / similarity
    return norm_recommendations

def recommend_movies(critic, no_suggestions, reviews):
    similarity_scores = identify_critics_similarity(critic, no_suggestions, reviews)
    recommend = recommendations(similarity_scores, critic, reviews)
    norm_recommend = sort_recommendations(recommend)

    sorted_rec = sorted(norm_recommend.items(), key=operator.itemgetter(1), reverse=True)
    return sorted_rec
    

In [90]:
recommend_movies('Marlon Brando',4, reviews)

[('Goodfellas', 5.000000000000001),
 ('Raiders of the Lost Ark', 5.0),
 ('Raging Bull', 4.89),
 ('Star Wars Episode IV - A New Hope', 3.8157055214723923),
 ('One Flew Over The Cuckoos Nest', 2.02)]

In [91]:
recommend_movies('Robert DeNiro',4, reviews)

[('Raiders of the Lost Ark', 5.0),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('Close Encounters of the Third Kind', 1.2744773851327365)]

In [92]:
recommend_movies('Steven Spielberg',4, reviews)

[('The Shawshank Redemption', 4.928285762244913),
 ('The Green Mile', 4.87),
 ('The Shining', 4.71304734727882),
 ('Apocalypse Now', 1.63)]

In [93]:
recommend_movies('Tom Hanks',3, reviews)

[('Raiders of the Lost Ark', 5.0),
 ('The Shining', 4.93),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('The Shawshank Redemption', 4.89)]