# Tableau_Movie_Recommendation_Functions

## Project Description:

The following code adapts previously written recommendation functions so that they can be used to build the Tableau recommendation dashboard.

In [25]:
# Connect to the TabPy server listening on localhost, port 9004
from tabpy.tabpy_tools.client import Client

TABPY_ENDPOINT = 'http://localhost:9004/'
connection = Client(TABPY_ENDPOINT)

In [26]:
# Create recommendation function
def recommendation_name(mov1, rating1, mov2, rating2, mov3, rating3):
    """Return the names of the top 10 movies most similar to three rated movies.

    For each chosen movie, the cosine similarity between its item-factor
    vector (``algo.qi``) and every item vector in the pickled model is
    computed and multiplied by the rating supplied for that movie. The three
    weighted similarities are summed per candidate, the chosen movies are
    removed, and the best-scoring names are returned.

    Parameters
    ----------
    mov1, mov2, mov3
        Movie names; each must be a key of the model's
        ``trainset._raw2inner_id_items`` mapping.
    rating1, rating2, rating3
        Numeric weight applied to the similarities of the matching movie.

    Returns
    -------
    list
        At most 10 movie names, highest combined score first.

    Raises
    ------
    KeyError
        If one of the chosen movies is not present in the trained model.
    """
    # Imports are local to the function; presumably so that it stays
    # self-contained once deployed to TabPy -- confirm before hoisting them.
    import pandas as pd
    # Not referenced directly: kept so that a missing `surprise` package fails
    # here with an ImportError before the model file is read.
    import surprise
    from scipy.spatial.distance import cosine
    from pickle import load

    TOP_N = 10

    # NOTE(security): unpickling can execute arbitrary code, so SVD_Model.pkl
    # must come from a trusted source.
    # The context manager closes the file handle even if loading fails.
    with open('SVD_Model.pkl', 'rb') as model_file:
        algo = load(model_file)

    # Maps movie name -> row index into algo.qi
    item_index = algo.trainset._raw2inner_id_items

    def get_vector_by_movie_name(movie_name):
        """Return the item-factor vector stored for ``movie_name``."""
        try:
            inner_id = item_index[movie_name]
        except KeyError:
            raise KeyError(
                'Movie not found in trained model: {!r}'.format(movie_name)
            ) from None
        return algo.qi[inner_id]

    def weighted_similarities(chosen_vector, rating):
        """Score every movie in the model against one chosen movie's vector."""
        # scipy's `cosine` is a distance, so 1 - distance is the similarity.
        rows = [(1 - cosine(chosen_vector, algo.qi[inner_id]), movie_name)
                for movie_name, inner_id in item_index.items()]
        scores = pd.DataFrame(rows, columns=['similarity', 'movie name'])
        # Scale cosine score by rating
        scores['similarity'] = scores['similarity'] * rating
        return scores

    # Resolve all three vectors first so an unknown movie fails before any
    # similarity is computed.
    chosen = [(get_vector_by_movie_name(movie), rating)
              for movie, rating in ((mov1, rating1),
                                    (mov2, rating2),
                                    (mov3, rating3))]

    # One weighted similarity table per chosen movie, stacked row-wise
    combined = pd.concat([weighted_similarities(vector, rating)
                          for vector, rating in chosen], axis=0)
    # Remove all three chosen movies
    combined = combined.loc[~combined['movie name'].isin([mov1, mov2, mov3])]
    # Sum the three weighted scores of every remaining movie
    totals = combined.groupby(by='movie name').sum().reset_index()
    # Best combined score first
    totals = totals.sort_values('similarity', ascending=False).reset_index(drop=True)
    return totals['movie name'].to_list()[:TOP_N]

The above function returns a list of top 10 movie recommendations by movie name.

In [27]:
# Create recommendation function
def recommendation_score(mov1, rating1, mov2, rating2, mov3, rating3):
    """Return the combined similarity scores of the top 10 recommended movies.

    For each chosen movie, the cosine similarity between its item-factor
    vector (``algo.qi``) and every item vector in the pickled model is
    computed and multiplied by the rating supplied for that movie. The three
    weighted similarities are summed per candidate, the chosen movies are
    removed, and the highest sums are returned.

    Parameters
    ----------
    mov1, mov2, mov3
        Movie names; each must be a key of the model's
        ``trainset._raw2inner_id_items`` mapping.
    rating1, rating2, rating3
        Numeric weight applied to the similarities of the matching movie.

    Returns
    -------
    list
        At most 10 summed, rating-weighted similarity scores in descending
        order.

    Raises
    ------
    KeyError
        If one of the chosen movies is not present in the trained model.
    """
    # Imports are local to the function; presumably so that it stays
    # self-contained once deployed to TabPy -- confirm before hoisting them.
    import pandas as pd
    # Not referenced directly: kept so that a missing `surprise` package fails
    # here with an ImportError before the model file is read.
    import surprise
    from scipy.spatial.distance import cosine
    from pickle import load

    TOP_N = 10

    # NOTE(security): unpickling can execute arbitrary code, so SVD_Model.pkl
    # must come from a trusted source.
    # The context manager closes the file handle even if loading fails.
    with open('SVD_Model.pkl', 'rb') as model_file:
        algo = load(model_file)

    # Maps movie name -> row index into algo.qi
    item_index = algo.trainset._raw2inner_id_items

    def get_vector_by_movie_name(movie_name):
        """Return the item-factor vector stored for ``movie_name``."""
        try:
            inner_id = item_index[movie_name]
        except KeyError:
            raise KeyError(
                'Movie not found in trained model: {!r}'.format(movie_name)
            ) from None
        return algo.qi[inner_id]

    def weighted_similarities(chosen_vector, rating):
        """Score every movie in the model against one chosen movie's vector."""
        # scipy's `cosine` is a distance, so 1 - distance is the similarity.
        rows = [(1 - cosine(chosen_vector, algo.qi[inner_id]), movie_name)
                for movie_name, inner_id in item_index.items()]
        scores = pd.DataFrame(rows, columns=['similarity', 'movie name'])
        # Scale cosine score by rating
        scores['similarity'] = scores['similarity'] * rating
        return scores

    # Resolve all three vectors first so an unknown movie fails before any
    # similarity is computed.
    chosen = [(get_vector_by_movie_name(movie), rating)
              for movie, rating in ((mov1, rating1),
                                    (mov2, rating2),
                                    (mov3, rating3))]

    # One weighted similarity table per chosen movie, stacked row-wise
    combined = pd.concat([weighted_similarities(vector, rating)
                          for vector, rating in chosen], axis=0)
    # Remove all three chosen movies
    combined = combined.loc[~combined['movie name'].isin([mov1, mov2, mov3])]
    # Sum the three weighted scores of every remaining movie
    totals = combined.groupby(by='movie name').sum().reset_index()
    # Best combined score first
    totals = totals.sort_values('similarity', ascending=False).reset_index(drop=True)
    return totals['similarity'].to_list()[:TOP_N]

The above function returns a list of the top 10 recommendation scores. Each score is the cosine similarity (cosine distance inverted so that higher is better), weighted by the supplied rating and summed across the three chosen movies.

In [28]:
# Smoke-test the name recommender with three movies, each rated 5
recommendation_name(mov1="Jurassic Park (1993)", rating1=5,
                    mov2="Godfather, The (1972)", rating2=5,
                    mov3="Kill Bill: Vol. 2 (2004)", rating3=5)

['Kill Bill: Vol. 1 (2003)',
 'Godfather: Part II, The (1974)',
 'Pulp Fiction (1994)',
 'Jaws (1975)',
 'Goodfellas (1990)',
 'Godfather: Part III, The (1990)',
 'No Country for Old Men (2007)',
 'Reservoir Dogs (1992)',
 'Silence of the Lambs, The (1991)',
 'Lost World: Jurassic Park, The (1997)']

In [29]:
# Smoke-test the score recommender with the same three movies, each rated 5
recommendation_score(mov1="Jurassic Park (1993)", rating1=5,
                     mov2="Godfather, The (1972)", rating2=5,
                     mov3="Kill Bill: Vol. 2 (2004)", rating3=5)

[4.176942465272439,
 3.919729844974633,
 2.0711904940357506,
 1.979486457854048,
 1.8274266984051857,
 1.7240184805996932,
 1.5893893889194515,
 1.506917968568875,
 1.436480093644545,
 1.3690673410235237]

Both functions are tested.

In [30]:
# Create recommendation function
def trailer(mov1, rating1, mov2, rating2, mov3, rating3):
    """Return the YouTube ids for the top 10 movies similar to three rated movies.

    For each chosen movie, the cosine similarity between its item-factor
    vector (``algo.qi``) and every item vector in the pickled model is
    computed and multiplied by the rating supplied for that movie. The three
    weighted similarities are summed per candidate, the chosen movies are
    removed, and the ten best names are looked up in
    ``Data/Movie_Ratings.csv`` to obtain their ``youtubeId`` values.

    Parameters
    ----------
    mov1, mov2, mov3
        Movie names; each must be a key of the model's
        ``trainset._raw2inner_id_items`` mapping.
    rating1, rating2, rating3
        Numeric weight applied to the similarities of the matching movie.

    Returns
    -------
    list
        ``youtubeId`` values of the recommended movies, highest combined
        score first. Recommended movies with no row in the CSV are omitted.

    Raises
    ------
    KeyError
        If one of the chosen movies is not present in the trained model.
    """
    # Imports are local to the function; presumably so that it stays
    # self-contained once deployed to TabPy -- confirm before hoisting them.
    import pandas as pd
    # Not referenced directly: kept so that a missing `surprise` package fails
    # here with an ImportError before the model file is read.
    import surprise
    from scipy.spatial.distance import cosine
    from pickle import load

    TOP_N = 10

    # NOTE(security): unpickling can execute arbitrary code, so SVD_Model.pkl
    # must come from a trusted source.
    # The context manager closes the file handle even if loading fails.
    with open('SVD_Model.pkl', 'rb') as model_file:
        algo = load(model_file)

    # Maps movie name -> row index into algo.qi
    item_index = algo.trainset._raw2inner_id_items

    def get_vector_by_movie_name(movie_name):
        """Return the item-factor vector stored for ``movie_name``."""
        try:
            inner_id = item_index[movie_name]
        except KeyError:
            raise KeyError(
                'Movie not found in trained model: {!r}'.format(movie_name)
            ) from None
        return algo.qi[inner_id]

    def weighted_similarities(chosen_vector, rating):
        """Score every movie in the model against one chosen movie's vector."""
        # scipy's `cosine` is a distance, so 1 - distance is the similarity.
        rows = [(1 - cosine(chosen_vector, algo.qi[inner_id]), movie_name)
                for movie_name, inner_id in item_index.items()]
        scores = pd.DataFrame(rows, columns=['similarity', 'movie name'])
        # Scale cosine score by rating
        scores['similarity'] = scores['similarity'] * rating
        return scores

    # Resolve all three vectors first so an unknown movie fails before any
    # similarity is computed.
    chosen = [(get_vector_by_movie_name(movie), rating)
              for movie, rating in ((mov1, rating1),
                                    (mov2, rating2),
                                    (mov3, rating3))]

    # One weighted similarity table per chosen movie, stacked row-wise
    combined = pd.concat([weighted_similarities(vector, rating)
                          for vector, rating in chosen], axis=0)
    # Remove all three chosen movies
    combined = combined.loc[~combined['movie name'].isin([mov1, mov2, mov3])]
    # Sum the three weighted scores of every remaining movie
    totals = combined.groupby(by='movie name').sum().reset_index()
    # Best combined score first
    totals = totals.sort_values('similarity', ascending=False).reset_index(drop=True)

    # Top recommendations as a one-column frame so they can be joined on 'movie'
    top_movies = pd.DataFrame(totals['movie name'].to_list()[:TOP_N],
                              columns=['movie'])
    # Only the join key and the trailer id are needed from the ratings file
    ratings_data = pd.read_csv('Data/Movie_Ratings.csv',
                               usecols=['movie', 'youtubeId'])
    # The default inner merge keeps the recommendation order; the CSV may hold
    # several rows per movie, so keep only the first match for each.
    # NOTE(review): a recommended movie with no row in the CSV is dropped here,
    # so the result can be shorter than the name/score lists -- confirm that
    # the dashboard tolerates this.
    trailers = top_movies.merge(ratings_data, on='movie').drop_duplicates(subset=['movie'])
    return trailers['youtubeId'].to_list()

The above function returns a list of YouTube trailer IDs for the top 10 movie recommendations.

In [31]:
# Smoke-test the trailer lookup with three movies, each rated 5
trailer(mov1="Jurassic Park (1993)", rating1=5,
        mov2="Godfather, The (1972)", rating2=5,
        mov3="Kill Bill: Vol. 2 (2004)", rating3=5)

['3sbKt3ptUJo',
 '9O1Iy9od7-A',
 's7EdQ4FqbhY',
 'U1fu_sA7XhE',
 'qo5jJpHtI1Y',
 'z8h3LVb8cl8',
 'qnwNuG1ayno',
 'GLPJSmUHZvU',
 'lQKs169Sl0I',
 'CIkOdrJGNy0']

The above function is tested.

In [32]:
# Publish each function under its own name, replacing any existing endpoint
for deployed_function in (recommendation_name, recommendation_score, trailer):
    connection.deploy(name = deployed_function.__name__,
                      obj = deployed_function,
                      override = True)

The functions were deployed to the TabPy server so that they can be called from Tableau.