In [9]:
import lime
import lime.lime_tabular
import shap
import pandas as pd
import pickle

# Load cosine similarity matrix (cos_sim)
with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
    cos_sim = pickle.load(f)

# Load content dataframe (content_df)
content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")

# Function to predict scores and similarities for similar movies
def predict(title, similarity_weight=0.7, top_n=10):
    data = content_df.reset_index()
    index_movie = data[data['original_title'] == title].index
    similarity = cos_sim[index_movie].T
    sim_df = pd.DataFrame(similarity, columns=['similarity'])
    final_df = pd.concat([data, sim_df], axis=1)
    final_df['final_score'] = final_df['score']*(1-similarity_weight) + final_df['similarity']*similarity_weight
    final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
    final_df_sorted.set_index('original_title', inplace=True)
    return final_df_sorted[['score', 'similarity', 'final_score']]

# Generate predictions
predictions = predict('Toy Story', similarity_weight=0.7, top_n=10)

# Extract features for LIME
X = predictions.drop(columns=['score', 'final_score'])  # Remove 'score' and 'final_score' columns
y = pd.Series([0] * len(X))  # Create a dummy target variable (can be any value)

# Print the shapes of X and y for verification
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)


Shape of X: (10, 1)
Shape of y: (10,)


LIME and SHAP expect X to be a 2-dimensional array where each row represents a sample and each column represents a feature. In this case, X has only one feature column, which may cause issues with LIME.

To resolve this, we need to ensure that X has at least two feature columns, even if one of them is a constant value. Let's modify the code to include an additional dummy feature column:

In [10]:
import numpy as np

# Add a dummy feature column to X
X['dummy_feature'] = np.zeros(len(X))

# Ensure that X has the expected shape
print("Updated shape of X:", X.shape)

Updated shape of X: (10, 2)


In [11]:
import lime
import lime.lime_tabular
import shap

# Apply LIME
explainer = lime.lime_tabular.LimeTabularExplainer(X.values, feature_names=X.columns)
exp = explainer.explain_instance(X.iloc[0].values, predict, num_features=len(X.columns))

# Apply SHAP
shap_explainer = shap.Explainer(predict, X)
shap_values = shap_explainer(X)

# Plot LIME explanation
exp.show_in_notebook(show_table=True)

# Plot SHAP summary plot
shap.summary_plot(shap_values, X)


ValueError: ('Lengths must match to compare', (10000,), (5000, 2))

In [12]:
import lime
from lime import lime_tabular
import shap
import pandas as pd
import pickle

# Load the content dataframe
content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")

# Load the cosine similarity matrix
with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
    cos_sim = pickle.load(f)

# Define the predict function
def predict(title, similarity_weight=0.7, top_n=10):
    """
    Predicts top movies similar to the given movie title based on similarity score.

    Parameters:
    - title (str): The title of the movie for which predictions are to be made.
    - similarity_weight (float): Weightage given to similarity score in the final scoring. Default is 0.7.
    - top_n (int): Number of top similar movies to return. Default is 10.

    Returns:
    - DataFrame: DataFrame containing top similar movies along with their scores and similarities.
    """
    # Reset index of content_df DataFrame
    data = content_df.reset_index()
    
    # Get the index of the movie with the given title
    index_movie = data[data['original_title'] == title].index
    
    # Transpose cosine similarity matrix to get similarities for the given movie
    similarity = cos_sim[index_movie].T
    
    # Create a DataFrame containing similarity scores
    sim_df = pd.DataFrame(similarity, columns=['similarity'])
    
    # Concatenate the similarity DataFrame with the data DataFrame
    final_df = pd.concat([data, sim_df], axis=1)
    
    # Calculate final score using similarity_weight
    final_df['final_score'] = final_df['score']*(1-similarity_weight) + final_df['similarity']*similarity_weight
    
    # Sort DataFrame based on final score in descending order and select top_n movies
    final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
    
    # Set 'original_title' as index
    final_df_sorted.set_index('original_title', inplace=True)
    
    # Return DataFrame containing scores and similarities of top similar movies
    return final_df_sorted[['score', 'similarity', 'final_score']]

# Example usage of predict function
result = predict('Toy Story', similarity_weight=0.7, top_n=10)
print(result)

# Create a single instance DataFrame for explanation
instance = content_df.head(1)

# Apply LIME
explainer = lime_tabular.LimeTabularExplainer(instance.values, feature_names=instance.columns)
exp = explainer.explain_instance(instance.iloc[0].values, predict, num_features=len(instance.columns))


                         score  similarity  final_score
original_title                                         
Toy Story             0.348515    1.000000     0.804554
Toy Story 2           0.317785    0.537320     0.471459
Toy Story 3           0.336500    0.274778     0.293294
Toy Story of Terror!  0.282269    0.294858     0.291081
Small Fry             0.256223    0.271027     0.266586
Hawaiian Vacation     0.266277    0.263818     0.264556
Minions               0.841412    0.005375     0.256186
Finding Nemo          0.346184    0.203631     0.246397
WALL·E                0.348681    0.196732     0.242317
A Bug's Life          0.284637    0.215010     0.235898


TypeError: unsupported operand type(s) for -: 'str' and 'str'

There's a problem with the data types in the instance DataFrame, particularly when computing quartiles for discretization. The error message "TypeError: unsupported operand type(s) for -: 'str' and 'str'" suggests that the columns in the instance DataFrame contain string values instead of numerical values.

To address this issue, we need to ensure that the data in the instance DataFrame is in a numerical format before passing it to the LIME explainer. We can achieve this by selecting only the numerical columns from the content_df DataFrame when creating the instance DataFrame.

In [13]:
import lime
from lime import lime_tabular
import pandas as pd
import pickle

# Load the content dataframe
content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")

# Load the cosine similarity matrix
with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
    cos_sim = pickle.load(f)

# Define the predict function
def predict(title, similarity_weight=0.7, top_n=10):
    """
    Predicts top movies similar to the given movie title based on similarity score.

    Parameters:
    - title (str): The title of the movie for which predictions are to be made.
    - similarity_weight (float): Weightage given to similarity score in the final scoring. Default is 0.7.
    - top_n (int): Number of top similar movies to return. Default is 10.

    Returns:
    - DataFrame: DataFrame containing top similar movies along with their scores and similarities.
    """
    # Reset index of content_df DataFrame
    data = content_df.reset_index()
    
    # Get the index of the movie with the given title
    index_movie = data[data['original_title'] == title].index
    
    # Transpose cosine similarity matrix to get similarities for the given movie
    similarity = cos_sim[index_movie].T
    
    # Create a DataFrame containing similarity scores
    sim_df = pd.DataFrame(similarity, columns=['similarity'])
    
    # Concatenate the similarity DataFrame with the data DataFrame
    final_df = pd.concat([data, sim_df], axis=1)
    
    # Calculate final score using similarity_weight
    final_df['final_score'] = final_df['score']*(1-similarity_weight) + final_df['similarity']*similarity_weight
    
    # Sort DataFrame based on final score in descending order and select top_n movies
    final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
    
    # Set 'original_title' as index
    final_df_sorted.set_index('original_title', inplace=True)
    
    # Return DataFrame containing scores and similarities of top similar movies
    return final_df_sorted[['score', 'similarity', 'final_score']]

# Example usage of predict function
result = predict('Toy Story', similarity_weight=0.7, top_n=10)
print(result)

# Create a single instance DataFrame for explanation
instance = content_df.select_dtypes(include='number').head(1)  # Select only numerical columns

# Apply LIME
explainer = lime_tabular.LimeTabularExplainer(instance.values, feature_names=instance.columns)
exp = explainer.explain_instance(instance.iloc[0].values, predict, num_features=len(instance.columns))


                         score  similarity  final_score
original_title                                         
Toy Story             0.348515    1.000000     0.804554
Toy Story 2           0.317785    0.537320     0.471459
Toy Story 3           0.336500    0.274778     0.293294
Toy Story of Terror!  0.282269    0.294858     0.291081
Small Fry             0.256223    0.271027     0.266586
Hawaiian Vacation     0.266277    0.263818     0.264556
Minions               0.841412    0.005375     0.256186
Finding Nemo          0.346184    0.203631     0.246397
WALL·E                0.348681    0.196732     0.242317
A Bug's Life          0.284637    0.215010     0.235898


ValueError: ('Lengths must match to compare', (10000,), (5000, 3))

The error indicates that there is an inconsistency in the lengths of the data used for comparison in the predict function. Specifically, the length of the data obtained from content_df does not match the length of the data obtained from the input title. This mismatch leads to the ValueError.

To resolve this issue, ensure that the data used for comparison has the same length. Here's the part of the code that needs to be adjusted:

In [19]:
def predict(title, similarity_weight=0.7, top_n=10):
    try:
        # Load the content dataframe
        content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")
        
        # Load the cosine similarity matrix
        with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
            cos_sim = pickle.load(f)
        
        # Reset index of content_df DataFrame
        data = content_df.reset_index()
        
        # Get the index of the movie with the given title
        index_movie = data[data['original_title'] == title].index
        
        print("Index of movie:", index_movie)  # Print index of the movie
        
        # Transpose cosine similarity matrix to get similarities for the given movie
        similarity = cos_sim[index_movie].T
        
        print("Similarity:", similarity)  # Print similarity scores
        
        # Create a DataFrame containing similarity scores
        sim_df = pd.DataFrame(similarity, columns=['similarity'])
        
        # Concatenate the similarity DataFrame with the data DataFrame
        final_df = pd.concat([data, sim_df], axis=1)
        
        # Calculate final score using similarity_weight
        final_df['final_score'] = final_df['score'] * (1 - similarity_weight) + final_df['similarity'] * similarity_weight
        
        # Sort DataFrame based on final score in descending order and select top_n movies
        final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
        
        # Set 'original_title' as index
        final_df_sorted.set_index('original_title', inplace=True)
        
        # Return DataFrame containing scores and similarities of top similar movies
        return final_df_sorted[['score', 'similarity', 'final_score']]
    
    except Exception as e:
        print("An error occurred:", e)

# Test the function
result = predict('Toy Story', similarity_weight=0.7, top_n=10)
print(result)

Index of movie: Int64Index([131], dtype='int64')
Similarity: [[0.00537545]
 [0.00694583]
 [0.00015944]
 ...
 [0.02084702]
 [0.00368631]
 [0.00239486]]
                         score  similarity  final_score
original_title                                         
Toy Story             0.348515    1.000000     0.804554
Toy Story 2           0.317785    0.537320     0.471459
Toy Story 3           0.336500    0.274778     0.293294
Toy Story of Terror!  0.282269    0.294858     0.291081
Small Fry             0.256223    0.271027     0.266586
Hawaiian Vacation     0.266277    0.263818     0.264556
Minions               0.841412    0.005375     0.256186
Finding Nemo          0.346184    0.203631     0.246397
WALL·E                0.348681    0.196732     0.242317
A Bug's Life          0.284637    0.215010     0.235898


In [None]:
changing the hyperparameters

In [20]:
def predict(title, similarity_weight=1, top_n=20):
    try:
        # Load the content dataframe
        content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")
        
        # Load the cosine similarity matrix
        with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
            cos_sim = pickle.load(f)
        
        # Reset index of content_df DataFrame
        data = content_df.reset_index()
        
        # Get the index of the movie with the given title
        index_movie = data[data['original_title'] == title].index
        
        print("Index of movie:", index_movie)  # Print index of the movie
        
        # Transpose cosine similarity matrix to get similarities for the given movie
        similarity = cos_sim[index_movie].T
        
        print("Similarity:", similarity)  # Print similarity scores
        
        # Create a DataFrame containing similarity scores
        sim_df = pd.DataFrame(similarity, columns=['similarity'])
        
        # Concatenate the similarity DataFrame with the data DataFrame
        final_df = pd.concat([data, sim_df], axis=1)
        
        # Calculate final score using similarity_weight
        final_df['final_score'] = final_df['score'] * (1 - similarity_weight) + final_df['similarity'] * similarity_weight
        
        # Sort DataFrame based on final score in descending order and select top_n movies
        final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
        
        # Set 'original_title' as index
        final_df_sorted.set_index('original_title', inplace=True)
        
        # Return DataFrame containing scores and similarities of top similar movies
        return final_df_sorted[['score', 'similarity', 'final_score']]
    
    except Exception as e:
        print("An error occurred:", e)

# Test the function
result = predict('Big Hero 6', similarity_weight=1, top_n=20)
print(result)

Index of movie: Int64Index([1], dtype='int64')
Similarity: [[1.23688295e-02]
 [1.00000000e+00]
 [5.73268467e-04]
 ...
 [2.95992028e-03]
 [1.58281315e-03]
 [1.02829603e-03]]
                                                 score  similarity  \
original_title                                                       
Big Hero 6                                    0.565385    1.000000   
Despicable Me 3                               0.268309    0.882704   
Treasure Planet                               0.306237    0.751305   
Oliver & Company                              0.253648    0.660418   
Toy Story 3                                   0.336500    0.654969   
That's What I Am                              0.238026    0.617376   
The Madagascar Penguins in a Christmas Caper  0.226295    0.616734   
The Curse of the Were-Rabbit                  0.275298    0.296992   
Buried                                        0.259472    0.279776   
Ice Age: Dawn of the Dinosaurs                0.261427   

In [21]:
import pandas as pd
import pickle
import lime
import lime.lime_tabular
import shap

# Load the content dataframe
content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")

# Load the cosine similarity matrix
with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
    cos_sim = pickle.load(f)

# Function to predict top similar movies
def predict(title, similarity_weight=1, top_n=20):
    try:
        # Reset index of content_df DataFrame
        data = content_df.reset_index()
        
        # Get the index of the movie with the given title
        index_movie = data[data['original_title'] == title].index
        
        # Transpose cosine similarity matrix to get similarities for the given movie
        similarity = cos_sim[index_movie].T
        
        # Create a DataFrame containing similarity scores
        sim_df = pd.DataFrame(similarity, columns=['similarity'])
        
        # Concatenate the similarity DataFrame with the data DataFrame
        final_df = pd.concat([data, sim_df], axis=1)
        
        # Calculate final score using similarity_weight
        final_df['final_score'] = final_df['score'] * (1 - similarity_weight) + final_df['similarity'] * similarity_weight
        
        # Sort DataFrame based on final score in descending order and select top_n movies
        final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
        
        # Set 'original_title' as index
        final_df_sorted.set_index('original_title', inplace=True)
        
        # Get the features for LIME
        features = final_df_sorted.drop(columns=['score', 'similarity', 'final_score'])
        
        return final_df_sorted, features
    
    except Exception as e:
        print("An error occurred:", e)

# Test the function
result, features = predict('Big Hero 6', similarity_weight=1, top_n=20)
print(result)

# Apply LIME
explainer = lime.lime_tabular.LimeTabularExplainer(features.values, feature_names=features.columns)
exp = explainer.explain_instance(features.iloc[0].values, predict, num_features=len(features.columns))

# Apply SHAP
shap_explainer = shap.Explainer(predict, features)
shap_values = shap_explainer.shap_values(features)


                                              index  popularity  \
original_title                                                    
Big Hero 6                                        1    0.390602   
Despicable Me 3                                2227    0.066908   
Treasure Planet                                 691    0.029484   
Oliver & Company                               3294    0.019751   
Toy Story 3                                     223    0.030990   
That's What I Am                               4944    0.014320   
The Madagascar Penguins in a Christmas Caper   6922    0.005711   
The Curse of the Were-Rabbit                   1825    0.021491   
Buried                                         2778    0.013497   
Ice Age: Dawn of the Dinosaurs                 2634    0.023709   
猫の恩返し                                          1076    0.021259   
Le Tableau                                     4746    0.004478   
Wreck-It Ralph                                  843    0.02501

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [22]:
# One-hot encode categorical variables
features_encoded = pd.get_dummies(features)

# Apply LIME
explainer = lime.lime_tabular.LimeTabularExplainer(features_encoded.values, feature_names=features_encoded.columns)
exp = explainer.explain_instance(features_encoded.iloc[0].values, predict, num_features=len(features_encoded.columns))


An error occurred: ('Lengths must match to compare', (10000,), (5000, 23))


AttributeError: 'NoneType' object has no attribute 'shape'

In [23]:
import pandas as pd
import numpy as np
import pickle
import lime
import lime.lime_tabular

# Define the predict function
def predict(title, similarity_weight=1, top_n=20):
    try:
        # Load the content dataframe
        content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")
        
        # Load the cosine similarity matrix
        with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
            cos_sim = pickle.load(f)
        
        # Reset index of content_df DataFrame
        data = content_df.reset_index()
        
        # Get the index of the movie with the given title
        index_movie = data[data['original_title'] == title].index
        
        # Transpose cosine similarity matrix to get similarities for the given movie
        similarity = cos_sim[index_movie].T
        
        # Create a DataFrame containing similarity scores
        sim_df = pd.DataFrame(similarity, columns=['similarity'])
        
        # Concatenate the similarity DataFrame with the data DataFrame
        final_df = pd.concat([data, sim_df], axis=1)
        
        # Calculate final score using similarity_weight
        final_df['final_score'] = final_df['score'] * (1 - similarity_weight) + final_df['similarity'] * similarity_weight
        
        # Sort DataFrame based on final score in descending order and select top_n movies
        final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
        
        # Set 'original_title' as index
        final_df_sorted.set_index('original_title', inplace=True)
        
        # Return DataFrame containing scores and similarities of top similar movies
        return final_df_sorted[['score', 'similarity', 'final_score']]
    
    except Exception as e:
        print("An error occurred:", e)

# Test the function
result = predict('Big Hero 6', similarity_weight=1, top_n=20)
print(result)

# Load your features_encoded DataFrame
features_encoded = pd.DataFrame()

# Apply LIME
explainer = lime.lime_tabular.LimeTabularExplainer(features_encoded.values, feature_names=features_encoded.columns)
exp = explainer.explain_instance(features_encoded.iloc[0].values, predict, num_features=len(features_encoded.columns))


                                                 score  similarity  \
original_title                                                       
Big Hero 6                                    0.565385    1.000000   
Despicable Me 3                               0.268309    0.882704   
Treasure Planet                               0.306237    0.751305   
Oliver & Company                              0.253648    0.660418   
Toy Story 3                                   0.336500    0.654969   
That's What I Am                              0.238026    0.617376   
The Madagascar Penguins in a Christmas Caper  0.226295    0.616734   
The Curse of the Were-Rabbit                  0.275298    0.296992   
Buried                                        0.259472    0.279776   
Ice Age: Dawn of the Dinosaurs                0.261427    0.252847   
猫の恩返し                                         0.293989    0.250492   
Le Tableau                                    0.239715    0.249453   
Wreck-It Ralph      

ValueError: Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 1 is required by StandardScaler.

In [24]:
import pandas as pd
import numpy as np
import pickle
import lime
import lime.lime_tabular

# Define the predict function
def predict(title, similarity_weight=1, top_n=20):
    try:
        # Load the content dataframe
        content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")
        
        # Load the cosine similarity matrix
        with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
            cos_sim = pickle.load(f)
        
        # Reset index of content_df DataFrame
        data = content_df.reset_index()
        
        # Get the index of the movie with the given title
        index_movie = data[data['original_title'] == title].index
        
        # Transpose cosine similarity matrix to get similarities for the given movie
        similarity = cos_sim[index_movie].T
        
        # Create a DataFrame containing similarity scores
        sim_df = pd.DataFrame(similarity, columns=['similarity'])
        
        # Concatenate the similarity DataFrame with the data DataFrame
        final_df = pd.concat([data, sim_df], axis=1)
        
        # Calculate final score using similarity_weight
        final_df['final_score'] = final_df['score'] * (1 - similarity_weight) + final_df['similarity'] * similarity_weight
        
        # Sort DataFrame based on final score in descending order and select top_n movies
        final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
        
        # Set 'original_title' as index
        final_df_sorted.set_index('original_title', inplace=True)
        
        # Return DataFrame containing scores and similarities of top similar movies
        return final_df_sorted[['score', 'similarity', 'final_score']]
    
    except Exception as e:
        print("An error occurred:", e)

# Test the function
result = predict('Big Hero 6', similarity_weight=1, top_n=20)
print(result)

# Load your features_encoded DataFrame
features_encoded = pd.DataFrame()  # Make sure this DataFrame is not empty before applying LIME

# Check if features_encoded DataFrame is not empty
if not features_encoded.empty:
    # Apply LIME
    explainer = lime.lime_tabular.LimeTabularExplainer(features_encoded.values, feature_names=features_encoded.columns)
    exp = explainer.explain_instance(features_encoded.iloc[0].values, predict, num_features=len(features_encoded.columns))
else:
    print("The features_encoded DataFrame is empty. Please populate it with data before applying LIME.")


                                                 score  similarity  \
original_title                                                       
Big Hero 6                                    0.565385    1.000000   
Despicable Me 3                               0.268309    0.882704   
Treasure Planet                               0.306237    0.751305   
Oliver & Company                              0.253648    0.660418   
Toy Story 3                                   0.336500    0.654969   
That's What I Am                              0.238026    0.617376   
The Madagascar Penguins in a Christmas Caper  0.226295    0.616734   
The Curse of the Were-Rabbit                  0.275298    0.296992   
Buried                                        0.259472    0.279776   
Ice Age: Dawn of the Dinosaurs                0.261427    0.252847   
猫の恩返し                                         0.293989    0.250492   
Le Tableau                                    0.239715    0.249453   
Wreck-It Ralph      

In [26]:
import shap

# Define the function to get SHAP explanation
def explain_recommendation_with_shap(title, similarity_weight=1, top_n=20):
    try:
        # Load the content dataframe
        content_df = pd.read_csv(r"C:\Users\ASUS\Desktop\documents canada\content_df.csv")
        
        # Load the cosine similarity matrix
        with open(r"C:\Users\ASUS\Desktop\documents canada\cosine_similarity1.pkl", 'rb') as f:
            cos_sim = pickle.load(f)
        
        # Reset index of content_df DataFrame
        data = content_df.reset_index()
        
        # Get the index of the movie with the given title
        index_movie = data[data['original_title'] == title].index
        
        # Transpose cosine similarity matrix to get similarities for the given movie
        similarity = cos_sim[index_movie].T
        
        # Create a DataFrame containing similarity scores
        sim_df = pd.DataFrame(similarity, columns=['similarity'])
        
        # Concatenate the similarity DataFrame with the data DataFrame
        final_df = pd.concat([data, sim_df], axis=1)
        
        # Calculate final score using similarity_weight
        final_df['final_score'] = final_df['score'] * (1 - similarity_weight) + final_df['similarity'] * similarity_weight
        
        # Sort DataFrame based on final score in descending order and select top_n movies
        final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
        
        # Set 'original_title' as index
        final_df_sorted.set_index('original_title', inplace=True)
        
        # Get the top recommendation
        top_recommendation = final_df_sorted.iloc[0]
        
        # Get feature matrix for SHAP
        X = final_df_sorted.drop(['score', 'similarity', 'final_score'], axis=1)
        
        # Get SHAP values
        shap_values = shap.Explainer(predict, X.values).shap_values(top_recommendation.name)
        
        # Plot SHAP values
        shap.initjs()
        shap.force_plot(shap.Explainer(predict, X.values).expected_value, shap_values, feature_names=X.columns)
        
    except Exception as e:
        print("An error occurred:", e)

# Test the function
explain_recommendation_with_shap('Big Hero 6', similarity_weight=1, top_n=20)


An error occurred: 'ExactExplainer' object has no attribute 'shap_values'
