In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity



In [2]:
# Sample movie ratings data
data = {
    'User': ['User 1', 'User 2', 'User 3', 'User 4'],
    'Movie 1': [5, 0, 4, 0],
    'Movie 2': [4, 5, 0, 3],
    'Movie 3': [0, 4, 0, 5],
    'Movie 4': [3, 0, 5, 4]
}



In [4]:
# Create a DataFrame from the data
df = pd.DataFrame(data)

df

Unnamed: 0,User,Movie 1,Movie 2,Movie 3,Movie 4
0,User 1,5,4,0,3
1,User 2,0,5,4,0
2,User 3,4,0,0,5
3,User 4,0,3,5,4


In [5]:
# Calculate similarity matrix using cosine similarity
similarity_matrix = cosine_similarity(df.iloc[:, 1:])



In [7]:
# Convert similarity matrix to a DataFrame for better visualization
similarity_df = pd.DataFrame(similarity_matrix, index=df.columns[1:], columns=df.columns[1:])

similarity_df

Unnamed: 0,Movie 1,Movie 2,Movie 3,Movie 4
Movie 1,1.0,0.441726,0.773021,0.48
Movie 2,0.441726,1.0,0.0,0.773021
Movie 3,0.773021,0.0,1.0,0.441726
Movie 4,0.48,0.773021,0.441726,1.0


In [8]:
def get_similar_movies(movie_title, top_n=2):
    """
    Function to get top similar movies to a given movie title
    
    :param movie_title: The title of the movie
    :param top_n: Number of similar movies to return
    :return: List of top similar movies
    """
    # Get similarity scores for the given movie title
    movie_similarity = similarity_df[movie_title].sort_values(ascending=False)
    
    # Exclude the movie itself
    movie_similarity = movie_similarity.drop(movie_title)
    
    # Get top similar movies
    top_similar_movies = movie_similarity.head(top_n).index.tolist()
    
    return top_similar_movies



In [9]:
# Example usage
movie_title = 'Movie 1'
similar_movies = get_similar_movies(movie_title)
print(f"Top movies similar to '{movie_title}': {similar_movies}")

Top movies similar to 'Movie 1': ['Movie 3', 'Movie 4']
