In [1]:
import pandas as pd

# Toy catalogue: every movie is described only by a space-separated genre string.
data = dict(
    Movie=['Movie1', 'Movie2', 'Movie3', 'Movie4'],
    Genre=['Action Adventure', 'Romance Drama', 'Action Thriller', 'Drama Romance'],
)

# One row per movie; columns follow the insertion order of `data` (Movie, Genre).
df = pd.DataFrame(data)
df

Unnamed: 0,Movie,Genre
0,Movie1,Action Adventure
1,Movie2,Romance Drama
2,Movie3,Action Thriller
3,Movie4,Drama Romance


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Learn the genre vocabulary and IDF weights from the 'Genre' column, dropping
# English stop words, then encode each movie's genre string as a TF-IDF row.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit(df['Genre']).transform(df['Genre'])

# Sparse matrix: one row per movie, one column per vocabulary term.
tfidf_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 8 stored elements and shape (4, 5)>

In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the TF-IDF rows of every pair of movies.
content_similarity = cosine_similarity(tfidf_matrix)

# Label both axes with the movie titles so scores can be looked up by name.
content_similarity_df = pd.DataFrame(data=content_similarity, index=df['Movie'], columns=df['Movie'])

content_similarity_df

Movie,Movie1,Movie2,Movie3,Movie4
Movie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Movie1,1.0,0.0,0.383322,0.0
Movie2,0.0,1.0,0.0,1.0
Movie3,0.383322,0.0,1.0,0.0
Movie4,0.0,1.0,0.0,1.0


In [4]:
def get_similar_movies(movie: str, similarity_df: pd.DataFrame, n: int = 2) -> pd.Series:
    """Return the ``n`` movies most similar to ``movie``, best match first.

    Parameters
    ----------
    movie : str
        Label of the movie to look up; must be a column of ``similarity_df``.
    similarity_df : pandas.DataFrame
        Square similarity matrix whose rows and columns are movie labels.
    n : int, default 2
        Maximum number of movies to return. Values below zero are treated as zero.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by movie label, sorted in descending order,
        never containing ``movie`` itself.

    Raises
    ------
    KeyError
        If ``movie`` is not a column of ``similarity_df``.
    """
    if movie not in similarity_df.columns:
        raise KeyError(f"{movie!r} is not a column of the similarity matrix")

    # Remove the queried movie by label instead of assuming it sorts first:
    # another movie with an identical profile also scores 1.0, and slicing off
    # position 0 could then discard that movie and return the query itself.
    scores = similarity_df[movie].drop(labels=movie, errors="ignore")

    # A stable sort keeps the result deterministic when several scores tie.
    ranked = scores.sort_values(ascending=False, kind="mergesort")
    return ranked.head(max(n, 0))

# Look up the closest matches for Movie1 using the default number of results.
get_similar_movies(movie='Movie1', similarity_df=content_similarity_df)

Movie
Movie3    0.383322
Movie2    0.000000
Name: Movie1, dtype: float64

In [5]:
def recommend_content_based(movie: str, similarity_df: pd.DataFrame, n_recommendations: int = 2) -> pd.Series:
    """Recommend the movies whose content is most similar to ``movie``.

    Parameters
    ----------
    movie : str
        Label of the movie to base recommendations on; must be a column of
        ``similarity_df``.
    similarity_df : pandas.DataFrame
        Square similarity matrix whose rows and columns are movie labels.
    n_recommendations : int, default 2
        Maximum number of recommendations. Values below zero are treated as zero.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by movie label, sorted in descending order,
        never containing ``movie`` itself.

    Raises
    ------
    KeyError
        If ``movie`` is not a column of ``similarity_df``.
    """
    if movie not in similarity_df.columns:
        raise KeyError(f"{movie!r} is not a column of the similarity matrix")

    # Exclude the queried movie by label: when another movie ties with it at
    # the top score, dropping "whatever sorted first" may keep the query and
    # throw away the genuine best recommendation.
    candidates = similarity_df[movie].drop(labels=movie, errors="ignore")

    # A stable sort keeps the recommendations deterministic when scores tie.
    ranked = candidates.sort_values(ascending=False, kind="mergesort")
    return ranked.head(max(n_recommendations, 0))

# Recommend titles for Movie1 using the default number of recommendations.
recommend_content_based(movie='Movie1', similarity_df=content_similarity_df)

Movie
Movie3    0.383322
Movie2    0.000000
Name: Movie1, dtype: float64