In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('moviereviews.csv')
data.head()

Unnamed: 0,movie,review
0,The Lord of the Rings The Two Towers,remarkable display of fantasy action powerful ...
1,Inception,implanting stealing idea destroy gripping acti...
2,Spiderman Across the spider verse,mind bending wild action sequences intimate em...
3,The Dark Knight,Best live action portrayal beat organized crim...
4,Three colors red,mesmerising friendship turned love profound un...


In [3]:
word = [set(review.split()) for review in data["review"]]

all_unique_words = set()
for word_set in word:
    all_unique_words.update(word_set)

word_index = {word: i for i, word in enumerate(all_unique_words)}
word_index

{'idea': 0,
 'to': 1,
 'be': 2,
 'action': 3,
 'profound': 4,
 'ego': 5,
 'journey': 6,
 'dangerous': 7,
 'heartwarming': 8,
 'bonding': 9,
 'greatest': 10,
 'enigmatic': 11,
 'sports': 12,
 'ottoman': 13,
 'movie': 14,
 'brutal': 15,
 'unconventional': 16,
 'almost': 17,
 'comedy': 18,
 'remarkable': 19,
 'of': 20,
 'ring': 21,
 'beat': 22,
 'tale': 23,
 'evening': 24,
 'film': 25,
 'brutality': 26,
 'Gotham': 27,
 'spell': 28,
 'jaw': 29,
 'turbulent': 30,
 'moments': 31,
 'ever': 32,
 'dropping': 33,
 'Neighbors': 34,
 'bending': 35,
 'someone': 36,
 'destroy': 37,
 'officer': 38,
 'turned': 39,
 'Best': 40,
 'hobbit': 41,
 'reignited': 42,
 'success': 43,
 'blossoming': 44,
 'love': 45,
 'boxing': 46,
 'live': 47,
 'infatuation': 48,
 'finding': 49,
 'life': 50,
 'rollercoaster': 51,
 'good': 52,
 'mind': 53,
 'romance': 54,
 'war': 55,
 'binding': 56,
 'organized': 57,
 'powerful': 58,
 'fantasy': 59,
 'emotional': 60,
 'mesmerising': 61,
 'fleeting': 62,
 'prisoner': 63,
 'people

In [4]:
review_vectors = np.zeros((len(data), len(all_unique_words)))
for i, word_set in enumerate(word):
    for word in word_set:
        review_vectors[i, word_index[word]] = 1

review_vectors

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [5]:
def dot_product_similarity(vector1, vector2):
    return np.dot(vector1, vector2)

def angle_similarity(vector1, vector2):
    dot_product = dot_product_similarity(vector1, vector2)
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    cosine_similarity = dot_product / norm_product
    angle = np.arccos(cosine_similarity)
    return angle

In [6]:
similarities = []
num_movies = len(review_vectors)

for i in range(num_movies):
    for j in range(i + 1, num_movies):
        movie1 = data.loc[i, 'movie']
        movie2 = data.loc[j, 'movie']
        similarity = angle_similarity(review_vectors[i], review_vectors[j])
        similarities.append((movie1, movie2, similarity))

similarities

[('The Lord of the Rings The Two Towers', 'Inception', 1.3066176946497716),
 ('The Lord of the Rings The Two Towers',
  'Spiderman Across the spider verse',
  1.47012257324432),
 ('The Lord of the Rings The Two Towers',
  'The Dark Knight',
  1.4870743537024664),
 ('The Lord of the Rings The Two Towers',
  'Three colors red',
  1.5707963267948966),
 ('The Lord of the Rings The Two Towers',
  'It happened one night',
  1.475305009634543),
 ('The Lord of the Rings The Two Towers',
  'In the Mood for Love',
  1.5707963267948966),
 ('The Lord of the Rings The Two Towers',
  'Before Sunrise',
  1.5707963267948966),
 ('The Lord of the Rings The Two Towers',
  'Gone with the wind',
  1.5707963267948966),
 ('The Lord of the Rings The Two Towers',
  'Eternal Sunshine of the Spotless Mind',
  1.4953469269971336),
 ('The Lord of the Rings The Two Towers',
  'The Shawshank Redemption',
  1.4797615487574816),
 ('The Lord of the Rings The Two Towers', 'Raging Bull', 1.4901265642902817),
 ('The Lord 

In [7]:
similarities.sort(key=lambda x: x[2])
top_similar_pairs = similarities[:3]

for pair in top_similar_pairs:
    movie1, movie2, similarity = pair
    print(f"({movie1}) and ({movie2}) are much alike with an angle similarity of {similarity:}")

(The Lord of the Rings The Two Towers) and (Inception) are much alike with an angle similarity of 1.3066176946497716
(It happened one night) and (Gone with the wind) are much alike with an angle similarity of 1.387192316515978
(It happened one night) and (Before Sunrise) are much alike with an angle similarity of 1.400950038711223
