# In [None]:
# Import TensorFlow and hub
import tensorflow as tf
import tensorflow_hub as hub

# Plotting
import matplotlib.pyplot as plt

# Some important packages
import os
import re
import numpy as np
import pandas as pd

# scikit-learn
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

# Universal Sentence Encoder v4, referenced by its TensorFlow Hub URL.
model_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
# hub.load returns the object that `embed` below calls on a batch of texts.
model = hub.load(model_url)
print('Model Loaded')

def embed(texts):
    """Encode text with the module-level sentence-encoder ``model``.

    Args:
        texts: A sequence of strings to encode. A single bare ``str`` is
            also accepted and is treated as a one-element batch.

    Returns:
        The model's output for the batch. Elsewhere in this file it is
        used as a 2-D array with one row per input text.
    """
    # The model is called on a batch of strings; wrap a lone string so
    # callers do not have to remember to write embed([text]).
    if isinstance(texts, str):
        texts = [texts]
    return model(texts)

# Read the movie table from disk.
df = pd.read_csv("Top_10000_Movies.csv", engine="python")

# Keep only the title and plot-summary columns, discard rows where either
# is missing, renumber the rows, and cap the table at the first 5500 rows.
# NOTE: reset_index() is called without drop=True, so the previous row
# labels are kept in an extra "index" column.
df = (
    df[["original_title", "overview"]]
    .dropna()
    .reset_index()
    .head(5500)
)

# Despite its name, `titles` holds the overview texts; these are what get
# embedded.
titles = df['overview'].tolist()

embeddings = embed(titles)
print('The embedding shape is:', embeddings.shape)

# Reduce the embeddings to their first two principal components so they
# can be drawn on a plane.
pca = PCA(n_components=2)
emb_2d = pca.fit_transform(embeddings)

# One point per embedded text; the transpose splits emb_2d into its x and
# y columns for scatter().
plt.figure(figsize=(11, 6))
plt.scatter(*emb_2d.T)
plt.title('Embedding space')
plt.show()

# Index the embeddings for 10-nearest-neighbour lookups. fit() returns the
# estimator itself, so construction and fitting can be chained.
nn = NearestNeighbors(n_neighbors=10).fit(embeddings)

def recommend(text, n_neighbors=None):
    """Return the titles of the movies whose overviews are closest to *text*.

    The query is embedded with ``embed`` and looked up in the module-level
    ``nn`` index, which is fitted on the embeddings of ``df['overview']``.
    The matching rows' ``original_title`` values are returned.

    Args:
        text: Free-text query (a single string).
        n_neighbors: How many titles to return. ``None`` (the default)
            uses the value the ``nn`` index was constructed with.

    Returns:
        A list of ``original_title`` values, in the order the index
        returns the neighbours.
    """
    emb = embed([text])
    # kneighbors returns one row of neighbour positions per query; there is
    # exactly one query here, hence the [0].
    neighbors = nn.kneighbors(
        emb, n_neighbors=n_neighbors, return_distance=False
    )[0]
    # The positions refer to rows of the fitted embeddings, which were
    # built in df row order, so positional .iloc is the right lookup.
    return df['original_title'].iloc[neighbors].tolist()

# Ten movie names used as free-text queries for the recommender.
movie_names = [
    "Harry Potter", "The Lord of the Rings",
    "Inception", "The Shawshank Redemption",
    "The Dark Knight", "The Matrix",
    "Forrest Gump", "Pulp Fiction",
    "Fight Club", "The Godfather",
]

# Print the recommendation list for every query, with blank lines between
# consecutive results.
for name in movie_names:
    print('Recommended Movies for', name)
    recommended_movies = recommend(name)
    print(recommended_movies)
    print('\n')
