In [2]:
#Import the spaCy library, which is used for natural language processing (NLP) tasks.
import spacy

# Load spaCy's medium-sized English pipeline ('en_core_web_md'). The resulting
# `nlp` callable is what get_vector() uses to turn text into a spaCy Doc.
# NOTE(review): the model package presumably has to be installed separately
# before this line can succeed — confirm against the environment setup.
nlp = spacy.load('en_core_web_md') 

# Parse movies.txt into a {title: description} dictionary.
# Each non-blank line is expected to look like "<title>:<description>".
movies_data = {}

# The context manager guarantees the file is closed, even if parsing fails.
# NOTE: despite its name, movies_file_path is bound to the file object (not a
# path string); the name is kept so any later reference to it still resolves.
with open("movies.txt", "r") as movies_file_path:
    for line_number, movie in enumerate(movies_file_path, start=1):
        movie = movie.strip()

        # Skip blank lines (e.g. a trailing newline at the end of the file)
        # instead of crashing on the tuple unpacking below.
        if not movie:
            continue

        # Split on the FIRST colon only, so a description that itself contains
        # a colon is kept intact rather than raising "too many values to unpack".
        title, separator, description = movie.partition(":")
        if not separator:
            raise ValueError(
                f"movies.txt line {line_number}: expected '<title>:<description>', got {movie!r}"
            )

        # Strip the whitespace around the colon before storing the pair.
        movies_data[title.strip()] = description.strip()

print(movies_data)  # show the parsed catalogue as a dictionary


{'Movie A': 'When Hiccup discovers Toothless isn\'t the only Night Fury, he must seek "The Hidden World", a secret Dragon Utopia before a hired tyrant named Grimmel finds it first.', 'Movie B': 'After the death of Superman, several new people present themselves as possible successors.', 'Movie C': 'A darkness swirls at the center of a world-renowned dance company, one that will engulf the artistic director, an ambitious young dancer, and a grieving psychotherapist. Some will succumb to the nightmare. Others will finally wake up.', 'Movie D': "A humorous take on Sir Arthur Conan Doyle's classic mysteries featuring Sherlock Holmes and Doctor Watson.", 'Movie E': 'A 16-year-old girl and her extended family are left reeling after her calculating grandmother unveils an array of secrets on her deathbed.', 'Movie F': "In the last moments of World War II, a young German soldier fighting for survival finds a Nazi captain's uniform. Impersonating an officer, the man quickly takes on the monstrou

In [3]:
# We'll transform movie descriptions into numerical vectors using spaCy's word vectors.

def get_vector(text):
    """Return the spaCy vector for ``text``.

    The text is run through the module-level ``nlp`` pipeline and the
    ``vector`` attribute of the resulting document is returned unchanged.
    """
    return nlp(text).vector

# Maps each movie title to the vector of its description.
movies_vectors = {}

# Walk every (title, description) pair parsed into movies_data.
for title, description in movies_data.items():
    
    # Convert this movie's description into its vector via get_vector().
    vector = get_vector(description)
    
    # Key the vector by title; this dictionary is iterated later when scoring.
    movies_vectors[title] = vector

# NOTE: `title` and `vector` stay bound at module level after this loop and are
# passed to movies_to_recommend() in a later cell, so this must remain a plain
# loop (a comprehension would not leave those names defined).

# Optional debugging aid: uncomment to inspect the computed vectors.
#for title, vector in movies_vectors.items():
    #print(title, ":", vector)

In [4]:
# Description of the movie we want recommendations for (Planet Hulk). This is
# the query text that each entry in the movies dictionary is compared against.

Planet_Hulk = '''
Will he save
their world or destroy it? When the Hulk becomes too dangerous for the
Earth, the Illuminati trick Hulk into a shuttle and launch him into space to a
planet where the Hulk can live in peace. Unfortunately, Hulk lands on the
planet Sakaar where he is sold into slavery and trained as a gladiator.
'''
# Vectorise the query with the same helper used for the catalogue, then reshape
# it to a single-row 2D array. The catalogue vectors get the same (1, -1)
# reshape when both are handed to cosine_similarity.
Planet_Hulk_vector = get_vector(Planet_Hulk).reshape(1, -1)


In [10]:
# Import cosine_similarity to measure the similarity between movie description vectors
from sklearn.metrics.pairwise import cosine_similarity

# Similarity score of each catalogue movie against the Planet Hulk description,
# keyed by movie title. Filled in by movies_to_recommend().
scores = {}


def movies_to_recommend(title, vector):
    """Score every movie in ``movies_vectors`` against the Planet Hulk description.

    For each (title, vector) pair in the module-level ``movies_vectors``
    dictionary, the cosine similarity to ``Planet_Hulk_vector`` is computed and
    written into the module-level ``scores`` dictionary under that title.

    Parameters
    ----------
    title, vector
        Accepted only so existing call sites keep working; neither value is
        read. (The loop previously reused these names for its own variables,
        which discarded whatever the caller passed in.)

    Returns
    -------
    dict
        The module-level ``scores`` dictionary, after it has been updated.
    """
    # Loop names are deliberately distinct from the parameters so the function
    # no longer rebinds its own arguments.
    for movie_title, movie_vector in movies_vectors.items():

        # Each catalogue vector is reshaped to one row, matching the query
        # vector; [0][0] pulls the single value out of the returned matrix.
        similarity = cosine_similarity(
            Planet_Hulk_vector, movie_vector.reshape(1, -1)
        )[0][0]

        # Record the score under the movie's title.
        scores[movie_title] = similarity

    return scores

# Fill `scores`. The two arguments are whatever `title` / `vector` were last
# bound to at module level; the function does not read them.
movies_to_recommend(title, vector)


# Title whose similarity score is the largest.
recommended_film = max(scores, key=lambda name: scores[name])

# Report every movie's score, rounded to two decimals.
print("Similarity Scores:")
for movie in scores:
    score = scores[movie]
    print(f"{movie}: {score :.2f}")

# Finally, name the best match.
print(f"Recommended Film: {recommended_film}")


Similarity Scores:
Movie A: 0.84
Movie B: 0.84
Movie C: 0.91
Movie D: 0.54
Movie E: 0.73
Movie F: 0.89
Movie G: 0.90
Movie H: 0.83
Movie I: 0.84
Movie J: 0.75
Recommended Film: Movie C
