In [6]:
pip install colorama


Note: you may need to restart the kernel to use updated packages.


In [7]:
import numpy as np
import pandas as pd
import ast
import time
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
from colorama import init, Fore, Style

# Initialize colorama
init(autoreset=True)

class MovieRecommender:
    def __init__(self):
        self.movies = None
        self.new_df = None
        self.similarity = None
        self.cv = CountVectorizer(max_features=5000, stop_words='english')
        self.ps = PorterStemmer()

    def load_data(self, movies_path, credits_path):
        """Load movies and credits data from CSV files."""
        print(Fore.YELLOW + "Loading data...")
        start_time = time.time()
        movies = pd.read_csv(movies_path)
        credits = pd.read_csv(credits_path)
        self.movies = movies.merge(credits, on="title")
        end_time = time.time()
        print(Fore.GREEN + "Data loaded and merged successfully in {:.2f} seconds.".format(end_time - start_time))
        print(Fore.CYAN + "Sample movie titles:")
        print(self.movies[['title']].head())

    def preprocess_data(self):
        """Preprocess the data to create tags and calculate similarity matrix."""
        if self.movies is None:
            raise Exception("Data not loaded. Call load_data() first.")

        print(Fore.YELLOW + "Preprocessing data...")
        start_time = time.time()

        # Select relevant columns
        self.movies = self.movies[['id', 'title', 'cast', 'crew', 'genres', 'keywords', 'overview']]
        self.movies.dropna(inplace=True)
        self.movies.drop_duplicates(inplace=True)

        # Convert strings to lists
        self.movies['genres'] = self.movies['genres'].apply(self.convert)
        self.movies['keywords'] = self.movies['keywords'].apply(self.convert)
        self.movies['cast'] = self.movies['cast'].apply(self.convert_cast)
        self.movies['crew'] = self.movies['crew'].apply(self.fetch_director)

        # Process overview text
        self.movies['overview'] = self.movies['overview'].apply(lambda x: x.split())

        # Create 'tags' column
        self.movies['tags'] = (self.movies['overview'] + self.movies['cast'] + 
                               self.movies['crew'] + self.movies['keywords'] + self.movies['genres'])

        # Combine tags into single string and lowercase
        self.movies['tags'] = self.movies['tags'].apply(lambda x: " ".join(x).lower())

        # Stemming
        self.movies['tags'] = self.movies['tags'].apply(lambda x: " ".join([self.ps.stem(word) for word in x.split()]))

        # Create new DataFrame with necessary columns
        self.new_df = self.movies[['id', 'title', 'tags']]
        end_time = time.time()
        print(Fore.GREEN + "Data preprocessing completed in {:.2f} seconds.".format(end_time - start_time))

        # Vectorization
        print(Fore.YELLOW + "Vectorizing tags...")
        start_time = time.time()
        vectors = self.cv.fit_transform(self.new_df['tags']).toarray()
        end_time = time.time()
        print(Fore.GREEN + "Vectorization completed in {:.2f} seconds. Shape of vectors: {}".format(end_time - start_time, vectors.shape))

        # Cosine similarity matrix
        print(Fore.YELLOW + "Calculating cosine similarity...")
        start_time = time.time()
        self.similarity = cosine_similarity(vectors)
        end_time = time.time()
        print(Fore.GREEN + "Cosine similarity calculation completed in {:.2f} seconds. Shape of similarity matrix: {}".format(end_time - start_time, self.similarity.shape))

    def convert(self, obj):
        return [i['name'] for i in ast.literal_eval(obj)]

    def convert_cast(self, obj):
        return [i['name'] for i in ast.literal_eval(obj)[:3]]

    def fetch_director(self, obj):
        for i in ast.literal_eval(obj):
            if i['job'] == 'Director':
                return [i['name']]
        return []

    def recommend(self, movie, num_recommendations=5):
        """Recommend movies similar to the given movie."""
        if self.movies is None or self.new_df is None or self.similarity is None:
            raise Exception("Data not preprocessed. Call preprocess_data() first.")

        movie = movie.lower()
        titles = self.new_df['title'].str.lower().tolist()
        if movie not in titles:
            print(Fore.RED + f"Movie '{movie}' not found in the database.")
            return []

        movie_index = titles.index(movie)
        distances = self.similarity[movie_index]
        movies_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:num_recommendations+1]
        recommended_movies = [self.new_df.iloc[i[0]]['title'] for i in movies_list]
        return recommended_movies

    def save_model(self, movies_pkl_path='movies.pkl', similarity_pkl_path='similarity.pkl'):
        """Save preprocessed data and similarity matrix to pickle files."""
        if self.new_df is None or self.similarity is None:
            raise Exception("Data not preprocessed. Call preprocess_data() first.")
        
        with open(movies_pkl_path, 'wb') as f:
            pickle.dump(self.new_df, f)
        
        with open(similarity_pkl_path, 'wb') as f:
            pickle.dump(self.similarity, f)
        print(Fore.GREEN + "Model saved successfully.")

    def load_model(self, movies_pkl_path='movies.pkl', similarity_pkl_path='similarity.pkl'):
        """Load preprocessed data and similarity matrix from pickle files."""
        with open(movies_pkl_path, 'rb') as f:
            self.new_df = pickle.load(f)
        
        with open(similarity_pkl_path, 'rb') as f:
            self.similarity = pickle.load(f)
        print(Fore.GREEN + "Model loaded successfully.")

    def run_example(self):
        """Run example usage of the Movie Recommender."""
        print(Fore.MAGENTA + Style.BRIGHT + "Welcome to Movie Recommender!")
        while True:
            print(Fore.CYAN + "\nEnter the name of a movie to get recommendations (or 'exit' to quit): ")
            movie = input().strip()
            if movie.lower() == 'exit':
                print(Fore.YELLOW + "Thank you for using Movie Recommender. Goodbye!")
                break
            try:
                recommendations = self.recommend(movie)
                if recommendations:
                    print(Fore.MAGENTA + f"\nMovies similar to '{movie}':")
                    for i, recommendation in enumerate(recommendations):
                        print(Fore.GREEN + f"{i+1}. {recommendation}")
                else:
                    print(Fore.RED + f"No recommendations found for '{movie}'.")
            except Exception as e:
                print(Fore.RED + f"An error occurred: {e}")

if __name__ == "__main__":
    recommender = MovieRecommender()

    # Load and preprocess data
    recommender.load_data("C:\\Users\\Lenovo\\Downloads\\tmdb_5000_movies.csv (1)\\tmdb_5000_movies.csv",
                          "C:\\Users\\Lenovo\\Downloads\\tmdb_5000_credits.csv (1)\\tmdb_5000_credits.csv")
    recommender.preprocess_data()

    # Save preprocessed data and similarity matrix
    recommender.save_model()

    # Run example usage
    recommender.run_example()


Loading data...
Data loaded and merged successfully in 0.42 seconds.
Sample movie titles:
                                      title
0                                    Avatar
1  Pirates of the Caribbean: At World's End
2                                   Spectre
3                     The Dark Knight Rises
4                               John Carter
Preprocessing data...
Data preprocessing completed in 9.86 seconds.
Vectorizing tags...
Vectorization completed in 0.29 seconds. Shape of vectors: (4806, 5000)
Calculating cosine similarity...
Cosine similarity calculation completed in 1.28 seconds. Shape of similarity matrix: (4806, 4806)
Model saved successfully.
Welcome to Movie Recommender!

Enter the name of a movie to get recommendations (or 'exit' to quit): 


 Spectre



Movies similar to 'Spectre':
1. Quantum of Solace
2. Skyfall
3. Never Say Never Again
4. Die Another Day
5. Dr. No

Enter the name of a movie to get recommendations (or 'exit' to quit): 


 Dr. No



Movies similar to 'Dr. No':
1. You Only Live Twice
2. From Russia with Love
3. On Her Majesty's Secret Service
4. Never Say Never Again
5. Goldfinger

Enter the name of a movie to get recommendations (or 'exit' to quit): 


 exit


Thank you for using Movie Recommender. Goodbye!


In [1]:
import numpy as np
import pandas as pd
import ast
import time
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
from colorama import init, Fore, Style

# Initialize colorama
init(autoreset=True)


In [2]:
class MovieRecommender:
    def __init__(self):
        self.movies = None
        self.new_df = None
        self.similarity = None
        self.cv = CountVectorizer(max_features=5000, stop_words='english')
        self.ps = PorterStemmer()


In [3]:
def load_data(self, movies_path, credits_path):
    """Load movies and credits data from CSV files."""
    print(Fore.YELLOW + "Loading data...")
    start_time = time.time()
    movies = pd.read_csv(movies_path)
    credits = pd.read_csv(credits_path)
    self.movies = movies.merge(credits, on="title")
    end_time = time.time()
    print(Fore.GREEN + "Data loaded and merged successfully in {:.2f} seconds.".format(end_time - start_time))
    print(Fore.CYAN + "Sample movie titles:")
    print(self.movies[['title']].head())


In [4]:
def preprocess_data(self):
    """Preprocess the data to create tags and calculate similarity matrix."""
    if self.movies is None:
        raise Exception("Data not loaded. Call load_data() first.")

    print(Fore.YELLOW + "Preprocessing data...")
    start_time = time.time()

    # Select relevant columns
    self.movies = self.movies[['id', 'title', 'cast', 'crew', 'genres', 'keywords', 'overview']]
    self.movies.dropna(inplace=True)
    self.movies.drop_duplicates(inplace=True)

    # Convert strings to lists
    self.movies['genres'] = self.movies['genres'].apply(self.convert)
    self.movies['keywords'] = self.movies['keywords'].apply(self.convert)
    self.movies['cast'] = self.movies['cast'].apply(self.convert_cast)
    self.movies['crew'] = self.movies['crew'].apply(self.fetch_director)

    # Process overview text
    self.movies['overview'] = self.movies['overview'].apply(lambda x: x.split())

    # Create 'tags' column
    self.movies['tags'] = (self.movies['overview'] + self.movies['cast'] + 
                           self.movies['crew'] + self.movies['keywords'] + self.movies['genres'])

    # Combine tags into single string and lowercase
    self.movies['tags'] = self.movies['tags'].apply(lambda x: " ".join(x).lower())

    # Stemming
    self.movies['tags'] = self.movies['tags'].apply(lambda x: " ".join([self.ps.stem(word) for word in x.split()]))

    # Create new DataFrame with necessary columns
    self.new_df = self.movies[['id', 'title', 'tags']]
    end_time = time.time()
    print(Fore.GREEN + "Data preprocessing completed in {:.2f} seconds.".format(end_time - start_time))

    # Vectorization
    print(Fore.YELLOW + "Vectorizing tags...")
    start_time = time.time()
    vectors = self.cv.fit_transform(self.new_df['tags']).toarray()
    end_time = time.time()
    print(Fore.GREEN + "Vectorization completed in {:.2f} seconds. Shape of vectors: {}".format(end_time - start_time, vectors.shape))

    # Cosine similarity matrix
    print(Fore.YELLOW + "Calculating cosine similarity...")
    start_time = time.time()
    self.similarity = cosine_similarity(vectors)
    end_time = time.time()
    print(Fore.GREEN + "Cosine similarity calculation completed in {:.2f} seconds. Shape of similarity matrix: {}".format(end_time - start_time, self.similarity.shape))


In [5]:
def convert(self, obj):
    return [i['name'] for i in ast.literal_eval(obj)]

def convert_cast(self, obj):
    return [i['name'] for i in ast.literal_eval(obj)[:3]]

def fetch_director(self, obj):
    for i in ast.literal_eval(obj):
        if i['job'] == 'Director':
            return [i['name']]
    return []


In [6]:
def recommend(self, movie, num_recommendations=5):
    """Recommend movies similar to the given movie."""
    if self.movies is None or self.new_df is None or self.similarity is None:
        raise Exception("Data not preprocessed. Call preprocess_data() first.")

    movie = movie.lower()
    titles = self.new_df['title'].str.lower().tolist()
    if movie not in titles:
        print(Fore.RED + f"Movie '{movie}' not found in the database.")
        return []

    movie_index = titles.index(movie)
    distances = self.similarity[movie_index]
    movies_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:num_recommendations+1]
    recommended_movies = [self.new_df.iloc[i[0]]['title'] for i in movies_list]
    return recommended_movies


In [7]:
def save_model(self, movies_pkl_path='movies.pkl', similarity_pkl_path='similarity.pkl'):
    """Save preprocessed data and similarity matrix to pickle files."""
    if self.new_df is None or self.similarity is None:
        raise Exception("Data not preprocessed. Call preprocess_data() first.")
    
    with open(movies_pkl_path, 'wb') as f:
        pickle.dump(self.new_df, f)
    
    with open(similarity_pkl_path, 'wb') as f:
        pickle.dump(self.similarity, f)
    print(Fore.GREEN + "Model saved successfully.")

def load_model(self, movies_pkl_path='movies.pkl', similarity_pkl_path='similarity.pkl'):
    """Load preprocessed data and similarity matrix from pickle files."""
    with open(movies_pkl_path, 'rb') as f:
        self.new_df = pickle.load(f)
    
    with open(similarity_pkl_path, 'rb') as f:
        self.similarity = pickle.load(f)
    print(Fore.GREEN + "Model loaded successfully.")


In [8]:
def run_example(self):
    """Run example usage of the Movie Recommender."""
    print(Fore.MAGENTA + Style.BRIGHT + "Welcome to Movie Recommender!")
    while True:
        print(Fore.CYAN + "\nEnter the name of a movie to get recommendations (or 'exit' to quit): ")
        movie = input().strip()
        if movie.lower() == 'exit':
            print(Fore.YELLOW + "Thank you for using Movie Recommender. Goodbye!")
            break
        try:
            recommendations = self.recommend(movie)
            if recommendations:
                print(Fore.MAGENTA + f"\nMovies similar to '{movie}':")
                for i, recommendation in enumerate(recommendations):
                    print(Fore.GREEN + f"{i+1}. {recommendation}")
            else:
                print(Fore.RED + f"No recommendations found for '{movie}'.")
        except Exception as e:
            print(Fore.RED + f"An error occurred: {e}")


In [9]:
if __name__ == "__main__":
    recommender = MovieRecommender()

    # Load and preprocess data
    recommender.load_data("C:\\Users\\Lenovo\\Downloads\\tmdb_5000_movies.csv (1)\\tmdb_5000_movies.csv",
                          "C:\\Users\\Lenovo\\Downloads\\tmdb_5000_credits.csv (1)\\tmdb_5000_credits.csv")
    recommender.preprocess_data()

    # Save preprocessed data and similarity matrix
    recommender.save_model()

    # Run example usage
    recommender.run_example()


AttributeError: 'MovieRecommender' object has no attribute 'load_data'