<a href="https://colab.research.google.com/github/larwar123/movie-reco/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
!pip uninstall numpy scikit-surprise -y

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4


In [19]:
!pip install numpy==1.26.4
!pip install scikit-surprise

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/18.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/18.3 MB[0m [31m64.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/18.3 MB[0m [31m113.7 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━[0m [32m14.0/18.3 MB[0m [31m172.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [13]:
### **Final Interactive Recommender System Code**


import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split

# --- Section 1: Load and Prepare Our Data ---

try:
    movies = pd.read_csv('/content/movies.csv')
    ratings = pd.read_csv('/content/ratings.csv')
    print("Datasets loaded successfully.")
    print(f"Ratings columns: {ratings.columns.tolist()}")
    print(f"Movies columns: {movies.columns.tolist()}")

except FileNotFoundError:
    print("Error: movies.csv or ratings.csv not found.")
    exit()

# --- Section 2: Build the Content-Based Recommender ---

movies['genres'] = movies['genres'].str.replace('|', ' ')
movies['genres'] = movies['genres'].fillna('')

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

indices = pd.Series(movies.index, index=movies['title'].str.lower()).drop_duplicates()

def get_content_recommendations(title, top_n=10):
    """
    Finds and returns the top N movies with similar genres to a given movie.
    """
    cleaned_input_title = title.lower().strip()

    if cleaned_input_title not in indices.index:
        matches = movies['title'][movies['title'].str.lower().str.contains(cleaned_input_title, na=False, regex=False)]

        if matches.empty:
            print(f"Movie title '{title}' not found in the dataset.")
            return []
        else:
            best_match_title = matches.iloc[0]
            print(f"Movie title '{title}' not found. Using closest match: '{best_match_title}'")
            idx = indices[best_match_title.lower().strip()]
    else:
        idx = indices[cleaned_input_title]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n + 1]

    movie_indices = [i[0] for i in sim_scores]
    return movies['title'].iloc[movie_indices].tolist()


# --- Section 3: Build the Collaborative Filtering Recommender ---

reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

trainset, _ = train_test_split(data, test_size=0.01)

algo = SVD()
algo.fit(trainset)

def get_collaborative_recommendations(user_id, top_n=10):
    """
    Finds and returns the top N movies a user is predicted to enjoy.
    """
    all_movie_ids = ratings['movieId'].unique()
    rated_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    unrated_movies = [m for m in all_movie_ids if m not in rated_movies]

    predictions = [algo.predict(user_id, movie_id) for movie_id in unrated_movies]
    predictions = sorted(predictions, key=lambda x: x.est, reverse=True)
    top_n_movie_ids = [pred.iid for pred in predictions[:top_n]]

    recommended_titles = movies[movies['id'].isin(top_n_movie_ids)]['title'].tolist()
    return recommended_titles


# --- Section 4: Combine into a Hybrid Recommender ---

def get_hybrid_recommendations(user_id, movie_title, top_n=10):
    """
    Generates recommendations by combining results from both content-based and collaborative filters.
    """
    print(f"\n--- Generating Hybrid Recommendations for User ID {user_id} based on preference for '{movie_title}' ---")

    content_recs = get_content_recommendations(movie_title, top_n=top_n)
    collab_recs = get_collaborative_recommendations(user_id, top_n=top_n)

    hybrid_recs = list(dict.fromkeys(collab_recs + content_recs))

    return hybrid_recs[:top_n]


# --- Section 5: Interactive Recommender Interface ---
# This part of the code allows a user to input their preferences in real-time.

def run_interactive_recommender():
    """
    Starts an interactive session to get user input and provide movie recommendations.
    """
    while True:
        try:
            print("\n--- Welcome to the Interactive Movie Recommender! ---")

            # Get user ID
            user_id_input = int(input("Please enter your User ID (e.g., 1, 10, 50): "))
            if user_id_input not in ratings['userId'].unique():
                print(f"User ID {user_id_input} not found. Please try another ID from the dataset.")
                continue

            # Get favorite movie title
            movie_title_input = input("Please enter a movie title you like (e.g., 'Toy Story', 'Inception'): ")

            # Get recommendations
            final_recommendations = get_hybrid_recommendations(user_id=user_id_input, movie_title=movie_title_input)

            # Print the results
            print("\nFinal Hybrid Recommendations:")
            for i, movie in enumerate(final_recommendations):
                print(f"{i+1}. {movie}")

            # Ask to continue
            another_round = input("\nWould you like another recommendation? (yes/no): ").lower()
            if another_round != 'yes':
                print("Thank you for using the recommender. Goodbye!")
                break

        except ValueError:
            print("Invalid input. Please enter a valid number for the User ID.")
        except Exception as e:
            print(f"An unexpected error occurred: {e}")

# Run the interactive session
run_interactive_recommender()

Datasets loaded successfully.
Ratings columns: ['userId', 'movieId', 'rating', 'timestamp']
Movies columns: ['index', 'budget', 'genres', 'homepage', 'id', 'keywords', 'original_language', 'original_title', 'overview', 'popularity', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'vote_average', 'vote_count', 'cast', 'crew', 'director']

--- Welcome to the Interactive Movie Recommender! ---
Please enter your User ID (e.g., 1, 10, 50): 1
Please enter a movie title you like (e.g., 'Toy Story', 'Inception'): Inception

--- Generating Hybrid Recommendations for User ID 1 based on preference for 'Inception' ---

Final Hybrid Recommendations:
1. Ghost Rider
2. The Sixth Sense
3. The Good Thief
4. Sky Captain and the World of Tomorrow
5. Paycheck
6. Oblivion
7. Minority Report
8. The Maze Runner
9. Congo
10. Knowing

Would you like another recommendation? (yes/no): yes

--- Welcome to the Interactive Movie

In [15]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split

# --- Section 1: Load and Prepare Our Data ---

try:
    movies = pd.read_csv('/content/movies.csv')
    ratings = pd.read_csv('/content/ratings.csv')
    print("Datasets loaded successfully.")
    print(f"Ratings columns: {ratings.columns.tolist()}")
    print(f"Movies columns: {movies.columns.tolist()}")

except FileNotFoundError:
    print("Error: movies.csv or ratings.csv not found.")
    exit()

# --- Section 2: Build the Content-Based Recommender ---

# This section has been updated to use more movie features for better recommendations.

# We'll create a 'soup' of text from various movie features.
# This makes our content-based model much more powerful.
def create_soup(x):
    """Combines key features into a single string for analysis."""
    genres = ' '.join(x['genres'].replace('|', ' ').split()) if isinstance(x['genres'], str) else ''
    keywords = ' '.join(x['keywords'].split()) if isinstance(x['keywords'], str) else ''
    director = x['director'].replace(' ', '') if isinstance(x['director'], str) else ''
    cast = ' '.join([c.replace(' ', '') for c in x['cast'].split()]) if isinstance(x['cast'], str) else ''

    return f"{genres} {keywords} {director} {cast}"

# Fill any missing values with an empty string
for col in ['genres', 'keywords', 'director', 'cast']:
    if col in movies.columns:
        movies[col].fillna('', inplace=True)
    else:
        # Create a blank column if it doesn't exist to prevent errors
        movies[col] = ''

# Create the new 'soup' column
movies['soup'] = movies.apply(create_soup, axis=1)

# Now, we use the soup to build our TF-IDF matrix
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['soup'])

# We recalculate the similarity matrix with the new, richer data
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Create a lookup table using the 'title' column from your movies dataset.
indices = pd.Series(movies.index, index=movies['title'].str.lower()).drop_duplicates()

def get_content_recommendations(title, top_n=10):
    """
    Finds and returns the top N movies with similar content (genres, keywords, cast, director).
    """
    cleaned_input_title = title.lower().strip()

    if cleaned_input_title not in indices.index:
        matches = movies['title'][movies['title'].str.lower().str.contains(cleaned_input_title, na=False, regex=False)]

        if matches.empty:
            print(f"Movie title '{title}' not found in the dataset.")
            return []
        else:
            best_match_title = matches.iloc[0]
            print(f"Movie title '{title}' not found. Using closest match: '{best_match_title}'")
            idx = indices[best_match_title.lower().strip()]
    else:
        idx = indices[cleaned_input_title]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n + 1]

    movie_indices = [i[0] for i in sim_scores]
    return movies['title'].iloc[movie_indices].tolist()


# --- Section 3: Build the Collaborative Filtering Recommender ---

reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

trainset, _ = train_test_split(data, test_size=0.01)

algo = SVD()
algo.fit(trainset)

def get_collaborative_recommendations(user_id, top_n=10):
    """
    Finds and returns the top N movies a user is predicted to enjoy.
    """
    all_movie_ids = ratings['movieId'].unique()
    rated_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    unrated_movies = [m for m in all_movie_ids if m not in rated_movies]

    predictions = [algo.predict(user_id, movie_id) for movie_id in unrated_movies]
    predictions = sorted(predictions, key=lambda x: x.est, reverse=True)
    top_n_movie_ids = [pred.iid for pred in predictions[:top_n]]

    recommended_titles = movies[movies['id'].isin(top_n_movie_ids)]['title'].tolist()
    return recommended_titles


# --- Section 4: Combine into a Hybrid Recommender ---

def get_hybrid_recommendations(user_id, movie_title, top_n=10):
    """
    Generates recommendations by combining results from both content-based and collaborative filters.
    """
    print(f"\n--- Generating Hybrid Recommendations for User ID {user_id} based on preference for '{movie_title}' ---")

    content_recs = get_content_recommendations(movie_title, top_n=top_n)
    collab_recs = get_collaborative_recommendations(user_id, top_n=top_n)

    hybrid_recs = list(dict.fromkeys(collab_recs + content_recs))

    return hybrid_recs[:top_n]


# --- Section 5: Interactive Recommender Interface ---

def run_interactive_recommender():
    """
    Starts an interactive session to get user input and provide movie recommendations.
    """
    while True:
        try:
            print("\n--- Welcome to the Interactive Movie Recommender! ---")

            user_id_input = int(input("Please enter your User ID (e.g., 1, 10, 50): "))
            if user_id_input not in ratings['userId'].unique():
                print(f"User ID {user_id_input} not found. Please try another ID from the dataset.")
                continue

            movie_title_input = input("Please enter a movie title you like (e.g., 'Toy Story', 'Inception'): ")

            final_recommendations = get_hybrid_recommendations(user_id=user_id_input, movie_title=movie_title_input)

            print("\nFinal Hybrid Recommendations:")
            for i, movie in enumerate(final_recommendations):
                print(f"{i+1}. {movie}")

            another_round = input("\nWould you like another recommendation? (yes/no): ").lower()
            if another_round != 'yes':
                print("Thank you for using the recommender. Goodbye!")
                break

        except ValueError:
            print("Invalid input. Please enter a valid number for the User ID.")
        except Exception as e:
            print(f"An unexpected error occurred: {e}")

run_interactive_recommender()

Datasets loaded successfully.
Ratings columns: ['userId', 'movieId', 'rating', 'timestamp']
Movies columns: ['index', 'budget', 'genres', 'homepage', 'id', 'keywords', 'original_language', 'original_title', 'overview', 'popularity', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'vote_average', 'vote_count', 'cast', 'crew', 'director']


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  movies[col].fillna('', inplace=True)



--- Welcome to the Interactive Movie Recommender! ---
Please enter your User ID (e.g., 1, 10, 50): 4
Please enter a movie title you like (e.g., 'Toy Story', 'Inception'): Avatar

--- Generating Hybrid Recommendations for User ID 4 based on preference for 'Avatar' ---

Final Hybrid Recommendations:
1. Guardians of the Galaxy
2. Aliens
3. Alien
4. Galaxy Quest
5. Star Trek Into Darkness
6. Star Trek Beyond
7. Gravity
8. Alien³
9. Cargo
10. Jason X

Would you like another recommendation? (yes/no): no
Thank you for using the recommender. Goodbye!
