In [None]:
!pip install numpy pandas scikit-learn scipy



In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds

In [29]:
# Folder holding the movies/ratings/tags/links CSV files. Change this single
# constant when the notebook runs somewhere other than the Colab /content dir.
DATA_DIR = "/content"

movies = pd.read_csv(f"{DATA_DIR}/movies.csv")
ratings = pd.read_csv(f"{DATA_DIR}/ratings.csv")
tags = pd.read_csv(f"{DATA_DIR}/tags.csv")
links = pd.read_csv(f"{DATA_DIR}/links.csv")


In [30]:
# Combine all tags of each movie
# Combine all tags of each movie into one space-separated string.
# Rows without a tag are dropped and values are cast to str so that a missing
# tag cannot make " ".join fail.
tags_grouped = (
    tags.dropna(subset=["tag"])
    .groupby("movieId")["tag"]
    .apply(lambda movie_tags: " ".join(movie_tags.astype(str)))
    .reset_index()
)

# Merge into movies. Any "tag" column left by a previous run of this cell is
# removed first; otherwise the merge would produce tag_x / tag_y and the
# fillna below would raise KeyError.
movies = movies.drop(columns=["tag"], errors="ignore").merge(
    tags_grouped, on="movieId", how="left"
)

# Movies that nobody tagged get an empty string instead of NaN.
movies["tag"] = movies["tag"].fillna("")

In [31]:
# Genres arrive pipe-separated; turn the literal "|" into a space so each genre
# becomes its own token.
movies["genres"] = movies["genres"].str.replace("|", " ", regex=False)

# Text used for content-based similarity: genres followed by the joined tags.
movies["metadata"] = movies["genres"].str.cat(movies["tag"], sep=" ")

In [32]:
# TF-IDF over the combined genre + tag text, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(movies["metadata"])

# Pairwise movie-to-movie similarity; row/column i corresponds to the i-th
# metadata string passed to the vectoriser above.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Title -> row label lookup.
# Series.drop_duplicates() would de-duplicate the *values* (the row labels,
# which are already unique), so a title that occurs more than once would still
# map to several rows. Keep only the first row for every title instead, so that
# indices[title] is always a single label.
indices = pd.Series(movies.index, index=movies["title"])
indices = indices[~indices.index.duplicated(keep="first")]

def content_recommend(title, n=10):
    """Return the titles of the movies most similar to ``title``.

    Similarity scores are read from the row of ``cosine_sim`` selected by
    ``indices[title]``.

    Args:
        title: Movie title used as a key into ``indices``.
        n: Maximum number of titles to return.

    Returns:
        pandas.Series: entries of ``movies["title"]`` ordered from most to
        least similar. The queried movie itself is never included.

    Raises:
        KeyError: if ``title`` is not a key of ``indices``.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several rows share this title: fall back to the first one.
        idx = idx.iloc[0]

    # Drop the queried movie by position. Slicing off the first sorted entry is
    # not enough: a movie with an identical score that appears earlier in the
    # matrix sorts ahead of the queried movie, which would then be returned.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [pos for pos, _ in ranked[:n]]
    return movies["title"].iloc[movie_indices]

In [33]:
# Users as rows, movies as columns, ratings as cells; user/movie pairs without
# a rating are filled with 0.
user_item_matrix = pd.pivot_table(
    ratings,
    values="rating",
    index="userId",
    columns="movieId",
).fillna(0)

In [34]:
# Truncated SVD (50 singular values) of the zero-filled user x movie table.
matrix = user_item_matrix.to_numpy()

U, sigma, Vt = svds(matrix, k=50)

# svds returns the singular values as a vector; expand them to a diagonal matrix.
sigma = np.diag(sigma)

# Low-rank reconstruction, used as the predicted rating for every user/movie pair.
predicted_ratings = U @ sigma @ Vt

predicted_df = pd.DataFrame(
    predicted_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

In [35]:
def collaborative_recommend(user_id, n=10):
    """Recommend the unseen movies with the highest predicted rating for a user.

    Args:
        user_id: Row label of ``predicted_df`` identifying the user.
        n: Maximum number of movies to return.

    Returns:
        pandas.DataFrame with columns ``movieId`` and ``title``, ordered from
        the highest to the lowest predicted rating.

    Raises:
        KeyError: if ``user_id`` is not in ``predicted_df.index``.
    """
    user_row = predicted_df.loc[user_id]

    already_rated = ratings.loc[ratings["userId"] == user_id, "movieId"]

    # errors="ignore": a rated movie may be absent from predicted_df's columns
    # (for example when predicted_df was fitted on a subset of the ratings);
    # without it drop() raises KeyError.
    candidates = user_row.drop(already_rated, errors="ignore")

    top_movies = candidates.sort_values(ascending=False).head(n)

    # Filtering with isin() alone returns rows in the order of `movies`, which
    # loses the ranking; put the rows back into predicted-rating order.
    rank = {movie_id: position for position, movie_id in enumerate(top_movies.index)}
    picked = movies[movies["movieId"].isin(top_movies.index)]
    order = picked["movieId"].map(rank).to_numpy().argsort(kind="stable")

    return picked.iloc[order][["movieId", "title"]]

In [36]:
def hybrid_recommend(user_id, title, n=10, alpha=0.6):
    """Blend content similarity with the user's predicted ratings.

    Every movie is scored as
    ``alpha * cosine_sim[seed, movie] + (1 - alpha) * predicted_df[user, movie]``.
    The collaborative term is 0 when the user or the movie is not present in
    ``predicted_df``.

    Args:
        user_id: User whose row of ``predicted_df`` supplies the collaborative term.
        title: Seed movie title, used as a key into ``indices``.
        n: Maximum number of movies to return.
        alpha: Weight of the content term; ``1 - alpha`` weights the collaborative term.

    Returns:
        pandas.DataFrame with a single ``title`` column, best score first. The
        seed movie is never included.

    Raises:
        KeyError: if ``title`` is not a key of ``indices``.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several rows share this title: fall back to the first one.
        idx = idx.iloc[0]

    content_scores = np.asarray(cosine_sim[idx], dtype=float)

    if user_id in predicted_df.index:
        # Align the user's predictions with the row order of `movies`; movies
        # without a prediction column contribute 0, as in the per-row lookup.
        collab_scores = (
            movies["movieId"].map(predicted_df.loc[user_id]).fillna(0).to_numpy(dtype=float)
        )
    else:
        collab_scores = np.zeros(len(movies))

    # NOTE(review): cosine similarity and predicted ratings appear to be on
    # different scales, so the collaborative term may dominate - confirm
    # whether the predictions should be rescaled before blending.
    final_scores = alpha * content_scores + (1 - alpha) * collab_scores

    # Exclude the seed movie by position. Slicing off the first sorted entry
    # would discard whichever movie scored best, which need not be the seed
    # once collaborative scores are mixed in. A stable sort keeps the original
    # row order among ties.
    order = np.argsort(-final_scores, kind="stable")
    movie_indices = [int(pos) for pos in order if pos != idx][:n]

    return movies.iloc[movie_indices][["title"]]

In [38]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rating rows for evaluation; the fixed seed keeps the
# split the same on every run.
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

In [39]:
# User x movie table built from the training rows only; unrated pairs stay NaN
# here so that they do not distort the per-user means computed next.
train_matrix = pd.pivot_table(
    train_data,
    values="rating",
    index="userId",
    columns="movieId",
)

In [40]:
# Mean rating of every user (NaN cells are skipped by mean()).
user_means = train_matrix.mean(axis=1)

# Subtract each user's mean from their ratings, then treat unrated pairs as 0,
# i.e. "exactly the user's average".
train_matrix_centered = train_matrix.sub(user_means, axis=0).fillna(0)

In [41]:
from scipy.sparse.linalg import svds
import numpy as np

# Factorise the mean-centred training matrix.
matrix = train_matrix_centered.to_numpy()

U, sigma, Vt = svds(matrix, k=30)   # tried 20-50

sigma = np.diag(sigma)

# Low-rank reconstruction of the centred ratings ...
predicted = U @ sigma @ Vt

# ... shifted back by each user's mean rating (one value per row).
predicted = predicted + user_means.to_numpy().reshape(-1, 1)

# This rebinds predicted_df, so code that reads predicted_df after this cell
# uses the training-only model.
predicted_df = pd.DataFrame(
    predicted,
    index=train_matrix.index,
    columns=train_matrix.columns,
)

In [42]:
# Keep only movies with at least 10 rating rows, then only users with at least
# 10 of the remaining rating rows.
# NOTE(review): this rebinds `ratings` itself, so re-running the cell filters
# the already-filtered table again. It also runs after train_data / test_data
# were split from `ratings`, so the split and the model fitted on it are not
# affected by this filter - confirm that ordering is intended.
ratings = ratings.groupby('movieId').filter(lambda x: len(x) >= 10)
ratings = ratings.groupby('userId').filter(lambda x: len(x) >= 10)

In [43]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Paired true / predicted ratings for the held-out rows.
actual = []
predicted = []

known_users = predicted_df.index
known_movies = predicted_df.columns

for record in test_data.itertuples():
    # A prediction exists only for users and movies present in predicted_df;
    # every other test row is left out of the score.
    if record.userId not in known_users or record.movieId not in known_movies:
        continue
    actual.append(record.rating)
    predicted.append(predicted_df.loc[record.userId, record.movieId])

rmse = np.sqrt(mean_squared_error(actual, predicted))
print("Test RMSE:", rmse)

Test RMSE: 0.9234740409506537


In [44]:
# Same comparison as for the test rows, but over the rows the model was fitted on.
actual_train = []
predicted_train = []

for record in train_data.itertuples():
    has_prediction = (
        record.userId in predicted_df.index and record.movieId in predicted_df.columns
    )
    if has_prediction:
        actual_train.append(record.rating)
        predicted_train.append(predicted_df.loc[record.userId, record.movieId])

rmse_train = np.sqrt(mean_squared_error(actual_train, predicted_train))
print("Train RMSE:", rmse_train)

Train RMSE: 0.7013678936917671


In [45]:
# Row counts of both splits, and how many test rows could actually be scored.
for label, count in (
    ("Train size:", len(train_data)),
    ("Test size:", len(test_data)),
    ("Evaluation samples:", len(actual)),
):
    print(label, count)

Train size: 80668
Test size: 20168
Evaluation samples: 19355


In [46]:
def get_similar_movies(title, num_recommendations=3):
    """Return title, genres and tags of the movies most similar to ``title``.

    Args:
        title: Movie title used as a key into ``indices``.
        num_recommendations: Maximum number of movies to return.

    Returns:
        pandas.DataFrame with columns ``title``, ``genres`` and ``tag``,
        ordered from most to least similar and never containing the queried
        movie; or the string ``"Movie not found"`` when ``title`` is not a key
        of ``indices``.
    """
    if title not in indices:
        return "Movie not found"

    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several rows share this title: fall back to the first one.
        idx = idx.iloc[0]

    # Exclude the queried movie by position instead of assuming it sorts first;
    # an earlier row with an identical score would otherwise push it into the
    # result.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [pos for pos, _ in ranked[:num_recommendations]]

    return movies[['title', 'genres', 'tag']].iloc[movie_indices]

In [75]:
def find_similar_movies(movie_name, num_recommendations=10):
    """Find a movie by partial title and return the movies most similar to it.

    The first row of ``movies`` whose lower-cased title contains the
    lower-cased ``movie_name`` is used as the seed. The seed's title is printed
    as a header before the result is returned.

    Args:
        movie_name: Text to search for inside the titles (case-insensitive,
            matched literally).
        num_recommendations: Maximum number of movies to return.

    Returns:
        pandas.DataFrame with columns ``title``, ``genres`` and ``tag``,
        ordered from most to least similar and never containing the seed
        movie; or the string ``"Movie not found"`` when no title matches.
    """
    query = movie_name.lower()

    # regex=False: the query is user text, not a pattern. With the default
    # regex matching, input such as "heat (" raises a regex error and "." or
    # "+" change what is matched.
    matches = movies[movies['title'].str.lower().str.contains(query, regex=False, na=False)]

    if matches.empty:
        return "Movie not found"

    # Take first matched movie
    title = matches.iloc[0]['title']

    # Use the matched row's own label: a title shared by several rows cannot
    # make this lookup ambiguous.
    # NOTE(review): assumes the labels of `movies` equal the row positions of
    # `cosine_sim` - confirm against where `cosine_sim` is built.
    idx = matches.index[0]

    # Exclude the seed by position rather than assuming it sorts first.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [pos for pos, _ in ranked[:num_recommendations]]

    result = movies[['title', 'genres', 'tag']].iloc[movie_indices]

    print(f"\nMovies similar to: {title}")
    print("="*50)

    return result

In [78]:
find_similar_movies("inception",10)


Movies similar to: Inception (2010)


Unnamed: 0,title,genres,tag
9245,Eye in the Sky (2016),Drama Thriller War,suspense thought-provoking
3562,Donnie Darko (2001),Drama Mystery Sci-Fi Thriller,atmospheric cult film dreamlike hallucinatory ...
8697,Doctor Strange (2016),Action Adventure Sci-Fi,visually appealing
9392,Arrival (2016),Sci-Fi,beautiful visuals Cerebral cinematography good...
1243,Gattaca (1997),Drama Sci-Fi Thriller,future intelligent thought-provoking
7258,Shutter Island (2010),Drama Mystery Thriller,insanity Leonardo DiCaprio Martin Scorsese plo...
533,Primal Fear (1996),Crime Drama Mystery Thriller,edward norton psychology suspense thought-prov...
4909,Eternal Sunshine of the Spotless Mind (2004),Drama Romance Sci-Fi,alternate reality memory thought-provoking jim...
8376,Interstellar (2014),Sci-Fi IMAX,black hole sci-fi time-travel Christopher Nola...
9089,Silence (2016),Drama Thriller,contemplative philosophical thought-provoking ...


# Task
```python
app_code += """

# Streamlit UI
st.set_page_config(layout="wide")
st.markdown(
    f'''
    <style>
        .reportview-container .main .block-container{{
            max-width: 1200px;
            padding-top: 2rem;
            padding-right: 2rem;
            padding-left: 2rem;
            padding-bottom: 2rem;
        }}
        .full-width-image {{
            width: 100%;
            height: 200px; /* Fixed height for consistency */
            object-fit: cover; /* Cover the area, cropping if necessary */
            border-radius: 8px;
            margin-bottom: 10px;
        }}
        .movie-card {{
            border: 1px solid #ddd;
            border-radius: 8px;
            padding: 10px;
            margin-bottom: 15px;
            height: 400px; /* Fixed height for movie card */
            display: flex;
            flex-direction: column;
            justify-content: space-between;
            overflow: hidden; /* Hide overflow if text is too long */
        }}
        .movie-title {{
            font-size: 1.1em;
            font-weight: bold;
            min-height: 50px; /* Ensure title space */
        }}
        .movie-details {{
            font-size: 0.9em;
            color: #555;
            flex-grow: 1; /* Allow details to take available space */
            overflow: hidden; /* Hide overflow */
            text-overflow: ellipsis; /* Add ellipsis for overflowing text */
        }}
        .stButton>button {{
            width: 100%;
        }}
    </style>
    ''',
    unsafe_allow_html=True
)

st.title("Movie Recommender System")

# Navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Content-Based", "Collaborative Filtering", "Hybrid Recommendation", "Find Similar Movies"])

# Helper function to display movie recommendations with posters
def display_recommendations(movie_list):
    if isinstance(movie_list, pd.Series) and movie_list.empty:
        st.write("No recommendations found.")
        return
    if movie_list.empty:
        st.write("No recommendations found.")
        return

    num_movies = len(movie_list)
    cols_per_row = 5
    num_rows = (num_movies + cols_per_row - 1) // cols_per_row

    for i in range(num_rows):
        cols = st.columns(cols_per_row)
        for j in range(cols_per_row):
            idx = i * cols_per_row + j
            if idx < num_movies:
                with cols[j]:
                    movie = movie_list.iloc[idx]
                    title = movie['title']
                    genres = movie['genres']
                    tag = movie['tag']
                    imdb_id = movie['imdbId']

                    poster_url = get_poster(imdb_id)
                    
                    st.markdown(
                        f'''
                        <div class="movie-card">
                            <div>
                                <img src="{poster_url}" class="full-width-image" onerror="this.onerror=null;this.src='https://via.placeholder.com/200x300.png?text=No+Poster';"/>
                                <div class="movie-title">{title}</div>
                                <div class="movie-details">
                                    <strong>Genres:</strong> {genres}<br>
                                    <strong>Tags:</strong> {tag}
                                </div>
                            </div>
                        </div>
                        ''',
                        unsafe_allow_html=True
                    )

if page == "Content-Based":
    st.header("Content-Based Recommendations")
    movie_title = st.selectbox("Select a movie:", movies['title'].sort_values().unique())
    if st.button("Get Content-Based Recommendations"):
        if movie_title:
            recommendations = content_recommend(movie_title)
            display_recommendations(recommendations)
        else:
            st.write("Please select a movie.")

elif page == "Collaborative Filtering":
    st.header("Collaborative Filtering Recommendations")
    # For simplicity, let's use existing user IDs from the filtered ratings dataframe
    # In a real app, you might have user login or ask for a new user's preferences
    available_user_ids = sorted(ratings['userId'].unique())
    user_id = st.selectbox("Select a User ID:", available_user_ids)
    if st.button("Get Collaborative Recommendations"):
        if user_id:
            recommendations = collaborative_recommend(user_id)
            display_recommendations(recommendations)
        else:
            st.write("Please select a User ID.")

elif page == "Hybrid Recommendation":
    st.header("Hybrid Recommendations")
    available_user_ids = sorted(ratings['userId'].unique())
    user_id_hybrid = st.selectbox("Select a User ID for Hybrid:", available_user_ids)
    movie_title_hybrid = st.selectbox("Select a movie as a starting point:", movies['title'].sort_values().unique())
    alpha_val = st.slider("Adjust Alpha (Content vs. Collaborative weight):", 0.0, 1.0, 0.6, 0.1)
    if st.button("Get Hybrid Recommendations"):
        if user_id_hybrid and movie_title_hybrid:
            recommendations = hybrid_recommend(user_id_hybrid, movie_title_hybrid, alpha=alpha_val)
            display_recommendations(recommendations)
        else:
            st.write("Please select both a User ID and a movie.")

elif page == "Find Similar Movies":
    st.header("Find Similar Movies")
    search_query = st.text_input("Enter a movie title (or part of it):")
    if st.button("Search for Similar Movies"):
        if search_query:
            # Re-implement find_similar_movies to return DataFrame for display_recommendations
            def find_similar_movies_streamlit(movie_name, num_recommendations=10):
                movie_name = movie_name.lower()
                matches = movies[movies['title'].str.lower().str.contains(movie_name)]

                if matches.empty:
                    return pd.DataFrame() # Return empty DataFrame

                title = matches.iloc[0]['title']
                
                # Check if the title is in the indices, which might not be the case after filtering movies
                if title not in indices:
                    return pd.DataFrame()

                idx = indices[title]
                sim_scores = list(enumerate(cosine_sim[idx]))
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
                sim_scores = sim_scores[1:num_recommendations+1]
                movie_indices = [i[0] for i in sim_scores]
                
                return movies.iloc[movie_indices][['title', 'genres', 'tag', 'imdbId']]

            recommendations = find_similar_movies_streamlit(search_query)
            if not recommendations.empty:
                st.subheader(f"Movies similar to: {recommendations.iloc[0]['title']}")
                display_recommendations(recommendations)
            else:
                st.write("No similar movies found for your query.")
        else:
            st.write("Please enter a movie title to search.")
"""
print("Added Streamlit UI elements to app_code string.")
```
```python
# Write the app_code to app.py
with open("app.py", "w") as f:
    f.write(app_code)

# Terminate any previous Streamlit instances
!killall streamlit &>/dev/null

# Run Streamlit in the background
import subprocess
import time
import requests

print("Launching Streamlit app...")
process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)

# Wait for Streamlit to start up
time.sleep(10)

# Setup ngrok
!pip install pyngrok -qq
from pyngrok import ngrok

# Terminate any open ngrok tunnels
ngrok.kill()

# Set up a new ngrok tunnel
NGROK_AUTH_TOKEN = "YOUR_NGROK_AUTH_TOKEN"  # Replace with your actual ngrok auth token
if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("Please replace 'YOUR_NGROK_AUTH_TOKEN' with your actual ngrok authentication token.")
    print("You can get one from https://ngrok.com/signup")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    public_url = ngrok.connect(8501)
    print(f"Streamlit App URL: {public_url}")
    print("Please use the URL above to interact with the application.")

    # Check if Streamlit is actually running by making a request to its internal endpoint
    try:
        response = requests.get("http://localhost:8501")
        if response.status_code == 200:
            print("Streamlit app is running successfully locally.")
        else:
            print(f"Streamlit app is not accessible locally. Status code: {response.status_code}")
            print("Streamlit stdout:")
            print(process.stdout.read())
            print("Streamlit stderr:")
            print(process.stderr.read())
    except requests.exceptions.ConnectionError:
        print("Could not connect to Streamlit app locally.")
        print("Streamlit stdout:")
        print(process.stdout.read())
        print("Streamlit stderr:")
        print(process.stderr.read())

    # Keep the Colab environment alive for ngrok tunnel
    # This is a temporary measure; in a real scenario, you'd want a more robust solution
    # For now, a long sleep or user interaction is needed to prevent kernel from stopping
    # time.sleep(3600) # Keep alive for an hour - uncomment if needed for debugging
```
```python
# Final Task: Confirm the Streamlit application is running flawlessly, accessible via the generated public URL,
# and that all content-based, collaborative, hybrid, and similar movie recommendations work as expected,
# displaying movie titles and posters.

print("Please check the Streamlit App URL provided above.")
print("Verify the following functionalities:")
print("1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.")
print("2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.")
print("3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.")
print("4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed.")
print("If the application is not running or any functionality is broken, please provide feedback.")
```

In [102]:
# Appends the source of a title-search helper to the generated Streamlit
# program held in the string `app_code`.
# NOTE(review): `app_code` must already be defined when this cell runs, and the
# later cell that assigns a fresh string to `app_code` rebinds the name, which
# discards this addition - confirm this cell is still needed.
app_code += """

# Function for finding similar movies for Streamlit display
def find_similar_movies_streamlit(movie_name, num_recommendations=10):
    movie_name = movie_name.lower()
    matches = movies[movies['title'].str.lower().str.contains(movie_name)]

    if matches.empty:
        return pd.DataFrame() # Return empty DataFrame if no match

    title = matches.iloc[0]['title']

    if title not in indices:
        return pd.DataFrame()

    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:num_recommendations+1]

    movie_indices = [i[0] for i in sim_scores]

    return movies.iloc[movie_indices][['title','genres','tag', 'imdbId']]
"""
print("Added find_similar_movies_streamlit function to app_code string.")

Added find_similar_movies_streamlit function to app_code string.


In [146]:
# Source of the Streamlit app, kept as one string and written to app.py below.
# Changes compared with the previous version of this string:
#   * hybrid_recommend no longer subscripts a float (`sim_score[1]`), which
#     made the Hybrid page raise on every request;
#   * the seed movie is excluded by position instead of slicing off the first
#     sorted entry, and duplicate titles map to a single row;
#   * collaborative results keep their ranking order;
#   * data loading and model fitting are cached, so they are not repeated on
#     every Streamlit rerun;
#   * the OMDb key comes from the OMDB_API_KEY environment variable rather than
#     being written into the file (export it before launching Streamlit);
#   * OMDb requests use HTTPS with a timeout and are cached per title;
#   * text taken from the dataset is HTML-escaped before being placed in markup;
#   * the title search treats the query as literal text, not as a regex.
# The app source uses ''' for its own docstrings because it lives inside a
# triple-double-quoted string.
app_code = """
import html
import os
from io import BytesIO

import numpy as np
import pandas as pd
import requests
import streamlit as st
from PIL import Image
from scipy.sparse.linalg import svds
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# set_page_config has to be the first Streamlit command the script executes.
st.set_page_config(layout="wide")

# Read the OMDb key from the environment so that no secret is stored in app.py.
OMDB_API_KEY = os.environ.get("OMDB_API_KEY", "")
OMDB_TIMEOUT_SECONDS = 5
PLACEHOLDER_POSTER = "https://via.placeholder.com/200x300.png?text=No+Poster"
DISPLAY_COLUMNS = ["title", "genres", "tag", "imdbId"]


@st.cache_resource
def load_models():
    '''Load the CSV files and build both recommenders.

    Cached so the work is done once per server process instead of on every
    script rerun. Callers must treat the returned objects as read-only.
    '''
    movies = pd.read_csv("movies.csv")
    ratings = pd.read_csv("ratings.csv")
    tags = pd.read_csv("tags.csv")
    links = pd.read_csv("links.csv")

    # Merge imdbId into movies DataFrame
    movies = movies.merge(links[["movieId", "imdbId"]], on="movieId", how="left")

    # Combine all tags of each movie (missing tags are skipped)
    tags_grouped = (
        tags.dropna(subset=["tag"])
        .groupby("movieId")["tag"]
        .apply(lambda movie_tags: " ".join(movie_tags.astype(str)))
        .reset_index()
    )
    movies = movies.merge(tags_grouped, on="movieId", how="left")
    movies["tag"] = movies["tag"].fillna("")

    # Clean genres: replace '|' with space, then combine genres + tags
    movies["genres"] = movies["genres"].str.replace("|", " ", regex=False)
    movies["metadata"] = movies["genres"] + " " + movies["tag"]

    # Content-based model
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(movies["metadata"])
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Title -> row lookup; keep the first row when a title occurs more than once
    indices = pd.Series(movies.index, index=movies["title"])
    indices = indices[~indices.index.duplicated(keep="first")]

    # Collaborative model: rank-50 SVD of the zero-filled user x movie table
    user_item_matrix = ratings.pivot_table(
        index="userId",
        columns="movieId",
        values="rating",
    ).fillna(0)
    U, sigma, Vt = svds(user_item_matrix.values, k=50)
    predicted_ratings = np.dot(np.dot(U, np.diag(sigma)), Vt)
    predicted_df = pd.DataFrame(
        predicted_ratings,
        columns=user_item_matrix.columns,
        index=user_item_matrix.index,
    )

    return movies, ratings, cosine_sim, indices, predicted_df


movies, ratings, cosine_sim, indices, predicted_df = load_models()


def _top_positions(scores, seed_position, n):
    '''Return the positions of the n highest scores, never including seed_position.

    The sort is stable, so tied scores keep their original row order.
    '''
    order = np.argsort(-np.asarray(scores, dtype=float), kind="stable")
    return [int(pos) for pos in order if pos != seed_position][:n]


# Content-based recommendation function
def content_recommend(title, n=10):
    '''Movies whose metadata is most similar to the given title.'''
    idx = indices[title]
    return movies.iloc[_top_positions(cosine_sim[idx], idx, n)][DISPLAY_COLUMNS]


# Collaborative filtering recommendation function
def collaborative_recommend(user_id, n=10):
    '''Unseen movies with the highest predicted rating for the user, best first.'''
    user_row = predicted_df.loc[user_id]
    already_rated = ratings.loc[ratings["userId"] == user_id, "movieId"]
    candidates = user_row.drop(already_rated, errors="ignore")
    top_movies = candidates.sort_values(ascending=False).head(n)

    # isin() alone returns rows in catalogue order; restore the ranking order.
    rank = {movie_id: position for position, movie_id in enumerate(top_movies.index)}
    picked = movies[movies["movieId"].isin(top_movies.index)]
    order = picked["movieId"].map(rank).to_numpy().argsort(kind="stable")
    return picked.iloc[order][["movieId"] + DISPLAY_COLUMNS]


# Hybrid recommendation function
def hybrid_recommend(user_id, title, n=10, alpha=0.6):
    '''Blend content similarity (weight alpha) with predicted ratings (weight 1 - alpha).'''
    idx = indices[title]
    content_scores = np.asarray(cosine_sim[idx], dtype=float)

    if user_id in predicted_df.index:
        # Align the user's predictions with the row order of movies; movies
        # without a prediction contribute 0.
        collab_scores = (
            movies["movieId"].map(predicted_df.loc[user_id]).fillna(0).to_numpy(dtype=float)
        )
    else:
        collab_scores = np.zeros(len(movies))

    final_scores = alpha * content_scores + (1 - alpha) * collab_scores
    return movies.iloc[_top_positions(final_scores, idx, n)][DISPLAY_COLUMNS]


@st.cache_data(show_spinner=False)
def _fetch_poster_url(imdb_tt):
    '''Ask OMDb for the poster of one title; returns None when it has none.'''
    response = requests.get(
        "https://www.omdbapi.com/",
        params={"i": imdb_tt, "apikey": OMDB_API_KEY},
        timeout=OMDB_TIMEOUT_SECONDS,
    )
    data = response.json()
    poster = data.get("Poster") if isinstance(data, dict) else None
    if poster and poster != "N/A":
        return poster
    return None


# Function to get movie poster from OMDb API
def get_poster(imdb_id):
    '''Return a poster URL for the IMDb id, or the placeholder image URL.'''
    if pd.isna(imdb_id) or imdb_id == "" or not OMDB_API_KEY:
        return PLACEHOLDER_POSTER
    try:
        # OMDb expects ids of the form tt0123456
        if str(imdb_id).startswith("tt"):
            imdb_tt = str(imdb_id)
        else:
            imdb_tt = f"tt{int(imdb_id):07d}"
        return _fetch_poster_url(imdb_tt) or PLACEHOLDER_POSTER
    except (requests.RequestException, ValueError, TypeError) as e:
        st.error(f"Error fetching poster for {imdb_id}: {e}")
        return PLACEHOLDER_POSTER


# Function for finding similar movies for Streamlit display
def find_similar_movies_streamlit(movie_name, num_recommendations=10):
    '''Resolve free text to a movie and return (similar movies, matched title).

    Returns (empty DataFrame, None) when nothing matches.
    '''
    movie_name_lower = movie_name.strip().lower()
    if not movie_name_lower:
        return pd.DataFrame(), None

    titles_lower = movies["title"].str.lower()
    base_movie_title = None

    # 1. Try to find an exact match
    exact_match = movies[titles_lower == movie_name_lower]
    if not exact_match.empty:
        base_movie_title = exact_match.iloc[0]["title"]
    else:
        # 2. Titles that start with the query, then 3. titles that contain it.
        #    regex=False: the query is user text, not a pattern.
        candidates = movies[titles_lower.str.startswith(movie_name_lower, na=False)]
        if candidates.empty:
            candidates = movies[titles_lower.str.contains(movie_name_lower, regex=False, na=False)]
        if not candidates.empty:
            # Prioritize shorter titles if several movies match
            base_movie_title = candidates.loc[candidates["title"].str.len().idxmin()]["title"]

    if base_movie_title is None or base_movie_title not in indices:
        return pd.DataFrame(), None

    idx = indices[base_movie_title]
    similar = movies.iloc[_top_positions(cosine_sim[idx], idx, num_recommendations)]
    return similar[DISPLAY_COLUMNS], base_movie_title


# Streamlit UI
st.markdown(
    f'''
    <style>
        .reportview-container .main .block-container{{
            max-width: 1200px;
            padding-top: 2rem;
            padding-right: 2rem;
            padding-left: 2rem;
            padding-bottom: 2rem;
        }}
        .full-width-image {{
            width: 100%;
            height: 200px; /* Fixed height for consistency */
            object-fit: cover; /* Cover the area, cropping if necessary */
            border-radius: 8px;
            margin-bottom: 10px;
        }}
        .movie-card {{
            border: 1px solid #ddd;
            border-radius: 8px;
            padding: 10px;
            margin-bottom: 15px;
            height: 400px; /* Fixed height for movie card */
            display: flex;
            flex-direction: column;
            justify-content: space-between;
            overflow: hidden; /* Hide overflow if text is too long */
        }}
        .movie-title {{
            font-size: 1.1em;
            font-weight: bold;
            min-height: 50px; /* Ensure title space */
        }}
        .movie-details {{
            font-size: 0.9em;
            color: #555;
            flex-grow: 1; /* Allow details to take available space */
            overflow: hidden; /* Hide overflow */
            text-overflow: ellipsis; /* Add ellipsis for overflowing text */
        }}
        .stButton>button {{
            width: 100%;
        }}
        body {{
            background-image: url("https://images.unsplash.com/photo-1579547621113-e4bb2a19ff62?q=80&w=2670&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D") !important; /* A 3D modern-style background */
            background-size: cover !important;
            background-attachment: fixed !important; /* Keeps background fixed when scrolling */
            background-color: #000000 !important; /* Fallback for black background */
        }}
    </style>
    ''',
    unsafe_allow_html=True
)

st.title("Movie Recommender System")

# Navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Content-Based", "Collaborative Filtering", "Hybrid Recommendation", "Find Similar Movies"])


# Helper function to display movie recommendations with posters
def display_recommendations(movie_list):
    '''Render the recommended movies as a grid of cards, five per row.'''
    if movie_list is None or len(movie_list) == 0:
        st.write("No recommendations found.")
        return

    cols_per_row = 5
    records = movie_list.to_dict("records")

    for row_start in range(0, len(records), cols_per_row):
        cols = st.columns(cols_per_row)
        for col, movie in zip(cols, records[row_start:row_start + cols_per_row]):
            with col:
                # Titles, genres and tags come from the data files; escape them
                # so they cannot break or inject markup.
                poster_url = html.escape(get_poster(movie["imdbId"]), quote=True)
                title = html.escape(str(movie["title"]))
                genres = html.escape(str(movie["genres"]))
                tag = html.escape(str(movie["tag"]))

                st.markdown(
                    f'''
                    <div class="movie-card">
                        <div>
                            <img src="{poster_url}" class="full-width-image" onerror="this.onerror=null;this.src='{PLACEHOLDER_POSTER}';"/>
                            <div class="movie-title">{title}</div>
                            <div class="movie-details">
                                <strong>Genres:</strong> {genres}<br>
                                <strong>Tags:</strong> {tag}
                            </div>
                        </div>
                    </div>
                    ''',
                    unsafe_allow_html=True
                )


if page == "Content-Based":
    st.header("Content-Based Recommendations")
    movie_title = st.selectbox("Select a movie:", movies['title'].sort_values().unique())
    if st.button("Get Content-Based Recommendations"):
        if movie_title:
            recommendations = content_recommend(movie_title)
            display_recommendations(recommendations)
        else:
            st.write("Please select a movie.")

elif page == "Collaborative Filtering":
    st.header("Collaborative Filtering Recommendations")
    available_user_ids = sorted(ratings['userId'].unique())
    user_id = st.selectbox("Select a User ID:", available_user_ids)
    if st.button("Get Collaborative Recommendations"):
        if user_id:
            recommendations = collaborative_recommend(user_id)
            display_recommendations(recommendations)
        else:
            st.write("Please select a User ID.")

elif page == "Hybrid Recommendation":
    st.header("Hybrid Recommendations")
    available_user_ids = sorted(ratings['userId'].unique())
    user_id_hybrid = st.selectbox("Select a User ID for Hybrid:", available_user_ids)
    movie_title_hybrid = st.selectbox("Select a movie as a starting point:", movies['title'].sort_values().unique())
    alpha_val = st.slider("Adjust Alpha (Content vs. Collaborative weight):", 0.0, 1.0, 0.6, 0.1)
    if st.button("Get Hybrid Recommendations"):
        if user_id_hybrid and movie_title_hybrid:
            recommendations = hybrid_recommend(user_id_hybrid, movie_title_hybrid, alpha=alpha_val)
            display_recommendations(recommendations)
        else:
            st.write("Please select both a User ID and a movie.")

elif page == "Find Similar Movies":
    st.header("Find Similar Movies")
    search_query = st.text_input("Enter a movie title (or part of it): ")
    if st.button("Search for Similar Movies"):
        if search_query:
            recommendations, base_title_for_display = find_similar_movies_streamlit(search_query)
            if not recommendations.empty and base_title_for_display:
                st.subheader(f"Movies similar to: {base_title_for_display}")
                display_recommendations(recommendations)
            else:
                st.write("No similar movies found for your query.")
        else:
            st.write("Please enter a movie title to search.")
"""

# Write the app_code to app.py
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

print("Complete app.py code generated and written to app.py")

Complete app.py code generated and written to app.py


In [148]:
# Write the app_code to app.py
with open("app.py", "w") as f:
    f.write(app_code)

# Terminate any previous Streamlit instances
!killall streamlit &>/dev/null

# Run Streamlit in the background
import subprocess
import time
import requests

print("Launching Streamlit app...")
process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)

# Wait for Streamlit to start up
time.sleep(15) # Increased sleep time

# Setup ngrok
!pip install pyngrok -qq
from pyngrok import ngrok

# Terminate any open ngrok tunnels
ngrok.kill()

# Set up a new ngrok tunnel
NGROK_AUTH_TOKEN = "39KNJq0UihIXfjxtyrXWZAUKyb8_4NWfLWWHPYJeJAjgEd8Nq"  # Replace with your actual ngrok auth token
if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("Please replace 'YOUR_NGROK_AUTH_TOKEN' with your actual ngrok authentication token.")
    print("You can get one from https://ngrok.com/signup")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    public_url = ngrok.connect(8501)
    print(f"Streamlit App URL: {public_url}")
    print("Please use the URL above to interact with the application.")

    # Check if Streamlit is actually running by making a request to its internal endpoint
    try:
        response = requests.get("http://localhost:8501")
        if response.status_code == 200:
            print("Streamlit app is running successfully locally.")
        else:
            print(f"Streamlit app is not accessible locally. Status code: {response.status_code}")
            print("Streamlit stdout:")
            print(process.stdout.read())
            print("Streamlit stderr:")
            print(process.stderr.read())
    except requests.exceptions.ConnectionError:
        print("Could not connect to Streamlit app locally.")
        print("Streamlit stdout:")
        print(process.stdout.read())
        print("Streamlit stderr:")
        print(process.stderr.read())

    # Keep the Colab environment alive for ngrok tunnel
    # This is a temporary measure; in a real scenario, you'd want a more robust solution
    # For now, a long sleep or user interaction is needed to prevent kernel from stopping
    # time.sleep(3600) # Keep alive for an hour - uncomment if needed for debugging


Launching Streamlit app...




Streamlit App URL: NgrokTunnel: "https://cornual-undejectedly-toccara.ngrok-free.dev" -> "http://localhost:8501"
Please use the URL above to interact with the application.
Streamlit app is running successfully locally.


In [143]:
# Write the app_code to app.py
with open("app.py", "w") as f:
    f.write(app_code)

# Terminate any previous Streamlit instances
!killall streamlit &>/dev/null

# Run Streamlit in the background
import subprocess
import time
import requests

print("Launching Streamlit app...")
process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)

# Wait for Streamlit to start up
time.sleep(15) # Increased sleep time

# Setup ngrok
!pip install pyngrok -qq
from pyngrok import ngrok

# Terminate any open ngrok tunnels
ngrok.kill()

# Set up a new ngrok tunnel
NGROK_AUTH_TOKEN = "39KNJq0UihIXfjxtyrXWZAUKyb8_4NWfLWWHPYJeJAjgEd8Nq"  # Replace with your actual ngrok auth token
if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("Please replace 'YOUR_NGROK_AUTH_TOKEN' with your actual ngrok authentication token.")
    print("You can get one from https://ngrok.com/signup")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    public_url = ngrok.connect(8501)
    print(f"Streamlit App URL: {public_url}")
    print("Please use the URL above to interact with the application.")

    # Check if Streamlit is actually running by making a request to its internal endpoint
    try:
        response = requests.get("http://localhost:8501")
        if response.status_code == 200:
            print("Streamlit app is running successfully locally.")
        else:
            print(f"Streamlit app is not accessible locally. Status code: {response.status_code}")
            print("Streamlit stdout:")
            print(process.stdout.read())
            print("Streamlit stderr:")
            print(process.stderr.read())
    except requests.exceptions.ConnectionError:
        print("Could not connect to Streamlit app locally.")
        print("Streamlit stdout:")
        print(process.stdout.read())
        print("Streamlit stderr:")
        print(process.stderr.read())

    # Keep the Colab environment alive for ngrok tunnel
    # This is a temporary measure; in a real scenario, you'd want a more robust solution
    # For now, a long sleep or user interaction is needed to prevent kernel from stopping
    # time.sleep(3600) # Keep alive for an hour - uncomment if needed for debugging


Launching Streamlit app...




Streamlit App URL: NgrokTunnel: "https://cornual-undejectedly-toccara.ngrok-free.dev" -> "http://localhost:8501"
Please use the URL above to interact with the application.
Streamlit app is running successfully locally.


In [113]:
# Patch the hybrid score line inside the app_code string: sim_score is indexed
# with [1] in the corrected line rather than used directly.
# str.replace() silently does nothing when the target is missing, so the outcome
# is checked explicitly instead of always reporting success.
_buggy_score_line = "final_score = alpha * sim_score + (1 - alpha) * collab_score"
_fixed_score_line = "final_score = alpha * sim_score[1] + (1 - alpha) * collab_score"

if _buggy_score_line in app_code:
    app_code = app_code.replace(_buggy_score_line, _fixed_score_line)
    print("Corrected the hybrid_recommend function within the app_code string.")
elif _fixed_score_line in app_code:
    # Re-running this cell after a successful patch lands here.
    print("hybrid_recommend within the app_code string is already corrected; nothing to change.")
else:
    print("WARNING: expected final_score line not found in app_code; hybrid_recommend was NOT modified.")

Corrected the hybrid_recommend function within the app_code string.


In [123]:
# Write the app_code to app.py
with open("app.py", "w") as f:
    f.write(app_code)

# Terminate any previous Streamlit instances
!killall streamlit &>/dev/null

# Run Streamlit in the background
import subprocess
import time
import requests

print("Launching Streamlit app...")
process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)

# Wait for Streamlit to start up
time.sleep(10)

# Setup ngrok
!pip install pyngrok -qq
from pyngrok import ngrok

# Terminate any open ngrok tunnels
ngrok.kill()

# Set up a new ngrok tunnel
NGROK_AUTH_TOKEN = "39KNJq0UihIXfjxtyrXWZAUKyb8_4NWfLWWHPYJeJAjgEd8Nq"  # Replace with your actual ngrok auth token
if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("Please replace 'YOUR_NGROK_AUTH_TOKEN' with your actual ngrok authentication token.")
    print("You can get one from https://ngrok.com/signup")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    public_url = ngrok.connect(8501)
    print(f"Streamlit App URL: {public_url}")
    print("Please use the URL above to interact with the application.")

    # Check if Streamlit is actually running by making a request to its internal endpoint
    try:
        response = requests.get("http://localhost:8501")
        if response.status_code == 200:
            print("Streamlit app is running successfully locally.")
        else:
            print(f"Streamlit app is not accessible locally. Status code: {response.status_code}")
            print("Streamlit stdout:")
            print(process.stdout.read())
            print("Streamlit stderr:")
            print(process.stderr.read())
    except requests.exceptions.ConnectionError:
        print("Could not connect to Streamlit app locally.")
        print("Streamlit stdout:")
        print(process.stdout.read())
        print("Streamlit stderr:")
        print(process.stderr.read())

    # Keep the Colab environment alive for ngrok tunnel
    # This is a temporary measure; in a real scenario, you'd want a more robust solution
    # For now, a long sleep or user interaction is needed to prevent kernel from stopping
    # time.sleep(3600) # Keep alive for an hour - uncomment if needed for debugging

Launching Streamlit app...




Streamlit App URL: NgrokTunnel: "https://cornual-undejectedly-toccara.ngrok-free.dev" -> "http://localhost:8501"
Please use the URL above to interact with the application.
Streamlit app is running successfully locally.


**Reasoning**:
The Streamlit application has been redeployed with the corrected `hybrid_recommend` function. The next step is to provide instructions to the user to verify all functionalities on the publicly accessible Streamlit app.



In [124]:
# Manual verification checklist for the deployed app, emitted one line per step.
verification_steps = (
    "Please check the Streamlit App URL provided above (e.g., https://cornual-undejectedly-toccara.ngrok-free.dev).",
    "Verify the following functionalities:",
    "1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.",
    "2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.",
    "3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.",
    "4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed.",
    "If the application is not running or any functionality is broken, please provide feedback.",
)
for step in verification_steps:
    print(step)

Please check the Streamlit App URL provided above (e.g., https://cornual-undejectedly-toccara.ngrok-free.dev).
Verify the following functionalities:
1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.
2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.
3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.
4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed.
If the application is not running or any functionality is broken, please provide feedback.


In [125]:
# Write the app_code to app.py
with open("app.py", "w") as f:
    f.write(app_code)

# Terminate any previous Streamlit instances
!killall streamlit &>/dev/null

# Run Streamlit in the background
import subprocess
import time
import requests

print("Launching Streamlit app...")
process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)

# Wait for Streamlit to start up
time.sleep(10)

# Setup ngrok
!pip install pyngrok -qq
from pyngrok import ngrok

# Terminate any open ngrok tunnels
ngrok.kill()

# Set up a new ngrok tunnel
NGROK_AUTH_TOKEN = "39KNJq0UihIXfjxtyrXWZAUKyb8_4NWfLWWHPYJeJAjgEd8Nq"  # Using the token from kernel state
if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("Please replace 'YOUR_NGROK_AUTH_TOKEN' with your actual ngrok authentication token.")
    print("You can get one from https://ngrok.com/signup")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    public_url = ngrok.connect(8501)
    print(f"Streamlit App URL: {public_url}")
    print("Please use the URL above to interact with the application.")

    # Check if Streamlit is actually running by making a request to its internal endpoint
    try:
        response = requests.get("http://localhost:8501")
        if response.status_code == 200:
            print("Streamlit app is running successfully locally.")
        else:
            print(f"Streamlit app is not accessible locally. Status code: {response.status_code}")
            print("Streamlit stdout:")
            print(process.stdout.read())
            print("Streamlit stderr:")
            print(process.stderr.read())
    except requests.exceptions.ConnectionError:
        print("Could not connect to Streamlit app locally.")
        print("Streamlit stdout:")
        print(process.stdout.read())
        print("Streamlit stderr:")
        print(process.stderr.read())

    # Keep the Colab environment alive for ngrok tunnel
    # This is a temporary measure; in a real scenario, you'd want a more robust solution
    # For now, a long sleep or user interaction is needed to prevent kernel from stopping
    # time.sleep(3600) # Keep alive for an hour - uncomment if needed for debugging

Launching Streamlit app...




Streamlit App URL: NgrokTunnel: "https://cornual-undejectedly-toccara.ngrok-free.dev" -> "http://localhost:8501"
Please use the URL above to interact with the application.
Streamlit app is running successfully locally.


In [126]:
# Manual verification checklist for the deployed app, joined into a single print call.
checklist_text = "\n".join([
    "Please check the Streamlit App URL provided above (e.g., https://cornual-undejectedly-toccara.ngrok-free.dev).",
    "Verify the following functionalities:",
    "1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.",
    "2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.",
    "3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.",
    "4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed.",
    "If the application is not running or any functionality is broken, please provide feedback.",
])
print(checklist_text)

Please check the Streamlit App URL provided above (e.g., https://cornual-undejectedly-toccara.ngrok-free.dev).
Verify the following functionalities:
1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.
2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.
3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.
4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed.
If the application is not running or any functionality is broken, please provide feedback.


In [134]:
# Manual verification checklist for the deployed app; this version also covers the
# subheader title and the black background.
checklist_lines = [
    "Please check the Streamlit App URL provided above (e.g., https://cornual-undejectedly-toccara.ngrok-free.dev).",
    "Verify the following functionalities:",
    "1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.",
    "2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.",
    "3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.",
    "4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed, with the correct movie title shown in the subheader.",
    "5. Check if the background color of the website is now black.",
    "If the application is not running or any functionality is broken, please provide feedback.",
]
# Unpacking with a newline separator prints each entry on its own line.
print(*checklist_lines, sep="\n")

Please check the Streamlit App URL provided above (e.g., https://cornual-undejectedly-toccara.ngrok-free.dev).
Verify the following functionalities:
1. Content-Based Recommendations: Select a movie and check if relevant recommendations with posters are displayed.
2. Collaborative Filtering Recommendations: Select a User ID and check if recommendations with posters are displayed.
3. Hybrid Recommendation: Select a User ID and a movie, adjust alpha, and check if hybrid recommendations with posters are displayed.
4. Find Similar Movies: Enter a movie title and check if similar movies with posters are displayed, with the correct movie title shown in the subheader.
5. Check if the background color of the website is now black.
If the application is not running or any functionality is broken, please provide feedback.
