In [None]:
# Install Streamlit (runs the generated Tmovies.py app) and pyngrok (opens the public tunnel in the last cell).
!pip install streamlit pyngrok



In [None]:
# Install the libraries the app imports; streamlit and pyngrok repeat the previous install cell.
!pip install streamlit pyngrok pandas scikit-learn numpy



In [None]:
# fuzzywuzzy supplies fuzz.partial_ratio, which the app's title search uses for fuzzy matching.
!pip install fuzzywuzzy




In [None]:
# Store the ngrok auth token in ngrok's configuration file (the token is redacted here).
# NOTE(review): do not commit a real token in the notebook; prefer supplying it from an
# environment variable or a notebook secret at run time.
!ngrok config add-authtoken ##################

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# Mount Google Drive at /content/drive; the app reads its dataset and its favorites
# file from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
with open("Tmovies.py", "w") as f:
    f.write("""


import streamlit as st
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import json
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import LatentDirichletAllocation
import spacy
from textblob import TextBlob
from fuzzywuzzy import fuzz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



# Paths to dataset and favorites file (both are located under the mounted Google Drive)
# dataset_path: CSV read by load_data()
dataset_path = '/content/drive/My Drive/dataset477/TMDB_movie_dataset_v11.csv'
# favorites_path: JSON file read by load_favorites() and rewritten by save_favorites()
favorites_path = '/content/drive/My Drive/favorites.json'

# Load the dataset
@st.cache_data
def load_data():
    '''Read the movie CSV at dataset_path and keep only the rows that have a poster.

    Returns:
        The filtered DataFrame. If anything goes wrong while loading, the error
        is reported through st.error and an empty DataFrame is returned instead.
    '''
    try:
        raw_movies = pd.read_csv(dataset_path)
        # Filter movies with posters only
        has_poster = raw_movies['poster_path'].notna()
        return raw_movies[has_poster]
    except Exception as e:
        # Best effort: show the failure in the UI and hand back an empty frame
        st.error(f"Failed to load dataset: {e}")
        return pd.DataFrame()

def load_favorites():
    '''Load the per-user favorites mapping stored at favorites_path.

    Returns:
        dict: the parsed JSON object. An empty dict is returned when the file
        is missing, is not valid JSON, or holds anything other than a JSON
        object. Callers look entries up by username, so a list or scalar
        payload would make them fail.
    '''
    try:
        with open(favorites_path, 'r') as f:
            favorites = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Handle both missing file and decoding errors by returning empty dictionary
        return {}

    # Anything that is not a mapping (for example a hand-edited list) is unusable
    if not isinstance(favorites, dict):
        return {}
    return favorites

# Save the favorites list to the JSON file
def save_favorites(favorites):
    '''Write the favorites mapping to favorites_path as indented JSON.

    Args:
        favorites: JSON-serializable mapping of username to a list of movie entries.

    Raises:
        TypeError: if favorites contains a value json cannot serialize. In that
        case the existing file is left untouched.
    '''
    # Serialize first: opening with 'w' truncates the file, so an error raised
    # part-way through a streamed dump would leave corrupt JSON behind and wipe
    # every stored favorite.
    serialized = json.dumps(favorites, indent=4)  # Use indent=4 for better readability
    with open(favorites_path, 'w') as f:
        f.write(serialized)


# Display movie details in a modal-like view
def display_movie_details(movie):
    '''Render the detail view (poster plus metadata) for one movie.

    Args:
        movie: mapping-like record read through .get(), for example a pandas
            Series row or a dict.

    Fields that are absent, None or NaN are shown as N/A. Clicking the Back
    button removes "selected_movie" from st.session_state and reruns the
    script so the previous view is drawn straight away.
    '''
    import math  # local import: only needed for the NaN check below

    def _field(name):
        # .get() falls back to its default only when the key is absent; a
        # missing value that is present as None or as a NaN float has to be
        # mapped to N/A explicitly.
        value = movie.get(name, 'N/A')
        if value is None or (isinstance(value, float) and math.isnan(value)):
            return 'N/A'
        return value

    st.markdown("### Movie Details")
    tmdb_base_url = "https://image.tmdb.org/t/p/w500"
    poster_path = movie.get('poster_path')
    # Only build a URL from a real, non-empty string; concatenating NaN/None raises
    has_poster = isinstance(poster_path, str) and poster_path != ''

    # Layout for the modal-like view
    col1, col2 = st.columns([1, 2])
    with col1:
        if has_poster:
            try:
                response = requests.get(tmdb_base_url + poster_path, timeout=5)
                if response.status_code == 200:
                    image = Image.open(BytesIO(response.content))
                    st.image(image, use_container_width=True)
                else:
                    st.text("Poster not available")
            except Exception:
                # Network and decoding problems are not fatal for the detail view
                st.text("Error loading poster")
        else:
            st.text("Poster not available")

    with col2:
        st.markdown(f"**Title:** {_field('title')}")
        st.markdown(f"**Release Date:** {_field('release_date')}")
        st.markdown(f"**Rating:** {_field('vote_average')} (Votes: {_field('vote_count')})")
        st.markdown(f"**Language:** {str(_field('original_language')).upper()}")
        st.markdown(f"**Genres:** {_field('genres')}")
        st.markdown(f"**Tagline:** {_field('tagline')}")
        st.markdown(f"**Keywords:** {_field('keywords')}")
        st.markdown(f"**Overview:** {_field('overview')}")
        st.markdown(f"**Runtime:** {_field('runtime')} minutes")
        st.markdown(f"**Popularity:** {_field('popularity')}")

    # Back button
    if st.button("Back to Recommendations", key="back_to_recommendations"):
        if "selected_movie" in st.session_state:
            del st.session_state["selected_movie"]
        # Without a rerun the detail view stays on screen until the next interaction
        st.rerun()



def display_movies_in_columns(movies, key_prefix, similarity_col=None):
    '''Render one page of up to five movie cards plus previous/next controls.

    Args:
        movies: DataFrame of movies; rows are addressed by position.
        key_prefix: string that namespaces the widget keys and the paging
            offset kept in st.session_state under "<key_prefix>_start_idx".
        similarity_col: optional column name whose value is shown under the
            poster when the row has that column.

    Clicking Details stores the row in st.session_state["selected_movie"].
    Every state change made here (Details, previous, next) is followed by
    st.rerun() so the new state is drawn immediately instead of one
    interaction later.
    '''
    import html  # local import: only used to escape titles placed inside raw HTML

    tmdb_base_url = "https://image.tmdb.org/t/p/w500"
    page_size = 5
    total_movies = len(movies)
    state_key = f"{key_prefix}_start_idx"

    # Initialize session state
    if state_key not in st.session_state:
        st.session_state[state_key] = 0

    # The list can shrink between runs (for example when filters change) while
    # the stored offset stays put; pull it back onto the last existing page.
    last_page_start = max(0, (total_movies - 1) // page_size * page_size)
    start_idx = max(0, min(st.session_state[state_key], last_page_start))
    st.session_state[state_key] = start_idx
    end_idx = min(start_idx + page_size, total_movies)
    cols = st.columns(page_size)

    for idx, movie_idx in enumerate(range(start_idx, end_idx)):
        movie = movies.iloc[movie_idx]
        with cols[idx]:
            # The title is data, not markup: escape it before embedding it in HTML
            st.markdown(
                f"<div style='text-align: center; font-size:14px; height:50px; overflow:hidden;'>"
                f"{html.escape(str(movie['title']))}</div>", unsafe_allow_html=True
            )
            poster_url = (
                tmdb_base_url + movie['poster_path']
                if 'poster_path' in movie and pd.notna(movie['poster_path'])
                else None
            )
            if poster_url:
                try:
                    response = requests.get(poster_url, timeout=5)
                    if response.status_code == 200:
                        image = Image.open(BytesIO(response.content))
                        st.image(image, use_container_width=True)
                    else:
                        st.text("Poster not available")
                except Exception:
                    # A failed download should not break the whole row of cards
                    st.text("Error loading poster")
            else:
                st.text("Poster not available")
            if similarity_col and similarity_col in movie:
                st.markdown(
                    f"<div style='text-align: center; color: green;'>"
                    f"Accuracy: {movie[similarity_col]:.2f}%</div>",
                    unsafe_allow_html=True,
                )
            if st.button("Details", key=f"{key_prefix}_details_{movie_idx}"):
                st.session_state["selected_movie"] = movie
                st.rerun()
            if st.button("Add to Favorites", key=f"{key_prefix}_fav_{movie_idx}"):
                add_to_favorites(movie)

    # Navigation buttons
    col1, col2, col3 = st.columns([1, 3, 1])
    with col1:
        if st.button("<", key=f"{key_prefix}_prev_{start_idx}") and start_idx > 0:
            st.session_state[state_key] = start_idx - page_size
            st.rerun()
    with col3:
        if st.button(">", key=f"{key_prefix}_next_{start_idx}") and end_idx < total_movies:
            st.session_state[state_key] = start_idx + page_size
            st.rerun()



# Function to perform fuzzy search and rank movies based on similarity
def search_movie(query, df, top_n=20, threshold=60):
    '''Rank movie titles against a search query.

    Each title is scored 100 when it contains the query (case-insensitive),
    otherwise with fuzz.partial_ratio. Titles scoring above threshold are kept.
    One-word titles are kept only when they actually contain the query: this
    still filters short titles that merely fuzzy-match, but no longer hides an
    exact hit on a one-word title.

    Args:
        query: search text; leading/trailing whitespace is ignored.
        df: DataFrame with 'title' and 'id' columns.
        top_n: maximum number of rows returned.
        threshold: minimum score (exclusive) for a title to be kept.

    Returns:
        DataFrame with columns 'title', 'id', 'similarity', best matches first.
        It is empty when the query is blank or nothing matches.
    '''
    result_columns = ['title', 'id', 'similarity']
    query_lower = str(query).strip().lower()
    if not query_lower:
        # A blank query is a substring of every title and would match everything
        return pd.DataFrame(columns=result_columns)

    search_results = []
    # Walk the two needed columns directly; iterrows() would build a Series per row
    for raw_title, movie_id in zip(df['title'], df['id']):
        if not isinstance(raw_title, str):
            continue
        title = raw_title.strip()
        if not title:
            continue

        title_lower = title.lower()  # Precompute lowercase title
        contains_query = query_lower in title_lower
        # A substring hit already has the maximum score, so the fuzzy call is skipped
        combined_score = 100 if contains_query else fuzz.partial_ratio(query_lower, title_lower)

        if combined_score > threshold and (contains_query or len(title.split()) > 1):
            search_results.append((title, movie_id, round(combined_score, 2)))

    # Sort results by similarity score in descending order (stable for ties)
    search_results.sort(key=lambda item: item[2], reverse=True)

    # Convert to DataFrame and return top_n results
    return pd.DataFrame(search_results, columns=result_columns).head(top_n)








# Text Similarity-Based Recommendations
def get_text_similarity_recommendations(favorite_movies, df, top_n=20):
    '''Recommend movies whose keywords resemble those of the favorite movies.

    Keywords are vectorized with TF-IDF and compared by cosine similarity.

    Args:
        favorite_movies: iterable of favorite titles.
        df: DataFrame with the columns id, title, poster_path, keywords,
            vote_average, release_date and overview. It is not modified.
        top_n: maximum number of recommendations (also the per-favorite cap).

    Returns:
        DataFrame with the columns id, title, poster_path, keywords,
        similarity, vote_average, release_date, overview, best match first.
        The favorites themselves are never recommended. An empty DataFrame is
        returned when no favorite is present in df or no movie has usable keywords.
    '''
    output_columns = ['id', 'title', 'poster_path', 'keywords', 'similarity', 'vote_average', 'release_date', 'overview']

    # Locate favorites by POSITION. The TF-IDF matrix rows and iloc are
    # positional, while df.index holds labels that no longer match positions
    # once the frame has been filtered.
    is_favorite = df['title'].isin(favorite_movies).to_numpy()
    favorite_positions = is_favorite.nonzero()[0]
    if len(favorite_positions) == 0:
        print("No recommendations found for the provided favorite movies.")
        return pd.DataFrame()  # Return an empty DataFrame

    # Handle missing keywords without writing into the frame owned by the caller
    keywords = df['keywords'].fillna('')

    # TF-IDF vectorization
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(keywords)
    except ValueError:
        # Raised for an empty vocabulary, i.e. no movie has usable keywords
        return pd.DataFrame()

    candidates = df[['id', 'title', 'poster_path', 'vote_average', 'release_date', 'overview']].assign(keywords=keywords)

    recommendations = []
    for position in favorite_positions:
        cosine_sim = cosine_similarity(tfidf_matrix[position], tfidf_matrix).flatten()
        # Push every favorite below all real candidates instead of assuming the
        # favorite itself is always the single best match (ties break that).
        cosine_sim[is_favorite] = -1.0
        ranked = cosine_sim.argsort()[::-1][:top_n]
        ranked = ranked[cosine_sim[ranked] >= 0]
        recommendations.append(candidates.iloc[ranked].assign(similarity=cosine_sim[ranked]))

    # Sort before de-duplicating so each movie keeps its best score
    recommendations_df = (
        pd.concat(recommendations)
        .sort_values(by='similarity', ascending=False)
        .drop_duplicates(subset='id')
    )

    return recommendations_df[output_columns].head(top_n)



# Clustering-Based Recommendations
def cluster_movies(df, n_clusters=10):
    '''Group movies with KMeans on standardized rating, popularity and runtime.

    Args:
        df: DataFrame with vote_average, popularity and runtime columns. It is
            not modified.
        n_clusters: requested number of clusters. It is capped at the number of
            usable rows, because KMeans cannot form more clusters than samples.

    Returns:
        A new DataFrame holding the rows that have all three features, with an
        added 'cluster' label column. When no row qualifies, the empty frame is
        returned without a 'cluster' column.
    '''
    features = ['vote_average', 'popularity', 'runtime']
    # Explicit copy: the label column is added to this frame, never to the input
    complete_rows = df.dropna(subset=features).copy()
    if complete_rows.empty:
        return complete_rows

    # Narrow filters can leave fewer rows than the requested cluster count
    effective_clusters = min(n_clusters, len(complete_rows))

    scaler = StandardScaler()
    X = scaler.fit_transform(complete_rows[features])
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42)
    complete_rows['cluster'] = kmeans.fit_predict(X)
    return complete_rows






def add_to_favorites(movie):
    '''Add a movie to the favorites of the logged-in user.

    The user is taken from st.session_state["username"]. The entry stored is a
    dict with the movie id and title. Feedback is shown with st.success /
    st.warning; nothing is saved when nobody is logged in or when the same
    entry is already stored.
    '''
    username = st.session_state.get("username")
    if not username:
        st.warning("Please log in to add favorites.")
        return

    favorites = load_favorites()
    user_entries = favorites.setdefault(username, [])

    # Convert 'id' to a native int to avoid serialization issues
    # (numpy.int64 is not JSON serializable)
    movie_info = {'id': int(movie['id']), 'title': movie['title']}

    # Nothing to do when this exact entry is already in the list
    if movie_info in user_entries:
        st.warning(f"{movie['title']} is already in your favorites.")
        return

    user_entries.append(movie_info)
    save_favorites(favorites)
    st.success(f"Added {movie['title']} to favorites!")


# Remove a movie from the favorites list
def remove_from_favorites(movie_id):
    '''Remove the movie with the given id from the favorites of the logged-in user.

    The user is taken from st.session_state["username"]. The first stored entry
    whose 'id' equals int(movie_id) is removed and the file is rewritten.
    Feedback is shown with st.success / st.warning.
    '''
    username = st.session_state.get("username")
    if not username:
        st.warning("Please log in to remove favorites.")
        return

    favorites = load_favorites()
    if username not in favorites:
        st.warning("No favorites found.")
        return

    user_entries = favorites[username]
    match = None
    for entry in user_entries:
        # Ensure comparison is done with Python int
        if entry['id'] == int(movie_id):
            match = entry
            break

    if not match:
        st.warning("Movie not found in your favorites.")
        return

    user_entries.remove(match)
    save_favorites(favorites)
    st.success(f"Removed {match['title']} from favorites!")




# Load and preprocess the data
movies = load_data()
if movies.empty:
    st.error("No movies available to display. Check the dataset path.")
    # Everything below reads columns of `movies`; end this run here instead of
    # failing with a KeyError on the empty frame.
    st.stop()

# Parse the release dates, drop rows whose date cannot be parsed, and derive the
# release year. assign() builds new frames, so the filtered frame returned by
# load_data is never written to in place.
movies = movies.assign(release_date=pd.to_datetime(movies['release_date'], errors='coerce'))
movies = movies.dropna(subset=['release_date'])
movies = movies.assign(release_year=movies['release_date'].dt.year)

# Header
st.title("Movie Recommender System by Tasnim")


def _reset_search():
    '''Button callback that leaves the search view.

    It is registered through on_click so that it runs before the next script
    run starts; that is when the value of the keyed search box may still be
    overwritten.
    '''
    st.session_state["search_query"] = ""
    if "selected_movie" in st.session_state:
        del st.session_state["selected_movie"]
    # Paging offset that display_movies_in_columns keeps for the "search_results" prefix
    st.session_state["search_results_start_idx"] = 0


def _clamp(value, low, high):
    '''Return value limited to the closed interval [low, high].'''
    return max(low, min(value, high))


# Add search bar to the sidebar. The key stores the text in
# st.session_state["search_query"], which is what lets _reset_search clear it.
search_query = st.sidebar.text_input("Search Movie by Title", key="search_query")


# Login System
st.sidebar.header("Login")
if "username" not in st.session_state:
    st.session_state["username"] = None

typed_username = st.sidebar.text_input("Username", value=st.session_state["username"] or "")
# NOTE: the password is collected but never checked; this is a name-only login.
password = st.sidebar.text_input("Password", type="password")
if st.sidebar.button("Login"):
    st.session_state["username"] = typed_username
    st.success(f"Welcome, {typed_username}!")

# Use the logged-in name, not whatever is currently typed in the box, so the
# favorites shown below belong to the same user that add_to_favorites and
# remove_from_favorites operate on.
username = st.session_state["username"]

# Favorites
favorites = load_favorites()
user_favorites = favorites.get(username, []) if username else []
if username:
    st.sidebar.header(f"Favorites of {username}")
    for fav in user_favorites:
        col1, col2 = st.sidebar.columns([3, 1])
        col1.text(fav['title'])
        if col2.button("x", key=f"remove_{fav['id']}"):
            remove_from_favorites(fav['id'])


# Sidebar filters
st.sidebar.header("Filter Movies")
show_adult = st.sidebar.checkbox("Include Adult Movies", value=False)

# Slider defaults must lie inside the slider bounds, which come from the data;
# clamp the preferred defaults so an unusual dataset cannot break the sidebar.
min_year = int(movies['release_year'].min())
max_year = int(movies['release_year'].max())
default_years = (_clamp(2000, min_year, max_year), _clamp(2023, min_year, max_year))
year_range = st.sidebar.slider("Year Range", min_year, max_year, default_years)

max_popularity = float(movies['popularity'].max())
max_runtime = int(movies['runtime'].max())
preferences = {
    'vote_average': st.sidebar.slider("Minimum Rating", 0.0, 10.0, 7.0),
    'vote_count': st.sidebar.slider("Minimum Vote Count", 0, 10000, 1000),
    'popularity': st.sidebar.slider("Minimum Popularity", 0.0, max_popularity, min(10.0, max_popularity)),
    'runtime': st.sidebar.slider("Maximum Runtime (minutes)", 0, max_runtime, min(1440, max_runtime)),
}
selected_genre = st.sidebar.selectbox("Genre", ["All"] + sorted(set(genre for genres in movies['genres'].dropna() for genre in genres.split(", "))))

# Filter movies dynamically
filtered_movies = movies[
    (~movies['adult'] | show_adult) &
    (movies['vote_average'] >= preferences['vote_average']) &
    (movies['vote_count'] >= preferences['vote_count']) &
    (movies['popularity'] >= preferences['popularity']) &
    (movies['runtime'] <= preferences['runtime']) &
    (movies['release_year'].between(*year_range))
]
if selected_genre != "All":
    # Plain substring match: a genre name is text, not a regular expression
    filtered_movies = filtered_movies[filtered_movies['genres'].str.contains(selected_genre, na=False, regex=False)]

# Pick the main view: movie details, search results, or the recommendation lists.
# A stored None counts as "nothing selected".
selected_movie = st.session_state.get("selected_movie")
if selected_movie is not None:
    display_movie_details(selected_movie)
elif search_query:  # Show search results on the main page
    st.header("Search Results")
    search_results = search_movie(search_query, movies)
    if not search_results.empty:
        # Join the ranked hits back onto the full rows. Unlike an isin() filter,
        # this keeps the ranking order of search_movie and carries the
        # similarity score along for display.
        ranked_hits = search_results[['id', 'similarity']].drop_duplicates(subset='id')
        search_movies = ranked_hits.merge(movies, on='id', how='inner')
        display_movies_in_columns(search_movies, "search_results", similarity_col="similarity")
    else:
        st.warning("No results found for your search query.")

    # Back button: the callback clears the search box and the related view state
    st.button("Back to Recommendations", on_click=_reset_search)

else:
    # Recommendations: Most Voted Movies
    st.header("Most Voted Movies")
    most_voted_movies = filtered_movies.sort_values(by='vote_count', ascending=False).head(20)
    display_movies_in_columns(most_voted_movies, "most_voted")

    # Recommendations: Highest Rated Movies
    st.header("Highest Rated Movies")
    highest_rated_movies = filtered_movies.sort_values(by='vote_average', ascending=False).head(20)
    display_movies_in_columns(highest_rated_movies, "highest_rated")

    # Display Text Similarity-Based Recommendations
    st.header("Text Similarity-Based Recommendations")
    if username and user_favorites:
        favorite_titles = [movie['title'] for movie in user_favorites]
        text_recs = get_text_similarity_recommendations(favorite_titles, filtered_movies)
        display_movies_in_columns(text_recs, "text_similarity", similarity_col="similarity")
    else:
        st.warning("Add movies to your favorites to see recommendations.")

    # Display Clustering-Based Recommendations
    st.header("Clustering-Based Recommendations")
    clustered_movies = cluster_movies(filtered_movies)
    if 'cluster' in clustered_movies.columns:
        for cluster in clustered_movies['cluster'].unique():
            st.subheader(f"Cluster {cluster} Recommendations")
            cluster_recs = clustered_movies[clustered_movies['cluster'] == cluster][['id', 'title', 'poster_path', 'keywords', 'vote_average', 'release_date', 'overview']]
            display_movies_in_columns(cluster_recs, f"cluster_{cluster}")
    else:
        st.warning("Unable to generate clustering-based recommendations.")






""")





In [None]:
# Stop any ngrok processes that are still running before a new tunnel is opened below.
!killall ngrok

In [None]:
# Start the generated app on port 8501 as a background job; its stdout and stderr are discarded.
!streamlit run Tmovies.py --server.port=8501 &>/dev/null &

In [None]:
from pyngrok import ngrok

# Expose the Streamlit app running on port 8501 (the port chosen in the
# `streamlit run` cell) through an ngrok tunnel.
public_url = ngrok.connect("8501")  # Ensure port is passed as a string
# Prints the tunnel object returned by connect(), which shows the public URL and the local address
print(f"Streamlit app is live at {public_url}")

Streamlit app is live at NgrokTunnel: "https://dee5-35-186-174-208.ngrok-free.app" -> "http://localhost:8501"
