# In [None]:
import pandas as pd
import streamlit as st
import requests
from bs4 import BeautifulSoup

# Load the four CSV tables (links, movies, ratings, tags) from the local
# "ml-latest-small" dataset folder, in that order.
_DATASET_DIR = '/Users/subinjosethomas/Desktop/Bootcamp/MainCourse/07_Recommender_Systems/ml-latest-small'
links, movies, ratings, tags = (
    pd.read_csv(f'{_DATASET_DIR}/{table}.csv')
    for table in ('links', 'movies', 'ratings', 'tags')
)

# Define functions
@st.cache_data()
def get_movie_poster(imdb_id):
    """Return the poster image URL for an IMDb title, or ``None``.

    The IMDb title page is downloaded and the ``content`` attribute of its
    ``<meta property="og:image">`` tag is returned.

    Args:
        imdb_id: IMDb title identifier, either the bare digits
            (e.g. ``"0114709"``) or already prefixed with ``"tt"``.

    Returns:
        The image URL string, or ``None`` when the page cannot be fetched or
        carries no ``og:image`` meta tag. Because the function is wrapped in
        ``st.cache_data``, a ``None`` result is cached like any other value.
    """
    title_id = str(imdb_id)
    # IMDb title URLs look like /title/tt0114709/; callers pass only digits.
    if not title_id.startswith("tt"):
        title_id = f"tt{title_id}"
    url = f"https://www.imdb.com/title/{title_id}/"

    try:
        # A timeout keeps a stalled connection from hanging the app.
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        # Posters are best-effort: any network/HTTP failure means "no poster".
        return None

    soup = BeautifulSoup(page.content, 'html.parser')
    tag = soup.find("meta", property="og:image")
    if tag is None:
        return None
    # .get() avoids a KeyError when the tag exists without a content attribute.
    return tag.get("content")


def get_top_movies(movie_id, num_recommendations):
    """Return the highest-scoring movies, excluding ``movie_id``.

    Each movie's score is ``0.8 * rating_count * mean_rating``, computed from
    the module-level ``ratings`` table and joined onto the module-level
    ``movies`` table.

    Args:
        movie_id: ``movieId`` of the movie to leave out of the result.
        num_recommendations: Maximum number of rows to return. Values of zero
            or below yield an empty frame.

    Returns:
        A DataFrame with the columns of ``movies`` plus ``rating`` (mean),
        ``rating_count`` and ``weighted_score``, ordered by descending
        ``weighted_score`` and indexed ``0..n-1`` in that order.
    """
    # One groupby pass yields both the mean rating and the number of ratings.
    stats = (
        ratings.groupby('movieId')['rating']
        .agg(['mean', 'count'])
        .rename(columns={'mean': 'rating', 'count': 'rating_count'})
    )
    stats['weighted_score'] = (0.8 * stats['rating_count']) * stats['rating']

    # Sorting once after the merge is enough; a stable sort keeps tied movies
    # in a deterministic order.
    ranked = (
        movies
        .merge(stats, on="movieId")
        .sort_values(by="weighted_score", ascending=False, kind="mergesort")
    )
    ranked = ranked[ranked["movieId"] != movie_id]

    # Clamp the count: head() with a negative n would return "all but the
    # last n" rows. Reset the index so its labels match the ranking order.
    return ranked.head(max(num_recommendations, 0)).reset_index(drop=True)


# Set up page
st.set_page_config(page_title="Movie Recommendation System")

st.title("Movie Recommendation System")

# Select movie; the leading empty option means nothing is selected initially.
movie_options = movies["title"].tolist()
movie_options.insert(0, "")

selected_movie = st.selectbox("Select a movie:", movie_options)

if selected_movie:
    # If several rows share the title, the first matching movieId is used.
    movie_id = movies[movies["title"] == selected_movie]["movieId"].values[0]

    # Show top recommended movies
    st.subheader("Top Recommendations")
    top_movies = get_top_movies(movie_id, 5)
    # Number the entries by position: the frame's index labels are not
    # guaranteed to be consecutive ranks.
    for rank, (_, row) in enumerate(top_movies.iterrows(), start=1):
        title = row["title"]

        # A movie may have no row in links; treat that as "no poster" rather
        # than failing on an empty lookup.
        imdb_ids = links[links["movieId"] == row["movieId"]]["imdbId"].values
        poster_url = None
        if len(imdb_ids) > 0:
            # imdbId is stored without leading zeros; pad to 7 digits.
            imdb_id = str(imdb_ids[0]).zfill(7)
            poster_url = get_movie_poster(imdb_id)

        st.write(f"## {rank}. {title}")
        if poster_url is not None:
            st.image(poster_url, width=200)
        else:
            st.write("Poster not available.")