In [1]:
from google.colab import files
import zipfile, io, pandas as pd, numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Prompt for a file upload in Colab. The result is later indexed by file name
# and wrapped in io.BytesIO, i.e. it is used as a {file name: raw bytes} mapping.
uploaded = files.upload()

Saving ml-100k.zip to ml-100k.zip


In [2]:
# Locate the uploaded archive without relying on one hard-coded dictionary key:
# if the upload was stored under a different name (for example a renamed
# re-upload), the literal 'ml-100k.zip' lookup would fail with a bare KeyError.
zip_names = [name for name in uploaded if name.lower().endswith('.zip')]
if not zip_names:
    raise ValueError("No .zip archive found in the upload; please upload ml-100k.zip.")
archive_name = 'ml-100k.zip' if 'ml-100k.zip' in uploaded else zip_names[0]

# Extract directly from the in-memory bytes into ./data; later cells read
# data/ml-100k/u.data and data/ml-100k/u.item from there.
with zipfile.ZipFile(io.BytesIO(uploaded[archive_name]), 'r') as zip_ref:
    zip_ref.extractall('data')


In [3]:
# Ratings: tab-separated file, column names supplied explicitly.
ratings = pd.read_csv(
    'data/ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)

# Movie catalogue: pipe-separated, latin-1 encoded; five descriptive columns
# followed by one 0/1 flag column per genre.
movies = pd.read_csv(
    'data/ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    names=['item_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL']
          + ['unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
             'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
             'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'],
)

# Attach the movie title to every rating row.
data = ratings.merge(movies[['item_id', 'title']], on='item_id')

In [4]:
# Genre flag columns used to build the content profile ('unknown' is left out).
genre_cols = ['Action','Adventure','Animation','Children','Comedy','Crime','Documentary','Drama',
              'Fantasy','Film-Noir','Horror','Musical','Mystery','Romance','Sci-Fi','Thriller','War','Western']

# One space-separated string of genre names per movie (names of the flags equal to 1).
movies['genres'] = movies[genre_cols].apply(lambda x: ' '.join(x.index[x==1]), axis=1)

# TF-IDF over the genre strings, then pairwise cosine similarity between movies.
# NOTE(review): the default tokenizer presumably splits hyphenated names such as
# 'Sci-Fi' into two tokens - confirm that this weighting is intended.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])
content_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# title -> row label lookup. Series.drop_duplicates() compares *values* (the row
# labels), so it never removed repeated titles; a repeated title then made
# indices[title] return a Series instead of one position. De-duplicate on the
# index (the titles) and keep the first occurrence.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# item_id -> row label lookup.
id_to_index = dict(zip(movies['item_id'], movies.index))

In [5]:
# User x item rating table; user/item pairs without a rating are filled with 0.
user_movie_matrix = ratings.pivot_table(
    values='rating',
    index='user_id',
    columns='item_id',
).fillna(0)

# Cosine similarity between every pair of user rating rows, labelled by
# user_id on both axes so a user's neighbours can be looked up by id.
user_sim = cosine_similarity(user_movie_matrix)
user_sim_df = pd.DataFrame(
    data=user_sim,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)

def collab_recommend(user_id, top_k=10, n_neighbors=49):
    """Recommend unseen movies for a user from the ratings of similar users.

    Each item is scored by the similarity-weighted sum of the neighbours'
    ratings divided by the total neighbour similarity. Items the user has
    already rated (non-zero entry in ``user_movie_matrix``) are excluded.

    Args:
        user_id: Label of the user in ``user_movie_matrix`` / ``user_sim_df``.
        top_k: Maximum number of recommendations to return.
        n_neighbors: Number of most similar users to aggregate over
            (default 49, the size of the previous fixed ``[1:50]`` slice).

    Returns:
        DataFrame with a single ``title`` column, ordered from best to worst
        score. Empty when the user is unknown or nothing can be scored.
    """
    if user_id not in user_movie_matrix.index:
        return pd.DataFrame(columns=['title'])

    # Drop the user explicitly instead of assuming they sort first in their own
    # similarity column (ties or rounding can put another user ahead).
    neighbours = (user_sim_df[user_id]
                  .drop(labels=user_id)
                  .sort_values(ascending=False)
                  .head(n_neighbors))

    total_sim = neighbours.sum()
    if not total_sim > 0:
        # No overlapping neighbours: every score would be 0/0.
        return pd.DataFrame(columns=['title'])

    weighted_ratings = user_movie_matrix.loc[neighbours.index].T.dot(neighbours)
    scores = (weighted_ratings / total_sim).dropna()

    # Keep only items the user has not rated yet.
    seen = user_movie_matrix.loc[user_id].reindex(scores.index)
    unseen = scores[seen == 0]
    ranked_items = unseen.sort_values(ascending=False).head(top_k).index

    # A plain isin() filter returns rows in catalogue order and loses the
    # ranking, so re-order the matched rows by their rank position.
    rank = {item: position for position, item in enumerate(ranked_items)}
    matched = movies.loc[movies['item_id'].isin(list(ranked_items)), ['item_id', 'title']]
    ordered_rows = matched['item_id'].map(rank).sort_values(kind='stable').index
    return matched.loc[ordered_rows, ['title']]

In [6]:
def hybrid_recommend(user_id, liked_title, alpha=0.6, top_k=10):
    """Blend collaborative and content-based rankings into one list.

    Both sources are turned into rank scores (50 for the best candidate,
    decreasing by one per position) and combined as
    ``alpha * cf_score + (1 - alpha) * content_score``.

    Args:
        user_id: User label; users missing from ``user_movie_matrix`` simply
            contribute no collaborative scores.
        liked_title: Exact movie title used as the content-similarity seed.
        alpha: Weight of the collaborative score.
        top_k: Number of titles to return.

    Returns:
        DataFrame with a ``title`` column, best first. When ``liked_title`` is
        unknown the list ``["Movie not found."]`` is returned instead
        (kept for backward compatibility with existing callers).
    """
    # Validate the title first so no collaborative work is done for a bad request.
    if liked_title not in indices:
        return ["Movie not found."]
    idx = indices[liked_title]
    if isinstance(idx, pd.Series):
        # Repeated title in the lookup: use its first occurrence.
        idx = idx.iloc[0]
    idx = int(idx)

    if user_id in user_movie_matrix.index:
        cf_titles = collab_recommend(user_id, top_k=50)['title'].tolist()
        cf_scores = {t: (50 - i) for i, t in enumerate(cf_titles)}
    else:
        cf_scores = {}

    # Exclude the seed movie by position. Slicing off the first sorted entry is
    # wrong whenever another movie ties with it (identical genre profile): the
    # tie could be dropped and the seed movie recommended to itself.
    candidates = [(i, score) for i, score in enumerate(content_sim[idx]) if i != idx]
    candidates = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:49]
    content_titles = movies['title'].iloc[[i for i, _ in candidates]].tolist()
    content_scores = {t: (50 - i) for i, t in enumerate(content_titles)}

    all_titles = set(cf_scores) | set(content_scores)
    hybrid_scores = {
        t: alpha * cf_scores.get(t, 0) + (1 - alpha) * content_scores.get(t, 0)
        for t in all_titles
    }
    top_titles = sorted(hybrid_scores.items(), key=lambda pair: pair[1], reverse=True)[:top_k]
    return pd.DataFrame([t for t, _ in top_titles], columns=['title'])

In [7]:
# Per-title mean rating ('rating') and number of ratings ('user_id').
title_stats = data.groupby('title').agg({'rating':'mean','user_id':'count'})

# Ranking by raw mean lets titles with only one or two 5-star ratings take the
# top spots, so require a minimum number of ratings before a title can qualify.
MIN_TRENDING_RATINGS = 50
trending = (title_stats[title_stats['user_id'] >= MIN_TRENDING_RATINGS]
            .sort_values(by=['rating','user_id'], ascending=False)
            .head(10))

In [9]:
!pip -q install gradio==4.44.0
import gradio as gr

def show_trending():
    """Render the precomputed ``trending`` table as a Markdown table string."""
    # Move the title out of the index so it shows up as a regular column.
    table = trending.reset_index()
    return table.to_markdown(index=False)

def recommend_content(movie):
    """Return the 10 movies most similar to ``movie`` as a Markdown table.

    Similarity comes from the precomputed ``content_sim`` matrix. Returns the
    string "Movie not found." when the title is not in ``indices``.
    """
    # Retry without surrounding whitespace so a stray space typed into the
    # textbox does not cause a spurious miss.
    if movie not in indices and isinstance(movie, str):
        movie = movie.strip()
    if movie not in indices:
        return "Movie not found."

    idx = indices[movie]
    if isinstance(idx, pd.Series):
        # Repeated title in the lookup: use its first occurrence.
        idx = idx.iloc[0]
    idx = int(idx)

    # Exclude the query movie by position. Dropping the first sorted entry is
    # wrong when another movie ties with it (identical genre profile): the tie
    # could be dropped and the query movie listed as its own recommendation.
    candidates = [(i, score) for i, score in enumerate(content_sim[idx]) if i != idx]
    candidates = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:10]
    recs = movies['title'].iloc[[i for i, _ in candidates]].tolist()
    return pd.DataFrame(recs, columns=['Recommended Movies']).to_markdown(index=False)

def recommend_collab(user_id):
    """UI handler: collaborative recommendations for a user id typed as text.

    Returns a Markdown table of up to 10 titles, or a plain message when the
    input is not an integer or no recommendations are available.
    """
    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        # Only conversion failures are expected here; a bare `except:` would
        # also hide unrelated errors (including KeyboardInterrupt).
        return "Enter a valid integer user ID."

    recs = collab_recommend(user_id, top_k=10)
    if recs.empty:
        # Unknown user (or nothing to score): say so instead of rendering a
        # header-only table.
        return "No recommendations found for this user ID."
    return recs.to_markdown(index=False)

def recommend_hybrid(user_id, movie):
    """UI handler: hybrid recommendations for a user id and a liked movie title.

    Returns a Markdown table of up to 10 titles, or a plain message when the
    user id is not an integer or the movie title is unknown.
    """
    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        # Only conversion failures are expected here; avoid a bare `except:`.
        return "Enter a valid integer user ID."

    result = hybrid_recommend(user_id, movie, alpha=0.6, top_k=10)
    if isinstance(result, pd.DataFrame):
        return result.to_markdown(index=False)
    # hybrid_recommend reports an unknown title as a list of message strings;
    # calling .to_markdown on that list raised AttributeError.
    if isinstance(result, str):
        return result
    return "\n".join(str(message) for message in result)

# Four-tab Gradio UI; each tab wires one button to one handler and renders the
# handler's Markdown result. The mis-encoded characters in the heading and the
# user-id labels are restored to the intended emoji and en dash.
with gr.Blocks(title="Hybrid Movie Recommender") as demo:
    gr.Markdown("# 🎬 Hybrid Movie Recommendation System")
    with gr.Tab("Trending"):
        out1 = gr.Markdown()
        btn1 = gr.Button("Show Trending Movies")
        btn1.click(show_trending, None, out1)
    with gr.Tab("Content-Based"):
        movie_in = gr.Textbox(label="Enter a Movie Title (e.g. Toy Story (1995))")
        out2 = gr.Markdown()
        btn2 = gr.Button("Recommend Similar Movies")
        btn2.click(recommend_content, movie_in, out2)
    with gr.Tab("Collaborative"):
        user_in = gr.Textbox(label="Enter User ID (1–943)")
        out3 = gr.Markdown()
        btn3 = gr.Button("Recommend for User")
        btn3.click(recommend_collab, user_in, out3)
    with gr.Tab("Hybrid (Personalized + Similar)"):
        uid = gr.Textbox(label="User ID (1–943)")
        mov = gr.Textbox(label="Movie Title")
        out4 = gr.Markdown()
        btn4 = gr.Button("Recommend Hybrid")
        btn4.click(recommend_hybrid, [uid, mov], out4)

# Announce and start the app (mis-encoded ellipsis in the message restored).
print("\nLaunching Gradio app…")
# share=True requests a public share link in addition to the local server.
demo.launch(share=True)


Launching Gradio app…


--------


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://8b7ba11ad3a4f50321.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


