<a href="https://colab.research.google.com/github/ranjith-rk-7/NM-PROJECT1/blob/main/HACKATHON.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Standard library
import os
import zipfile

# Third-party
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Reaching this line means the scikit-surprise imports above succeeded.
print("Surprise imported successfully!")

# 1. Unpack the archive, but only when the target folder is not there yet
zip_path = "MOVIE.zip"
extract_folder = "movie_data"

if os.path.exists(extract_folder):
    # An existing folder is taken as proof of an earlier extraction.
    print("Data already extracted.")
else:
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
    print("ZIP extracted!")

# 2. Read the ratings and movie tables from the extracted folder
dataset_dir = os.path.join(extract_folder, "ml-100k")
ratings_path = os.path.join(dataset_dir, "u.data")
movies_path = os.path.join(dataset_dir, "u.item")

# u.data is tab-separated with no header row; the timestamp column is read
# and then dropped, leaving userId / movieId / rating.
ratings = pd.read_csv(
    ratings_path,
    sep='\t',
    names=['userId', 'movieId', 'rating', 'timestamp'],
).drop(columns='timestamp')

# u.item is pipe-separated and latin-1 encoded; only the first two columns
# (id and title) are kept.
movies = pd.read_csv(
    movies_path,
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movieId', 'title'],
)

print("Sample ratings:")
print(ratings.head())
print("\nSample movies:")
print(movies.head())

# 3. Prepare dataset for Surprise
# The Reader declares the rating scale (1 to 5) used when loading the frame.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings; the fixed seed keeps the split repeatable.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# 4. Train SVD model
# SVD starts from randomly initialised factors, so it is seeded as well;
# without random_state the recommendations change on every fresh run even
# though the train/test split is fixed.
model = SVD(random_state=42)
model.fit(trainset)

# 5. Recommendation function
def get_top_n_recommendations(user_id, n=5):
    """Return the titles of the best-predicted movies the user has not rated.

    Uses the notebook-level ``movies`` and ``ratings`` DataFrames and the
    fitted ``model``.

    Args:
        user_id: User id as it appears in ``ratings['userId']``.
        n: Maximum number of titles to return. Values <= 0 yield ``[]``.

    Returns:
        list: Titles ordered from highest to lowest predicted rating
        (``pred.est``). Ties keep the order of ``movies['movieId']`` because
        the sort is stable.
    """
    if n <= 0:
        # A negative n would otherwise slice from the end (predictions[:-1])
        # and return almost the whole catalogue.
        return []

    # A set keeps the "already rated" filter linear in the catalogue size.
    rated_movie_ids = set(
        ratings.loc[ratings['userId'] == user_id, 'movieId'].tolist()
    )
    unrated_movie_ids = [
        mid for mid in movies['movieId'].unique() if mid not in rated_movie_ids
    ]

    predictions = [model.predict(user_id, mid) for mid in unrated_movie_ids]
    predictions.sort(key=lambda pred: pred.est, reverse=True)

    # Build the id -> title mapping once instead of filtering the frame for
    # every prediction. setdefault keeps the first title seen for an id.
    title_by_id = {}
    for mid, title in zip(movies['movieId'], movies['title']):
        title_by_id.setdefault(mid, title)

    recommended_movies = []
    for pred in predictions[:n]:
        if pred.iid in title_by_id:
            recommended_movies.append(title_by_id[pred.iid])
    return recommended_movies

# 6. Test recommendations for a user
user_to_test = 196
# One variable drives both the header text and the n argument so the two
# cannot drift apart.
top_n = 5
print(f"\nTop {top_n} recommendations for user {user_to_test}:")
print(get_top_n_recommendations(user_to_test, n=top_n))


Surprise imported successfully!
ZIP extracted!
Sample ratings:
   userId  movieId  rating
0     196      242       3
1     186      302       3
2      22      377       1
3     244       51       2
4     166      346       1

Sample movies:
   movieId              title
0        1   Toy Story (1995)
1        2   GoldenEye (1995)
2        3  Four Rooms (1995)
3        4  Get Shorty (1995)
4        5     Copycat (1995)

Top 5 recommendations for user 196:
['Shawshank Redemption, The (1994)', 'Raging Bull (1980)', 'Star Wars (1977)', 'Wrong Trousers, The (1993)', 'Raiders of the Lost Ark (1981)']
