In [1]:
import pandas as pd
from surprise import Dataset, Reader
import os

# DATASET: https://grouplens.org/datasets/movielens/
# load csv
workingDir = r'dataset'
data = pd.read_csv(os.path.join(workingDir, r'ratings.csv'))


# rename for surprise lib
data.rename(columns={'userId': 'user', 'movieId': 'item', 'rating': 'raw_ratings'}, inplace=True)

# reconstruct for surprise lib
reader = Reader(rating_scale=(1, 5))
data_surprise = Dataset.load_from_df(data[['user', 'item', 'raw_ratings']], reader)

# head
data.head()


Unnamed: 0,user,item,raw_ratings,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [2]:
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(data_surprise, test_size=0.2)

user_data = []
item_data = []
rating_data = []

num_of_ratings = 5  # Number of ratings to fetch

print(trainset.n_users)
print(trainset.n_items)
print(trainset.n_ratings)
print(trainset.global_mean)

for u, i, r in trainset.all_ratings():
    user_data.append(trainset.to_raw_uid(u))
    item_data.append(trainset.to_raw_iid(i))
    rating_data.append(r)
    if len(user_data) >= num_of_ratings:
        break

# Create a DataFrame
df_sample = pd.DataFrame({
    'user': user_data,
    'item': item_data,
    'rating': rating_data
})

print(df_sample)

610
8980
80668
3.5002045420736847
   user    item  rating
0   525    4054     3.0
1   525    2145     3.5
2   525   70286     4.0
3   525    1682     4.5
4   525  137337     2.5


In [3]:
from surprise import SVD
from surprise import accuracy

# Initialize the SVD model
model = SVD()


model.fit(trainset)

predictions = model.test(testset)

# Calculate and print the RMSE
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")

user_id = 610
user_ratings = trainset.ur[trainset.to_inner_uid(user_id)]
known_items = [trainset.to_raw_iid(ir[0]) for ir in user_ratings]
predicted_ratings = []

for item_id in trainset.all_items():
    raw_item_id = trainset.to_raw_iid(item_id)
    if raw_item_id not in known_items:  # Skip known items
        predicted_ratings.append((raw_item_id, model.predict(user_id, raw_item_id).est))

# Get top 10 recommendations
top_n_recommendations = sorted(predicted_ratings, key=lambda x: x[1], reverse=True)[:10]
print(top_n_recommendations)



RMSE: 0.8704
RMSE: 0.870394658808816
[(4973, 4.8014882963198335), (1148, 4.795497250623815), (1196, 4.779028547947242), (1203, 4.72121892534171), (3030, 4.636961013735658), (1223, 4.608061266640207), (4226, 4.60422060309087), (1197, 4.602087928687854), (3508, 4.594674081130023), (2571, 4.567786197784618)]


* Not: RMSE biraz yüksek çıktı :(

In [4]:
movie_titles_df = pd.read_csv(os.path.join(workingDir, r'movies.csv'))
movie_titles_df.head()

merged_df = pd.merge(data, movie_titles_df, left_on='item', right_on='movieId')



In [5]:
merged_df.head()
user_610_data = merged_df[merged_df['user'] == 610]
user_610_data



Unnamed: 0,user,item,raw_ratings,timestamp,movieId,title,genres
214,610,1,5.0,1479542900,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
368,610,6,5.0,1493850345,6,Heat (1995),Action|Crime|Thriller
571,610,47,5.0,1479545853,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
775,610,50,4.0,1493844757,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
830,610,70,4.0,1495959282,70,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller
...,...,...,...,...,...,...,...
100831,610,160341,2.5,1479545749,160341,Bloodmoon (1997),Action|Thriller
100832,610,160527,4.5,1479544998,160527,Sympathy for the Underdog (1971),Action|Crime|Drama
100833,610,160836,3.0,1493844794,160836,Hazard (2005),Action|Drama|Thriller
100834,610,163937,3.5,1493848789,163937,Blair Witch (2016),Horror|Thriller


In [6]:
for movie_id, rating in top_n_recommendations:
    movie_row = movie_titles_df[movie_titles_df['movieId'] == movie_id]
    title = movie_row['title'].values[0]
    genres = movie_row['genres'].values[0]
    print(f"Movie ID: {movie_id}, Title: {title}, Genres: {genres}, Predicted Rating: {rating:.2f}")
    print("\n")

Movie ID: 4973, Title: Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001), Genres: Comedy|Romance, Predicted Rating: 4.80


Movie ID: 1148, Title: Wallace & Gromit: The Wrong Trousers (1993), Genres: Animation|Children|Comedy|Crime, Predicted Rating: 4.80


Movie ID: 1196, Title: Star Wars: Episode V - The Empire Strikes Back (1980), Genres: Action|Adventure|Sci-Fi, Predicted Rating: 4.78


Movie ID: 1203, Title: 12 Angry Men (1957), Genres: Drama, Predicted Rating: 4.72


Movie ID: 3030, Title: Yojimbo (1961), Genres: Action|Adventure, Predicted Rating: 4.64


Movie ID: 1223, Title: Grand Day Out with Wallace and Gromit, A (1989), Genres: Adventure|Animation|Children|Comedy|Sci-Fi, Predicted Rating: 4.61


Movie ID: 4226, Title: Memento (2000), Genres: Mystery|Thriller, Predicted Rating: 4.60


Movie ID: 1197, Title: Princess Bride, The (1987), Genres: Action|Adventure|Comedy|Fantasy|Romance, Predicted Rating: 4.60


Movie ID: 3508, Title: Outlaw Josey Wales, The (1976), Genres: Act