In [1]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25ldone
[?25h  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp311-cp311-macosx_10_9_x86_64.whl size=1126009 sha256=76588faf2a8a0d088042ddc0dbcde175231e56fd82583ae062410852c4cd976f
  Stored in directory: /Users/kardelensenkus/Library/Caches/pip/wheels/f4/2b/26/e2a5eae55d3b7688995e66abe7f40473aac6c95ddd8ee174a8
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [2]:
# Standard library
import warnings

# Third-party
import pandas as pd
from surprise import Reader, SVD, Dataset, accuracy
from surprise.model_selection import GridSearchCV, train_test_split, cross_validate

# Notebook-wide settings: display every DataFrame column and silence all warnings.
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

In [3]:
# Load the movie table; the path is resolved relative to the kernel's working directory.
movie = pd.read_csv("desktop/movie.csv")

In [4]:
# Load the rating table; the path is resolved relative to the kernel's working directory.
rating = pd.read_csv("desktop/rating.csv")

In [5]:
# Attach every rating to its movie's metadata via the shared movieId key.
# An inner join keeps only movies that actually have ratings. A left join would
# add one placeholder row per unrated movie with NaN userId/rating, which also
# forces userId to float and risks feeding NaN ratings into the model.
df = movie.merge(rating, how="inner", on="movieId")

In [6]:
# Preview the first five rows of the merged frame.
df.head(5)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.0,4.0,1999-12-11 13:36:47
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6.0,5.0,1997-03-13 17:50:52
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8.0,4.0,1996-06-05 13:37:51
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10.0,4.0,1999-11-25 02:44:47
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11.0,4.5,2009-01-02 01:13:41


In [7]:
# Ids of the four movies the model is trained and evaluated on.
movie_ids = [
    130219,
    356,   # Forrest Gump (1994)
    4422,
    541,   # Blade Runner (1982)
]

In [8]:
# Titles of the sampled movies, listed in the same order as movie_ids so the two
# lists can be read (or zipped) in parallel: 356 is Forrest Gump and 541 is
# Blade Runner according to the data shown in this notebook.
# NOTE(review): the pairing of the other two titles with 130219 and 4422 is
# assumed from the remaining positions; confirm against the movie table.
movies = ["The Dark Knight (2011)",
          "Forrest Gump (1994)",
          "Cries and Whispers (Viskningar och rop) (1972)",
          "Blade Runner (1982)"]

In [9]:
# Keep only the rows that belong to one of the selected movie ids.
sample_df = df.loc[df["movieId"].isin(movie_ids)]

In [10]:
# Preview the first five rows of the sample.
sample_df.head(5)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
2457839,356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.0,4.0,1996-08-24 09:28:42
2457840,356,Forrest Gump (1994),Comedy|Drama|Romance|War,7.0,4.0,2002-01-16 19:02:55
2457841,356,Forrest Gump (1994),Comedy|Drama|Romance|War,8.0,5.0,1996-06-05 13:44:19
2457842,356,Forrest Gump (1994),Comedy|Drama|Romance|War,9.0,4.0,2001-07-01 20:26:38
2457843,356,Forrest Gump (1994),Comedy|Drama|Romance|War,10.0,3.0,1999-11-25 02:32:02


In [11]:
# (rows, columns) of the sample restricted to the selected movie ids.
sample_df.shape

(97343, 6)

In [12]:
# Reshape the sample into a user x title matrix of ratings:
# one row per userId, one column per movie title.
user_movie_df = pd.pivot_table(
    sample_df,
    index=["userId"],
    columns=["title"],
    values="rating",
)

In [13]:
# (number of users, number of movie titles) in the pivoted matrix.
user_movie_df.shape

(76918, 4)

In [14]:
# The ratings use half-star steps (4.5 appears in the merged data), so the scale
# starts at 0.5, not 1. With a lower bound of 1, estimates are clipped to [1, 5]
# and can never reach the lowest ratings.
# NOTE(review): assumes the MovieLens 0.5-5.0 scale; confirm with
# rating["rating"].min() and rating["rating"].max().
reader = Reader(rating_scale=(0.5, 5))

In [15]:
# Build the Surprise dataset from the three columns it needs, in the order
# user id, item id, rating.
data = Dataset.load_from_df(
    df=sample_df.loc[:, ['userId', 'movieId', 'rating']],
    reader=reader,
)

In [16]:
# Modeling

In [17]:
# Hold out 25% of the ratings for evaluation. The fixed seed keeps the split
# identical across kernel restarts, so Restart & Run All yields the same sets.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

In [18]:
# Baseline SVD with the library's default hyperparameters, seeded so the random
# initialisation of the factors is repeatable between runs.
svd_model = SVD(random_state=42)

In [19]:
# Train on the training split only; the test split stays held out for evaluation.
svd_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1874c3c50>

In [20]:
# Run the fitted model over the held-out test split and keep the resulting predictions.
predictions = svd_model.test(testset)

In [21]:
# RMSE of the test-split predictions; the call both prints the score and returns it,
# which is why the value shows up twice in the cell output.
accuracy.rmse(predictions)

RMSE: 0.9361


0.9361427423745392

In [22]:
# Estimated rating of user 1.0 for item 541 (Blade Runner).
svd_model.predict(1.0, 541, verbose=True)

user: 1.0        item: 541        r_ui = None   est = 4.03   {'was_impossible': False}


Prediction(uid=1.0, iid=541, r_ui=None, est=4.029822162103998, details={'was_impossible': False})

In [23]:
# Estimated rating of user 1.0 for item 356 (Forrest Gump).
svd_model.predict(1.0, 356, verbose=True)

user: 1.0        item: 356        r_ui = None   est = 4.21   {'was_impossible': False}


Prediction(uid=1.0, iid=356, r_ui=None, est=4.210577244921602, details={'was_impossible': False})

In [24]:
# Ratings user 1 actually gave within the sample, for comparison with the estimates.
sample_df.loc[sample_df["userId"].eq(1)]

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
3612352,541,Blade Runner (1982),Action|Sci-Fi|Thriller,1.0,4.0,2005-04-02 23:30:03


In [25]:
# Model Tuning

In [26]:
# Search space for tuning: 3 epoch counts x 3 learning rates = 9 combinations.
param_grid = dict(
    n_epochs=[5, 10, 20],
    lr_all=[0.002, 0.005, 0.007],
)

In [27]:
# Grid search over param_grid for SVD, scored on RMSE and MAE with 3-fold
# cross-validation, using every available core and reporting joblib progress.
gs = GridSearchCV(
    algo_class=SVD,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=3,
    n_jobs=-1,
    joblib_verbose=True,
)

In [28]:
# Evaluate every parameter combination on every fold
# (9 combinations x 3 folds = 27 fits, matching the joblib log below).
gs.fit(data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:   10.3s finished


In [29]:
# Best RMSE found by the grid search.
gs.best_score['rmse']

0.9309360212369175

In [30]:
# Parameter combination that achieved the best RMSE.
gs.best_params['rmse']

{'n_epochs': 10, 'lr_all': 0.002}

In [31]:
# Final Model and Prediction

In [32]:
# Epoch count of the current model, which was built as SVD() with no arguments,
# shown for comparison with the tuned value from the grid search.
# (The stray dir(svd_model) call was removed: its result was discarded.)
svd_model.n_epochs

20

In [33]:
# Rebuild the model with the RMSE-optimal hyperparameters from the grid search.
# The fixed seed makes the final fit, and therefore its estimates, reproducible.
svd_model = SVD(**gs.best_params['rmse'], random_state=42)

In [34]:
# Train the final model on all available ratings (no hold-out).
# The trainset gets its own name instead of overwriting `data`, so the dataset
# stays available and the cell can be re-run safely. Rebinding `data` to the
# trainset would make a second run fail on the build_full_trainset() call.
full_trainset = data.build_full_trainset()
svd_model.fit(full_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x18996d950>

In [35]:
# Estimated rating of user 1.0 for item 541 (Blade Runner) from the final, tuned model.
svd_model.predict(1.0, 541, verbose=True)

user: 1.0        item: 541        r_ui = None   est = 4.16   {'was_impossible': False}


Prediction(uid=1.0, iid=541, r_ui=None, est=4.163882921290023, details={'was_impossible': False})