<a href="https://colab.research.google.com/github/soaeng/recomSys/blob/main/CF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 📌 **기본 CF 알고리즘**

In [10]:
# @title 데이터 불러오기

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Folder holding the raw files u.user, u.item and u.data.
# NOTE(review): presumably the MovieLens 100K dump on a mounted Google Drive - confirm.
base_src = 'drive/MyDrive/RecoSys/Data'

# User data: pipe-separated; column names are supplied explicitly via `names`.
u_user_src = os.path.join(base_src, 'u.user')
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(u_user_src, sep = '|', names = u_cols, encoding = 'latin-1')
users = users.set_index('user_id')

# Movie data: pipe-separated; metadata columns followed by one column per genre.
# NOTE(review): 'release data' / 'video release data' look like typos for
# '... date'; left unchanged because they are runtime column labels.
u_item_src = os.path.join(base_src, 'u.item')
i_cols = ['movie_id', 'title', 'release data', 'video release data', 'IMDB URL',
          'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
          'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
          'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies = pd.read_csv(u_item_src, sep = '|', names = i_cols, encoding = 'latin-1')
movies = movies.set_index('movie_id')

# Rating data: tab-separated (user_id, movie_id, rating, timestamp) rows.
u_data_src = os.path.join(base_src, 'u.data')
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(u_data_src, sep = '\t', names = r_cols, encoding = 'latin-1')

In [17]:
# @title 정확도 예측 함수

# Accuracy metric (RMSE)
def RMSE(y_true, y_pred):
  """Return the root-mean-square error between two equally shaped sequences.

  Both arguments are converted to NumPy arrays before the element-wise
  difference is taken.
  """
  residuals = np.array(y_true) - np.array(y_pred)
  squared = residuals ** 2
  return np.sqrt(np.mean(squared))

# RMSE of a prediction model over the test set
def score(model):
  """Evaluate `model` on every (user_id, movie_id) row of the global x_test.

  `model` is called as model(user, movie) once per test row; the
  predictions are compared with x_test['rating'] using RMSE.
  """
  # Actual ratings of the held-out rows.
  y_true = np.array(x_test['rating'])
  # One prediction per (user, movie) pair, in test-row order.
  y_pred = np.array([
      model(user, movie)
      for user, movie in zip(x_test['user_id'], x_test['movie_id'])
  ])
  return RMSE(y_true, y_pred)

# Build the train/test split.
# Stratifying on user_id keeps each user's ratings split 75% / 25% between
# train and test. No random_state is passed, so the split (and every RMSE
# computed from it) differs from run to run.
x = ratings.copy()
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, stratify = y)
# Users x movies matrix of training ratings; NaN where a user has no training rating for a movie.
rating_matrix = x_train.pivot(index = 'user_id', columns = 'movie_id', values = 'rating')

In [12]:
# @title 코사인 유사도 계산

from sklearn.metrics.pairwise import cosine_similarity

# Missing ratings are replaced by 0 so that cosine similarity can be computed.
matrix_dummy = rating_matrix.copy().fillna(0)
# Pairwise cosine similarity between the rows (users) of the filled matrix.
user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
# Re-attach user_id labels on both axes so similarities can be looked up by user_id.
user_similarity = pd.DataFrame(user_similarity, index = rating_matrix.index, columns = rating_matrix.index)
user_similarity

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.131524,0.029309,0.029267,0.270231,0.337902,0.317028,0.216077,0.065790,0.260959,...,0.267283,0.118663,0.252868,0.120509,0.180525,0.113500,0.238995,0.130502,0.113670,0.281856
2,0.131524,1.000000,0.098981,0.117372,0.034053,0.159846,0.071211,0.058660,0.081954,0.089882,...,0.089813,0.175860,0.259162,0.179916,0.234674,0.152453,0.214636,0.072848,0.107027,0.086360
3,0.029309,0.098981,1.000000,0.273115,0.029405,0.078473,0.054825,0.060784,0.062904,0.055192,...,0.019276,0.039268,0.139763,0.077143,0.060166,0.020060,0.137381,0.088066,0.068247,0.037120
4,0.029267,0.117372,0.273115,1.000000,0.013213,0.040300,0.046498,0.111279,0.000000,0.037096,...,0.000000,0.000000,0.110322,0.085593,0.098465,0.000000,0.148897,0.075378,0.110338,0.037068
5,0.270231,0.034053,0.029405,0.013213,1.000000,0.179919,0.299594,0.190298,0.040578,0.160485,...,0.228165,0.058260,0.052892,0.060452,0.126783,0.079711,0.179834,0.086024,0.099788,0.185404
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.113500,0.152453,0.020060,0.000000,0.079711,0.101034,0.119510,0.064197,0.000000,0.064270,...,0.046654,0.354247,0.150892,0.154904,0.409508,1.000000,0.117964,0.236958,0.035468,0.095212
940,0.238995,0.214636,0.137381,0.148897,0.179834,0.260398,0.246882,0.206126,0.112824,0.275252,...,0.247956,0.062059,0.163595,0.122653,0.172973,0.117964,1.000000,0.127620,0.189200,0.111484
941,0.130502,0.072848,0.088066,0.075378,0.086024,0.131185,0.036481,0.100655,0.000000,0.057415,...,0.061179,0.184239,0.207195,0.145094,0.328416,0.236958,0.127620,1.000000,0.100457,0.080496
942,0.113670,0.107027,0.068247,0.110338,0.099788,0.229540,0.196613,0.129428,0.095076,0.188267,...,0.162846,0.022396,0.046310,0.143403,0.080070,0.035468,0.189200,0.100457,1.000000,0.145177


In [18]:
# @title 가중평균 rating 계산

def CF_simple(user_id, movie_id):
  """Predict a rating as the similarity-weighted mean of the movie's ratings.

  Reads the module-level `rating_matrix` (users x movies, NaN = unrated)
  and `user_similarity` (users x users).

  Returns 3.0 when the movie is not a column of `rating_matrix`, or when
  the similarities of all users who rated it sum to zero (the weighted
  mean would otherwise be a 0/0 NaN that turns the whole RMSE into NaN).
  """
  if movie_id not in rating_matrix.columns:
    return 3.0

  movie_ratings = rating_matrix[movie_id].copy()
  # Users without a rating for this movie are excluded from the average.
  none_rating_idx = movie_ratings[movie_ratings.isnull()].index
  movie_ratings = movie_ratings.dropna()
  sim_scores = user_similarity[user_id].drop(none_rating_idx)

  weight_sum = sim_scores.sum()
  if weight_sum == 0:
    # No rater has any similarity to this user: nothing to weight by.
    return 3.0
  return np.dot(sim_scores, movie_ratings) / weight_sum

# RMSE of the plain similarity-weighted CF on the test set (notebook cell output).
score(CF_simple)

1.0158446410086883

---
# 📌 **이웃을 고려한 CF**

In [43]:
# @title 공통파트

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

# Folder holding the raw files u.user, u.item and u.data.
# NOTE(review): presumably the MovieLens 100K dump on a mounted Google Drive - confirm.
base_src = 'drive/MyDrive/RecoSys/Data'

# User data: pipe-separated; column names are supplied explicitly via `names`.
u_user_src = os.path.join(base_src, 'u.user')
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(u_user_src, sep = '|', names = u_cols, encoding = 'latin-1')
users = users.set_index('user_id')

# Movie data: pipe-separated; metadata columns followed by one column per genre.
# NOTE(review): 'release data' / 'video release data' look like typos for
# '... date'; left unchanged because they are runtime column labels.
u_item_src = os.path.join(base_src, 'u.item')
i_cols = ['movie_id', 'title', 'release data', 'video release data', 'IMDB URL',
          'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
          'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
          'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies = pd.read_csv(u_item_src, sep = '|', names = i_cols, encoding = 'latin-1')
movies = movies.set_index('movie_id')

# Rating data: tab-separated (user_id, movie_id, rating, timestamp) rows.
u_data_src = os.path.join(base_src, 'u.data')
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(u_data_src, sep = '\t', names = r_cols, encoding = 'latin-1')

# Accuracy metric (RMSE)
def RMSE(y_true, y_pred):
  """Return the root-mean-square error between two equally shaped sequences.

  Both arguments are converted to NumPy arrays before the element-wise
  difference is taken.
  """
  residuals = np.array(y_true) - np.array(y_pred)
  squared = residuals ** 2
  return np.sqrt(np.mean(squared))

# RMSE of a prediction model over the test set
def score(model, neighbor_size = 0):
  """Evaluate `model` on every (user_id, movie_id) row of the global x_test.

  `model` is called as model(user, movie, neighbor_size) once per test
  row; the predictions are compared with x_test['rating'] using RMSE.
  """
  test_users = x_test['user_id']
  test_movies = x_test['movie_id']
  predictions = [
      model(user, movie, neighbor_size)
      for user, movie in zip(test_users, test_movies)
  ]
  y_pred = np.array(predictions)
  y_true = np.array(x_test['rating'])
  return RMSE(y_true, y_pred)

# Build the train/test split.
# Stratifying on user_id keeps each user's ratings split 75% / 25% between
# train and test. No random_state is passed, so the split (and every RMSE
# computed from it) differs from run to run.
x = ratings.copy()
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, stratify = y)
# Users x movies matrix of training ratings; NaN where a user has no training rating for a movie.
rating_matrix = x_train.pivot(index = 'user_id', columns = 'movie_id', values = 'rating')

# Cosine similarity between users, computed on the matrix with NaN replaced by 0.
matrix_dummy = rating_matrix.copy().fillna(0)
user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
# Re-attach user_id labels on both axes so similarities can be looked up by user_id.
user_similarity = pd.DataFrame(user_similarity, index = rating_matrix.index, columns = rating_matrix.index)

In [25]:
# @title knn 사용한 CF

def CF_knn(user_id, movie_id, neighbor_size = 0):
  """Predict a rating from the most similar users who rated the movie.

  Reads the module-level `rating_matrix` (users x movies, NaN = unrated)
  and `user_similarity` (users x users).

  neighbor_size == 0 uses every user who rated the movie; otherwise only
  the `neighbor_size` raters with the highest similarity to `user_id`
  are used (all of them if fewer are available).

  Returns 3.0 when
    * the movie is not a column of `rating_matrix`,
    * a neighbor size is given but at most one user rated the movie, or
    * the selected similarities sum to zero (the weighted mean would be a
      0/0 NaN that turns the whole RMSE into NaN).
  """
  # Depending on the train/test split the movie may be missing from rating_matrix.
  if movie_id not in rating_matrix.columns:
    return 3.0

  # Ratings given to this movie by every user (NaN where unrated).
  movie_ratings = rating_matrix[movie_id].copy()
  # Users who did not rate the movie are excluded from the weighted mean.
  none_rating_idx = movie_ratings[movie_ratings.isnull()].index
  movie_ratings = movie_ratings.dropna()
  # Similarity between the given user and each remaining rater.
  sim_scores = user_similarity[user_id].drop(none_rating_idx)

  if neighbor_size != 0:
    if len(sim_scores) <= 1:
      return 3.0
    neighbor_size = min(neighbor_size, len(sim_scores))
    sim_scores = np.array(sim_scores)
    movie_ratings = np.array(movie_ratings)
    # argsort is ascending, so the last `neighbor_size` entries are the most similar raters.
    user_idx = np.argsort(sim_scores)
    sim_scores = sim_scores[user_idx][-neighbor_size:]
    movie_ratings = movie_ratings[user_idx][-neighbor_size:]

  weight_sum = sim_scores.sum()
  if weight_sum == 0:
    return 3.0
  return np.dot(sim_scores, movie_ratings) / weight_sum

# RMSE of the kNN model with 30 neighbors (notebook cell output).
score(CF_knn, neighbor_size = 30)

# Print the RMSE for a range of neighbor sizes to see how it changes.
for neighbor_size in [10, 20, 30, 40, 50, 60]:
  print('Neighbor size = %d: RMSE = %.4f' %(neighbor_size, score(CF_knn, neighbor_size)))

Neighbor size = 10: RMSE = 1.0303
Neighbor size = 20: RMSE = 1.0161
Neighbor size = 30: RMSE = 1.0131
Neighbor size = 40: RMSE = 1.0126
Neighbor size = 50: RMSE = 1.0132
Neighbor size = 60: RMSE = 1.0136


In [26]:
# @title 실제 주어진 사용자에 대해 추천을 받는 기능

def recom_movie(user_id, n_items, neighbor_size = 30):
  """Return the titles of the top `n_items` recommended movies for a user.

  Every movie the user has no rating for in the module-level
  `rating_matrix` is scored with CF_knn; the highest predictions are
  looked up in the module-level `movies` table.

  Only unseen movies with a usable prediction are ranked. Previously the
  already-rated movies stayed in the ranking with a score of 0 and NaN
  predictions were kept, so both could show up in the result when
  `n_items` was large.

  Returns a Series of titles indexed by movie_id, best prediction first.
  It may hold fewer than `n_items` entries if fewer candidates exist.
  """
  user_movie = rating_matrix.loc[user_id]
  # Candidates are the movies this user has not rated yet.
  unseen = user_movie[user_movie.isnull()].index

  predictions = pd.Series(
      [CF_knn(user_id, movie, neighbor_size) for movie in unseen],
      index = unseen,
      dtype = float,
  )

  movie_sort = predictions.dropna().sort_values(ascending = False)[:n_items]
  recom_movies = movies.loc[movie_sort.index]
  return recom_movies['title']

# Example: top-5 recommendations for user 729 using 30 neighbors (notebook cell output).
recom_movie(user_id = 729, n_items = 5, neighbor_size = 30)

movie_id
1612                   Leading Man, The (1996)
868                    Hearts and Minds (1996)
119     Maya Lin: A Strong Clear Vision (1994)
1293                           Star Kid (1997)
1189                        Prefontaine (1997)
Name: title, dtype: object

# 📌 **사용자의 평가경향을 고려한 CF**

In [28]:
# @title 각 사용자 평점평균 계산

# Mean training rating of each user (row-wise mean; NaN entries are skipped by DataFrame.mean).
rating_mean = rating_matrix.mean(axis = 1)
rating_mean

user_id
1      3.602941
2      3.608696
3      2.707317
4      4.388889
5      2.954198
         ...   
939    4.243243
940    3.550000
941    3.937500
942    4.271186
943    3.452381
Length: 943, dtype: float64

In [29]:
# @title 평점편차 계산
# Deviation of every rating from the mean of the user who gave it:
# each row (user) has that user's entry of rating_mean subtracted.
rating_bias = rating_matrix.sub(rating_mean, axis = 0)
rating_bias

movie_id,1,2,3,4,5,6,7,8,9,10,...,1665,1667,1669,1673,1674,1675,1676,1679,1680,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.397059,,0.397059,-0.602941,-0.602941,1.397059,0.397059,,1.397059,,...,,,,,,,,,,
2,0.391304,,,,,,,,,-1.608696,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,1.045802,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,0.756757,,...,,,,,,,,,,
940,,,,-1.550000,,,0.450000,1.45,,,...,,,,,,,,,,
941,,,,,,,,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


In [31]:
# @title 사용자 평가 경향을 고려한 함수

def CF_knn_bias(user_id, movie_id, neighbor_size = 0):
  """Predict a rating from neighbors' deviations from their own mean rating.

  Reads the module-level `rating_bias` (rating minus the rater's mean),
  `rating_mean` (per-user mean) and `user_similarity`.

  The similarity-weighted mean of the raters' deviations is added to the
  mean rating of `user_id`. neighbor_size == 0 uses every user who rated
  the movie; otherwise only the `neighbor_size` most similar raters.

  Returns the user's own mean rating when
    * the movie is not a column of `rating_bias`,
    * a neighbor size is given but at most one user rated the movie, or
    * the selected similarities sum to zero (the weighted mean would be a
      0/0 NaN that turns the whole RMSE into NaN).
  """
  prediction = rating_mean[user_id]
  if movie_id not in rating_bias.columns:
    return prediction

  movie_ratings = rating_bias[movie_id].copy()
  # Users who did not rate the movie are excluded from the weighted mean.
  none_rating_idx = movie_ratings[movie_ratings.isnull()].index
  movie_ratings = movie_ratings.drop(none_rating_idx)
  sim_scores = user_similarity[user_id].drop(none_rating_idx)

  if neighbor_size != 0:
    if len(sim_scores) <= 1:
      return prediction
    neighbor_size = min(neighbor_size, len(sim_scores))
    sim_scores = np.array(sim_scores)
    movie_ratings = np.array(movie_ratings)
    # argsort is ascending, so the last `neighbor_size` entries are the most similar raters.
    user_idx = np.argsort(sim_scores)
    sim_scores = sim_scores[user_idx][-neighbor_size:]
    movie_ratings = movie_ratings[user_idx][-neighbor_size:]

  weight_sum = sim_scores.sum()
  if weight_sum == 0:
    return prediction
  return np.dot(sim_scores, movie_ratings) / weight_sum + prediction

# RMSE of the bias-aware kNN model with 30 neighbors (notebook cell output).
score(CF_knn_bias, 30)

0.9469672941279534

# 📌 **신뢰도 고려한 CF**

In [34]:
# @title 사용자 평점편차 계산

# Per-user mean rating and each rating's deviation from its rater's mean.
rating_mean = rating_matrix.mean(axis = 1)
rating_bias = (rating_matrix.T - rating_mean).T

# Indicator matrix: 1.0 where rating_matrix > 0, else 0.0 (NaN > 0 evaluates to False).
# NOTE(review): assumes every real rating is positive - confirm against the data.
rating_binary_1 = np.array(rating_matrix > 0).astype(float)
rating_binary_2 = rating_binary_1.T

# Dot product of the indicator rows: counts.loc[a, b] is the number of movies
# flagged for both user a and user b.
counts = np.dot(rating_binary_1, rating_binary_2)
counts = pd.DataFrame(counts, index = rating_matrix.index, columns = rating_matrix.index).fillna(0)
counts

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,204.0,9.0,7.0,4.0,45.0,57.0,84.0,18.0,3.0,39.0,...,48.0,10.0,26.0,15.0,23.0,10.0,26.0,3.0,12.0,46.0
2,9.0,46.0,4.0,5.0,4.0,20.0,9.0,4.0,2.0,5.0,...,10.0,7.0,14.0,11.0,13.0,6.0,11.0,2.0,9.0,3.0
3,7.0,4.0,41.0,8.0,1.0,6.0,8.0,6.0,1.0,4.0,...,2.0,1.0,11.0,4.0,8.0,2.0,6.0,2.0,6.0,1.0
4,4.0,5.0,8.0,18.0,1.0,4.0,6.0,7.0,0.0,2.0,...,2.0,0.0,7.0,3.0,5.0,1.0,5.0,3.0,5.0,3.0
5,45.0,4.0,1.0,1.0,131.0,25.0,63.0,12.0,3.0,19.0,...,37.0,5.0,6.0,4.0,10.0,3.0,15.0,3.0,8.0,34.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,10.0,6.0,2.0,1.0,3.0,6.0,6.0,2.0,1.0,3.0,...,4.0,8.0,11.0,8.0,17.0,37.0,2.0,2.0,2.0,4.0
940,26.0,11.0,6.0,5.0,15.0,30.0,41.0,12.0,2.0,24.0,...,27.0,4.0,16.0,5.0,11.0,2.0,80.0,3.0,13.0,13.0
941,3.0,2.0,2.0,3.0,3.0,6.0,2.0,4.0,1.0,2.0,...,0.0,2.0,9.0,5.0,8.0,2.0,3.0,16.0,2.0,3.0
942,12.0,9.0,6.0,5.0,8.0,21.0,27.0,7.0,1.0,12.0,...,12.0,3.0,9.0,3.0,7.0,2.0,13.0,2.0,59.0,12.0


In [36]:
# @title 신뢰도 기반 cf

def CF_knn_bias_sig(user_id, movie_id, neighbor_size = 0):
  """Bias-aware kNN prediction that ignores low-significance neighbors.

  Reads the module-level `rating_bias`, `rating_mean`, `user_similarity`,
  `counts` (co-rated movie counts per user pair) and the thresholds
  `SIG_LEVEL` and `MIN_RATINGS`.

  Raters who share fewer than SIG_LEVEL co-rated movies with `user_id`
  are dropped. The similarity-weighted mean of the remaining raters'
  deviations is added to the mean rating of `user_id`. With a non-zero
  `neighbor_size` only the most similar raters are used, and more than
  MIN_RATINGS raters must remain.

  Falls back to the user's own mean rating when the movie is not a column
  of `rating_bias`, when too few raters remain, or when the remaining
  similarities sum to zero. The last case used to produce a 0/0 NaN,
  which slipped through the clipping below because NaN fails both
  comparisons.

  The result is clipped to the range 1..5.
  """
  prediction = rating_mean[user_id]

  if movie_id in rating_bias.columns:
    sim_scores = user_similarity[user_id].copy()
    movie_ratings = rating_bias[movie_id].copy()

    no_rating = movie_ratings.isnull()
    # Number of movies rated in common with user_id, per other user.
    common_counts = counts[user_id]
    low_significance = common_counts < SIG_LEVEL
    none_rating_idx = movie_ratings[no_rating | low_significance].index

    movie_ratings = movie_ratings.drop(none_rating_idx)
    sim_scores = sim_scores.drop(none_rating_idx)

    enough_raters = neighbor_size == 0 or len(sim_scores) > MIN_RATINGS
    if enough_raters:
      if neighbor_size != 0:
        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        movie_ratings = np.array(movie_ratings)

        # argsort is ascending, so the last `neighbor_size` entries are the most similar raters.
        user_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[user_idx][-neighbor_size:]
        movie_ratings = movie_ratings[user_idx][-neighbor_size:]

      weight_sum = sim_scores.sum()
      if weight_sum != 0:
        prediction = np.dot(sim_scores, movie_ratings) / weight_sum + prediction

  # Keep the prediction inside the 1..5 range.
  if prediction <= 1:
    prediction = 1
  elif prediction >= 5:
    prediction = 5

  return prediction

# Minimum number of co-rated movies a neighbor must share with the target user
# (CF_knn_bias_sig drops raters whose count is below this).
SIG_LEVEL = 3
# With a neighbor size given, CF_knn_bias_sig needs more than this many
# remaining raters; otherwise it returns the user's mean rating.
MIN_RATINGS = 3
# RMSE of the significance-aware model with 30 neighbors (notebook cell output).
score(CF_knn_bias_sig, 30)

0.9464798109735281

# 📌 **사용자 기반 CF와 아이템 기반 CF**

In [44]:
# @title neighbor_size 고려하지 않는 데이터 셋 설정

# RMSE of a prediction model over the test set (two-argument models)
def score(model):
  """Evaluate `model` on every (user_id, movie_id) row of the global x_test.

  `model` is called as model(user, movie) once per test row; the
  predictions are compared with x_test['rating'] using RMSE.
  """
  y_true = np.array(x_test['rating'])
  y_pred = np.array([
      model(user, movie)
      for user, movie in zip(x_test['user_id'], x_test['movie_id'])
  ])
  return RMSE(y_true, y_pred)

# Movies x users view of the training ratings.
rating_matrix_t = np.transpose(rating_matrix)
# NOTE: rebinds matrix_dummy, which earlier held the users x movies filled matrix.
matrix_dummy = rating_matrix_t.copy().fillna(0)

# Pairwise cosine similarity between the rows (movies) of the filled matrix,
# labelled with movie_id on both axes.
item_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
item_similarity = pd.DataFrame(item_similarity, index = rating_matrix_t.index, columns = rating_matrix_t.index)
item_similarity

movie_id,1,2,3,4,5,6,7,8,9,10,...,1668,1669,1671,1672,1673,1676,1677,1678,1681,1682
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.318634,0.262439,0.353937,0.240039,0.086152,0.475882,0.359441,0.386962,0.204293,...,0.0,0.0,0.0,0.054024,0.040518,0.0,0.040518,0.0,0.054024,0.054024
2,0.318634,1.000000,0.228847,0.341153,0.254527,0.079042,0.329362,0.295613,0.192869,0.152258,...,0.0,0.0,0.0,0.090045,0.000000,0.0,0.000000,0.0,0.090045,0.090045
3,0.262439,0.228847,1.000000,0.259567,0.105927,0.012996,0.279254,0.136606,0.223729,0.081927,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.037012,0.0,0.000000,0.111035
4,0.353937,0.341153,0.259567,1.000000,0.236921,0.065614,0.407440,0.352331,0.307658,0.161538,...,0.0,0.0,0.0,0.065954,0.000000,0.0,0.043969,0.0,0.065954,0.087939
5,0.240039,0.254527,0.105927,0.236921,1.000000,0.047558,0.244849,0.207285,0.160919,0.052141,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.106000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1676,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.077703,0.066245,0.092231,...,0.0,0.0,0.0,0.000000,0.000000,1.0,0.000000,0.0,0.000000,0.000000
1677,0.040518,0.000000,0.037012,0.043969,0.000000,0.000000,0.058602,0.097129,0.082806,0.000000,...,0.0,0.0,0.0,0.000000,0.000000,0.0,1.000000,0.0,0.000000,0.000000
1678,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,1.0,0.000000,0.000000
1681,0.054024,0.090045,0.000000,0.065954,0.000000,0.000000,0.058602,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,1.000000,0.000000,0.0,0.000000,0.0,1.000000,0.000000


In [45]:
# @title 아이템 기반 CF

def CF_IBCF(user_id, movie_id):
  """Item-based CF: predict a rating from the user's ratings of similar movies.

  Reads the module-level `item_similarity` (movies x movies) and
  `rating_matrix_t` (movies x users, NaN = unrated).

  The prediction is the mean of the user's own ratings, weighted by each
  rated movie's similarity to `movie_id`.

  Returns 3.0 when the movie is not a column of `item_similarity`, or
  when the similarities of the user's rated movies sum to zero (the
  weighted mean would otherwise be a 0/0 NaN that turns the whole RMSE
  into NaN).
  """
  if movie_id not in item_similarity.columns:
    return 3.0

  user_rating = rating_matrix_t[user_id]
  # Movies the user has not rated carry no information for the average.
  none_rating_idx = user_rating[user_rating.isnull()].index
  user_rating = user_rating.dropna()
  sim_scores = item_similarity[movie_id].drop(none_rating_idx)

  weight_sum = sim_scores.sum()
  if weight_sum == 0:
    return 3.0
  return np.dot(sim_scores, user_rating) / weight_sum

# RMSE of the item-based CF on the test set (notebook cell output).
score(CF_IBCF)

1.011298698082046