### 아이템 기반 CF

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

# --- User table: pipe-separated, no header row, so column names are supplied.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(
    '/content/drive/MyDrive/recosys/u.user',
    sep='|',
    names=u_cols,
    encoding='latin-1',
)

# --- Movie table: five descriptive columns followed by one column per genre.
# NOTE(review): 'Film_Horror' looks like a typo for the MovieLens genre
# 'Film-Noir', and 'war'/'western' are lower-cased unlike the other genres.
# The labels are left untouched here because other code may select columns by
# these exact names -- confirm before renaming.
i_cols = [
    'movie_id', 'title', 'release_date', 'video_release_date', 'IMDB URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film_Horror', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'war', 'western',
]
movies = pd.read_csv(
    '/content/drive/MyDrive/recosys/u.item',
    sep='|',
    names=i_cols,
    encoding='latin-1',
)

# --- Rating table: tab-separated (user, movie, rating, timestamp) rows.
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    '/content/drive/MyDrive/recosys/u.data',
    sep='\t',
    names=r_cols,
    encoding='latin-1',
)

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rating rows for evaluation. The user_id column is used
# as the stratification label, so the split is balanced per user.
# NOTE(review): no random_state is passed, so the split (and every score
# computed from it) differs from run to run.
x = ratings.copy()
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
)

def RMSE(y_true, y_pred):
  """Return the root-mean-square error between two equally shaped sequences.

  Args:
    y_true: Array-like of observed values.
    y_pred: Array-like of predicted values, aligned element-wise with y_true.

  Returns:
    The square root of the mean squared element-wise difference.
  """
  residuals = np.asarray(y_true) - np.asarray(y_pred)
  mean_squared_error = np.mean(np.square(residuals))
  return np.sqrt(mean_squared_error)

def score(model):
  """Evaluate a rating predictor on the held-out test ratings.

  Args:
    model: Callable taking (user_id, movie_id) and returning a predicted
      rating.

  Returns:
    The RMSE between the ratings in x_test and the model's predictions for
    the same (user_id, movie_id) pairs.
  """
  # One prediction per test row, in the same order as x_test's rows.
  predictions = np.array([
      model(uid, mid)
      for uid, mid in zip(x_test['user_id'], x_test['movie_id'])
  ])
  actual = np.array(x_test['rating'])
  return RMSE(actual, predictions)

In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# user_id x movie_id table of training ratings; pairs without a rating in
# x_train are left as NaN by the pivot.
rating_matrix = x_train.pivot(index='user_id', columns='movie_id', values='rating')

# movie_id x user_id orientation, so each row is one movie's rating vector.
rating_matrix_t = rating_matrix.T

# Missing ratings are replaced by 0 before the similarity computation; the
# NaN-preserving rating_matrix_t is kept for later lookups.
matrix_dummy = rating_matrix_t.fillna(0)

# Square movie x movie cosine-similarity table, labelled by movie_id on both
# axes.
item_similarity = pd.DataFrame(
    cosine_similarity(matrix_dummy, matrix_dummy),
    index=rating_matrix_t.index,
    columns=rating_matrix_t.index,
)

In [5]:
def cf_ibcf(user_id, movie_id):
  """Predict a user's rating for a movie with item-based collaborative filtering.

  The prediction is the mean of the ratings the user gave in the training
  data, weighted by each rated movie's similarity to ``movie_id``.

  Args:
    user_id: Column label of ``rating_matrix_t`` identifying the user.
    movie_id: Column label of ``item_similarity`` identifying the target
      movie.

  Returns:
    The similarity-weighted mean rating, or 3.0 when no prediction can be
    formed: the movie or the user is absent from the training matrices, or
    the similarity weights of the user's rated movies sum to zero.
  """
  default_rating = 3.0

  # Movies or users that never appear in the training split have no
  # similarity column / rating column to look up.
  if movie_id not in item_similarity or user_id not in rating_matrix_t:
    return default_rating

  # Keep only the movies this user rated, and the target movie's similarity
  # to exactly those movies (same labels, same order).
  user_rating = rating_matrix_t[user_id].dropna()
  sim_scores = item_similarity.loc[user_rating.index, movie_id]

  # A zero weight sum (no rated movies, or none similar to the target) would
  # make the division below produce NaN, and a single NaN prediction turns the
  # RMSE over the whole test set into NaN.
  weight_sum = sim_scores.sum()
  if weight_sum == 0:
    return default_rating

  return np.dot(sim_scores, user_rating) / weight_sum

# Evaluate the item-based CF predictor: RMSE of cf_ibcf over the rows of x_test.
score(cf_ibcf)

1.0138906743187113