# 하루삼끼 추천시스템 2
## Hidden layer를 사용하는 Keras 기반의 MF(Matrix Factorization, 행렬 요인화) 구현
- 평가가 적음으로 평점이 적게 학습됨으로 평균을 사전에 파악한후 예측 결과에 추가하여 평점을 높임.

In [1]:
import os
import time

import pandas as pd
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam    # 가중치, bias 최적화

# csv 파일에서 불러오기
r_cols = ['memberno', 'productno', 'rating'] # 사용자 아이디, 아이템 아이디, 평점 데이터
ratings = pd.read_csv('u.data', names=r_cols,  sep=',', encoding='utf-8')
ratings = ratings[['memberno', 'productno', 'rating']].astype(int)            # timestamp 제거

# train test 분리
from sklearn.utils import shuffle

TRAIN_SIZE = 0.75
ratings = shuffle(ratings) # DataFrame형태의 데이터를 무작위로 섞음
cutoff = int(TRAIN_SIZE * len(ratings))
print('cutoff:', cutoff)
ratings_train = ratings.iloc[:cutoff] # 0:75000b
ratings_test = ratings.iloc[cutoff:]  # 75000:100000

cutoff: 225


In [2]:
# u.item 파일을 DataFrame으로 읽기
i_cols = ['productno', 'name', 'cate1','cate2','cate3','cate4','cate5']
products = pd.read_csv('u.item', sep=',', names=i_cols, encoding='utf-8')
products = products.set_index('productno')
products.head()

Unnamed: 0_level_0,name,cate1,cate2,cate3,cate4,cate5
productno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,맛있닭 닭가슴살 소시지 훈제맛,1,0,0,0,0
2,잇메이트 저염 훈제닭가슴살,1,0,0,0,0
3,아워홈 참나무향 닭가슴살,1,0,0,0,0
4,러브잇 닭가슴살 짜장 매운맛,1,0,0,0,0
5,굽네 훈제 닭가슴살 오리지널,1,0,0,0,0


In [3]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dot, Add, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD, Adamax
from tensorflow.keras.callbacks import EarlyStopping   # 학습 자동 중지
from tensorflow.keras.callbacks import ModelCheckpoint # 우수한 학습 모델 파일 저장

In [4]:
print(ratings.memberno.max())
print(ratings.memberno.max()+1) 
print(ratings.productno.max())
print(ratings.productno.max()+1) 

50
51
25
26


In [5]:
# Variable 초기화 
K = 100                             # Latent factor 수, 잠재 요인수 
mu = ratings_train.rating.mean()    # 전체 평균, 연산의 규모를 줄이기 위하여  
M = ratings.memberno.max() + 1       # Number of members
N = ratings.productno.max() + 1      # Number of products

In [6]:
# Keras model
user = Input(shape=(1, ))                                               # User input
item = Input(shape=(1, ))                                               # Item input
# Embedding(입력차원, 출력차원, 정규화 알고리즘)
P_embedding = Embedding(M, K, embeddings_regularizer=l2())(user)        # (M, 1, K)
print(P_embedding.shape)
Q_embedding = Embedding(N, K, embeddings_regularizer=l2())(item)        # (N, 1, K)
print(Q_embedding.shape)
user_bias = Embedding(M, 1, embeddings_regularizer=l2())(user)          # User bias term (M, 1, )
item_bias = Embedding(N, 1, embeddings_regularizer=l2())(item)          # Item bias term (N, 1, )

(None, 1, 100)
(None, 1, 100)


In [7]:
# layers.dot
x = np.arange(4).reshape(2, 2)
print(x)
y = np.arange(4).reshape(2, 2)
print(y)

[[0 1]
 [2 3]]
[[0 1]
 [2 3]]


In [8]:
# 0 * 0 + 1 * 1, 2 * 2 + 3 * 3
layers.dot([x, y], axes=1) # 0 지원 안함, 1: 행우선(행단위) 연산

<tf.Tensor: shape=(2, 1), dtype=int32, numpy=
array([[ 1],
       [13]])>

In [9]:
# layers.dot([x, y], axes=2) # ERROR, 2: 행우선(행단위) 연산
x = np.arange(3).reshape(1, 1, 3)
print(x)
y = np.arange(3).reshape(1, 1, 3)
print(y)

[[[0 1 2]]]
[[[0 1 2]]]


In [10]:
layers.dot([x, y], axes=1) 

<tf.Tensor: shape=(1, 3, 3), dtype=int32, numpy=
array([[[0, 0, 0],
        [0, 1, 2],
        [0, 2, 4]]])>

In [11]:
layers.dot([x, y], axes=2) # 열우선 연산

<tf.Tensor: shape=(1, 1, 1), dtype=int32, numpy=array([[[5]]])>

In [12]:
# Concatenate layers
from tensorflow.keras.layers import Dense, Concatenate, Activation
P_embedding = Flatten()(P_embedding)                                    # (K, )
Q_embedding = Flatten()(Q_embedding)                                    # (K, )
user_bias = Flatten()(user_bias)                                        # (1, )
item_bias = Flatten()(item_bias)                                        # (1, )
R = Concatenate()([P_embedding, Q_embedding, user_bias, item_bias])     # (2K + 2, )

In [13]:
# Neural network
R = Dense(2048)(R)
R = Activation('linear')(R)
R = Dense(256)(R)
R = Activation('linear')(R)
R = Dense(1)(R)

In [14]:
model = Model(inputs=[user, item], outputs=R)
model.compile(loss='mse', optimizer=Adam(lr=0.01),metrics=['mse'])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 100)       5100        ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 100)       2600        ['input_2[0][0]']                
                                                                                              

  super(Adam, self).__init__(name, **kwargs)


In [15]:
es = EarlyStopping(patience=10, restore_best_weights=True)

start = time.time()
result = model.fit(
  x=[ratings_train.memberno.values, ratings_train.productno.values],
  y=ratings_train.rating.values - mu,
  epochs=30, batch_size=30,
  validation_data=(
    [ratings_test.memberno.values, ratings_test.productno.values], ratings_test.rating.values - mu
  ), 
  callbacks=[es]
)
end = time.time()
print('Runtime: {0:.2f} 초'.format(end-start))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Runtime: 7.72 초


In [16]:
model.save('./personalization_mu_mlp2.h5')

In [48]:
import matplotlib.pyplot as plt
plt.plot(result.history['loss'], label="Train loss")
plt.plot(result.history['val_loss'], label="Test loss")
plt.xlabel('epoch')
plt.ylabel('MSE')
plt.legend()
plt.show()

TypeError: 'NoneType' object is not subscriptable

In [18]:
# 테스트 데이터 Prediction
user_ids = ratings_test.memberno.values[0:6]
movie_ids = ratings_test.productno.values[0:6]
predictions = model.predict([user_ids, movie_ids]) + mu
print("Actuals: \n", ratings_test[0:6])
print( )
print("Predictions: \n", predictions)

Actuals: 
      memberno  productno  rating
194        20         19       4
114        33          9       4
272        33         20       4
150         1         10       4
245         2          4       4
42         10         11       2

Predictions: 
 [[2.4893556]
 [3.27747  ]
 [4.411018 ]
 [3.0096974]
 [3.793476 ]
 [4.9037437]]


In [19]:
df6 = ratings_test[0:6]
df6

Unnamed: 0,memberno,productno,rating
194,20,19,4
114,33,9,4
272,33,20,4
150,1,10,4
245,2,4,4
42,10,11,2


In [20]:
# print(type(predictions))
df6['predictions'] = predictions
df6

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,memberno,productno,rating,predictions
194,20,19,4,2.489356
114,33,9,4,3.27747
272,33,20,4,4.411018
150,1,10,4,3.009697
245,2,4,4,3.793476
42,10,11,2,4.903744


In [21]:
ratings_test[0:6]

Unnamed: 0,memberno,productno,rating
194,20,19,4
114,33,9,4
272,33,20,4
150,1,10,4
245,2,4,4
42,10,11,2


In [22]:
df6.sort_values('predictions', ascending=False, inplace=True)
df6

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,memberno,productno,rating,predictions
42,10,11,2,4.903744
272,33,20,4,4.411018
245,2,4,4,3.793476
114,33,9,4,3.27747
150,1,10,4,3.009697
194,20,19,4,2.489356


In [23]:
# Prediction
user_ids = ratings_test.memberno.values
print(type(user_ids))
print(user_ids.shape)
product_ids = ratings_test.productno.values
predictions = model.predict([user_ids, product_ids]) + mu
print("Actuals: \n", ratings_test[0:6])
print( )
print("Predictions: \n", predictions)

<class 'numpy.ndarray'>
(75,)
Actuals: 
      memberno  productno  rating
194        20         19       4
114        33          9       4
272        33         20       4
150         1         10       4
245         2          4       4
42         10         11       2

Predictions: 
 [[2.4893556 ]
 [3.27747   ]
 [4.411018  ]
 [3.0096972 ]
 [3.793476  ]
 [4.9037437 ]
 [3.2389398 ]
 [3.7300603 ]
 [0.99938893]
 [4.020634  ]
 [2.4561076 ]
 [4.3779297 ]
 [2.85236   ]
 [3.3957129 ]
 [3.3455896 ]
 [3.7664096 ]
 [4.1647596 ]
 [4.1647596 ]
 [4.9967613 ]
 [4.1339197 ]
 [2.3172703 ]
 [4.0215983 ]
 [2.8126326 ]
 [3.7878873 ]
 [3.692148  ]
 [3.6850362 ]
 [2.341914  ]
 [3.425059  ]
 [3.9591405 ]
 [4.794595  ]
 [3.6101255 ]
 [2.9324057 ]
 [3.9615521 ]
 [2.1434674 ]
 [3.7527053 ]
 [4.16829   ]
 [2.7371883 ]
 [4.6923213 ]
 [2.6411486 ]
 [4.869233  ]
 [1.9710778 ]
 [2.9324057 ]
 [3.0700362 ]
 [4.5402536 ]
 [3.4487984 ]
 [4.9624143 ]
 [3.4471493 ]
 [2.851757  ]
 [3.040804  ]
 [3.023727  ]
 [2.6690967 

In [24]:
df = ratings_test.copy()
# print(type(predictions))
df['predictions'] = predictions
df.sort_values('predictions', ascending=False, inplace=True)
df.head(10)

Unnamed: 0,memberno,productno,rating,predictions
291,37,2,4,5.270503
237,42,13,2,5.035765
231,37,16,5,4.996761
207,9,15,3,4.962414
42,10,11,2,4.903744
218,10,7,5,4.877057
142,4,17,2,4.869233
203,32,8,4,4.794595
80,23,1,5,4.692321
151,27,20,3,4.626135


In [25]:
df[df['memberno'] == 30].head(10)

Unnamed: 0,memberno,productno,rating,predictions
219,30,13,2,2.812633
155,30,19,4,2.456108
160,30,23,3,2.341914


### 개별 회원 추천

In [26]:
# 모델 로딩
model = load_model('./personalization_mu_mlp.h5')

# 상품 로딩
i_cols = ['productno', 'name', 'cate1','cate2','cate3','cate4','cate5']
products = pd.read_csv('u.item', sep=',', names=i_cols, encoding='utf-8')
products = products.set_index('productno')
display(products.head())

# 평점 로딩
r_cols = ['memberno', 'productno', 'rating'] # 사용자 아이디, 아이템 아이디, 평점 데이터
ratings = pd.read_csv('u.data', names=r_cols,  sep=',', encoding='utf-8')
ratings = ratings[['memberno', 'productno', 'rating']].astype(int)            # timestamp 제거
display(ratings.head())

Unnamed: 0_level_0,name,cate1,cate2,cate3,cate4,cate5
productno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,맛있닭 닭가슴살 소시지 훈제맛,1,0,0,0,0
2,잇메이트 저염 훈제닭가슴살,1,0,0,0,0
3,아워홈 참나무향 닭가슴살,1,0,0,0,0
4,러브잇 닭가슴살 짜장 매운맛,1,0,0,0,0
5,굽네 훈제 닭가슴살 오리지널,1,0,0,0,0


Unnamed: 0,memberno,productno,rating
0,4,19,3
1,30,13,3
2,8,21,1
3,26,23,2
4,29,24,1


In [27]:
# 아직 구매하지 않은 상품 리스트 함수
def get_unorder(ratings, products, memberno):
     # 특정 user_id가 평점을 매긴 모든 상품 리스트
    order_products = ratings[ratings['memberno']== memberno]['productno'].tolist()
    
    # 모든 상품 list 객체로 만듬. 
    total_products = products.index.tolist()
      
    # 한줄 for + if문으로 구매하지 않은 상품 리스트 생성
    unorder_products = [ product for product in total_products if product not in order_products]
    
    total_product_cnt = len(total_products) # 모든 상품수
    order_cnt = len(order_products)         # 주문한 상품수
    unorder_cnt = len(unorder_products)     # 주문하지 않은 상품수
    
    print(f"전체 상품 수: {total_product_cnt}, 평점 매긴 상품 수: {order_cnt}, 추천 대상 상품 수: {unorder_cnt}")
    
    return unorder_products

In [28]:
memberno = int(input('회원 번호를 입력하세요(3-50):'))
unorder_products = get_unorder(ratings, products, memberno)

회원 번호를 입력하세요(3-50):30
전체 상품 수: 25, 평점 매긴 상품 수: 7, 추천 대상 상품 수: 19


In [29]:
user_ids = np.array([memberno] * 19)
print(user_ids)
product_ids = np.array(unorder_products)
print(product_ids)
print(mu)

[30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30]
[ 1  2  3  4  6  7  8  9 10 11 12 14 15 16 17 18 20 21 25]
3.56


In [31]:
print(len(unorder_products))
user_ids = np.array([memberno] * 19)
print(type(user_ids))
print(user_ids.shape)
product_ids = np.array(unorder_products)
print(product_ids.shape)
predictions = model.predict([user_ids, product_ids]) + mu
display(predictions[:5,0])

19
<class 'numpy.ndarray'>
(19,)
(19,)


array([3.9272854, 3.41038  , 3.2854269, 3.6819966, 3.779529 ],
      dtype=float32)

In [32]:
user_ids_df = pd.DataFrame(user_ids).rename(columns={0:'user_id'})
user_ids_df.head()

Unnamed: 0,user_id
0,30
1,30
2,30
3,30
4,30


In [33]:
product_ids_df = pd.DataFrame(product_ids).rename(columns={0:'productno'})
product_ids_df.head()

Unnamed: 0,productno
0,1
1,2
2,3
3,4
4,6


In [34]:
product_ids_df = pd.DataFrame(product_ids).rename(columns={0:'productno'})
product_ids_df.head()

Unnamed: 0,productno
0,1
1,2
2,3
3,4
4,6


In [35]:
predictions_df = pd.DataFrame(predictions).rename(columns={0:'prediction'})
predictions_df.head()

Unnamed: 0,prediction
0,3.927285
1,3.41038
2,3.285427
3,3.681997
4,3.779529


In [36]:
df = pd.concat([user_ids_df, product_ids_df, predictions_df], axis=1)
df.head()

Unnamed: 0,user_id,productno,prediction
0,30,1,3.927285
1,30,2,3.41038
2,30,3,3.285427
3,30,4,3.681997
4,30,6,3.779529


In [37]:
df.sort_values(['prediction'], ascending=False, inplace=True)
df.head()

Unnamed: 0,user_id,productno,prediction
10,30,12,4.508786
6,30,8,4.182733
11,30,14,4.174152
7,30,9,4.055599
8,30,10,3.951944


In [38]:
product_ids = df['productno']
print(product_ids)

10    12
6      8
11    14
7      9
8     10
9     11
0      1
5      7
12    15
4      6
3      4
16    20
18    25
13    16
1      2
2      3
14    17
17    21
15    18
Name: productno, dtype: int32


In [39]:
print(type(df['prediction']))
print(df['prediction'])

<class 'pandas.core.series.Series'>
10    4.508786
6     4.182733
11    4.174152
7     4.055599
8     3.951944
9     3.949545
0     3.927285
5     3.910068
12    3.870086
4     3.779529
3     3.681997
16    3.662662
18    3.577274
13    3.424046
1     3.410380
2     3.285427
14    3.217551
17    3.068177
15    3.001008
Name: prediction, dtype: float32


In [40]:
recom_df = products.loc[df['productno']]
recom_df.head(6)

Unnamed: 0_level_0,name,cate1,cate2,cate3,cate4,cate5
productno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12,밥보다 샐러드 닭가슴살햄슬라이스&스모크드치즈,0,0,1,0,0
8,밀스원 탄두리치킨볶음밥 & 참치오믈렛,0,1,0,0,0
14,수비드닭가슴살빅샐러드,0,0,1,0,0
9,맛있닭 더담은 도시락 닭가슴살 큐브마늘맛 & 연근우엉밥,0,1,0,0,0
10,더 담아 꽉~채운 계란야채곤약볶음밥&토마토소스닭가슴살슬라이스,0,1,0,0,0
11,포켓샐러드 닭가슴살 샐러드,0,0,1,0,0


In [41]:
recom_df['prediction'] = np.array(df['prediction'])
recom_df

Unnamed: 0_level_0,name,cate1,cate2,cate3,cate4,cate5,prediction
productno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
12,밥보다 샐러드 닭가슴살햄슬라이스&스모크드치즈,0,0,1,0,0,4.508786
8,밀스원 탄두리치킨볶음밥 & 참치오믈렛,0,1,0,0,0,4.182733
14,수비드닭가슴살빅샐러드,0,0,1,0,0,4.174152
9,맛있닭 더담은 도시락 닭가슴살 큐브마늘맛 & 연근우엉밥,0,1,0,0,0,4.055599
10,더 담아 꽉~채운 계란야채곤약볶음밥&토마토소스닭가슴살슬라이스,0,1,0,0,0,3.951944
11,포켓샐러드 닭가슴살 샐러드,0,0,1,0,0,3.949545
1,맛있닭 닭가슴살 소시지 훈제맛,1,0,0,0,0,3.927285
7,잇메이트 닭가슴살소세지볶음밥 마늘맛,0,1,0,0,0,3.910068
15,아임닭 23 단백질 닭가슴살 샐러드,0,0,1,0,0,3.870086
6,맛있닭 닭가슴살 한끼 곤약볶음밥 양송이,0,1,0,0,0,3.779529


In [42]:
def recomm_product(model, memberno, unorder_products, top_n=10, mu=3.56):
    user_ids = np.array([memberno] * len(unorder_products))
    product_ids = np.array(unorder_products)
    predictions = model.predict([user_ids, product_ids]) + mu
    display(predictions[:5,0])
    
    user_ids_df = pd.DataFrame(user_ids).rename(columns={0:'memberno'})
    product_ids_df = pd.DataFrame(product_ids).rename(columns={0:'productno'})
    predictions_df = pd.DataFrame(predictions).rename(columns={0:'prediction'})
    df = pd.concat([user_ids_df, product_ids_df, predictions_df], axis=1)
    df.sort_values(['prediction'], ascending=False, inplace=True)
    
    df_sub = df.head(top_n)
    recom_df = products.loc[df_sub['productno']]
    recom_df['prediction'] = np.array(df_sub['prediction'])
    
    return recom_df[['name', 'prediction']]

In [45]:
mu = 3.56
model = load_model('./personalization_mu_mlp.h5')
user_id = int(input('회원 번호를 입력하세요(1):')) # 1
unorder_products = get_unorder(ratings, products, memberno)
df = recomm_product(model, memberno, unorder_products, 10, mu)
display(df)

회원 번호를 입력하세요(1):30
전체 상품 수: 25, 평점 매긴 상품 수: 7, 추천 대상 상품 수: 19


array([3.9272854, 3.41038  , 3.2854269, 3.6819966, 3.779529 ],
      dtype=float32)

Unnamed: 0_level_0,name,prediction
productno,Unnamed: 1_level_1,Unnamed: 2_level_1
12,밥보다 샐러드 닭가슴살햄슬라이스&스모크드치즈,4.508786
8,밀스원 탄두리치킨볶음밥 & 참치오믈렛,4.182733
14,수비드닭가슴살빅샐러드,4.174152
9,맛있닭 더담은 도시락 닭가슴살 큐브마늘맛 & 연근우엉밥,4.055599
10,더 담아 꽉~채운 계란야채곤약볶음밥&토마토소스닭가슴살슬라이스,3.951944
11,포켓샐러드 닭가슴살 샐러드,3.949545
1,맛있닭 닭가슴살 소시지 훈제맛,3.927285
7,잇메이트 닭가슴살소세지볶음밥 마늘맛,3.910068
15,아임닭 23 단백질 닭가슴살 샐러드,3.870086
6,맛있닭 닭가슴살 한끼 곤약볶음밥 양송이,3.779529


In [49]:
model1 = load_model('./personalization_mu_mlp.h5')
model2 = load_model('./personalization_mu_mlp2.h5')