# AutoInt+ 모델 구현 및 학습

In [69]:
import time
import random
import pandas as pd
import numpy as np
import os

from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten, Activation, Input, Embedding, BatchNormalization
from tensorflow.keras.initializers import glorot_normal, Zeros, TruncatedNormal
from tensorflow.keras.regularizers import l2


from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy


from tensorflow.keras.optimizers import Adam
from collections import defaultdict
import math

## 1. Layer 정의

#### 1.1 임베딩 레이어

In [70]:
class FeaturesEmbedding(Layer):  
    '''
    임베딩 레이어
    - 만약 피처(feature) 3개가 각각 10개, 20개, 30개의 고유값을 가진다면 feature_dims는 [10, 20, 30] 형태를 띄게 됨
    - 전체 임베딩을 해야 할 개수는 10+20+30 = 60이므로 '60 x 임베딩_차원_크기'의 행렬이 생성
    '''
    def __init__(self, field_dims, embed_dim, **kwargs):
        super(FeaturesEmbedding, self).__init__(**kwargs)
        self.total_dim = sum(field_dims)
        self.embed_dim = embed_dim
        self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.longlong)
        self.embedding = tf.keras.layers.Embedding(input_dim=self.total_dim, output_dim=self.embed_dim)

    def build(self, input_shape):
        # 임베딩을 빌드하고 초기화
        self.embedding.build(input_shape)
        self.embedding.set_weights([tf.keras.initializers.GlorotUniform()(shape=self.embedding.weights[0].shape)])

    def call(self, x):
        # 들어온 입력의 임베딩을 가짐
        x = x + tf.constant(self.offsets)
        return self.embedding(x)

#### 1.2 DNN 레이어

In [71]:
class MultiLayerPerceptron(Layer):  
    '''
    DNN 레이어
    - Tensorflow Keras에서는 Dense 레이어를 쌓아올린 구조
    - 필요에 따라 배치 정규화도 사용할 수 있음
    '''
    def __init__(self, input_dim, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, init_std=0.0001, output_layer=True):
        super(MultiLayerPerceptron, self).__init__()
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        hidden_units = [input_dim] + list(hidden_units)
        if output_layer:
            hidden_units += [1]
        # Dense layer를 쌓아올립니다.
        self.linears = [Dense(units, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=init_std),
                              kernel_regularizer=tf.keras.regularizers.l2(l2_reg)) for units in hidden_units[1:]]
        # 활성화 함수를 세팅합니다.
        self.activation = tf.keras.layers.Activation(activation)
        # 필요하다면 배치정규화도 진행합니다.
        if self.use_bn:
            self.bn = [BatchNormalization() for _ in hidden_units[1:]]
        self.dropout = Dropout(dropout_rate)

    def call(self, inputs, training=False):
        x = inputs
        for i in range(len(self.linears)):
            # input data가 들어오면 layer를 돌면서 벡터 값을 가져오게 됩니다.
            x = self.linears[i](x)
            if self.use_bn:
                x = self.bn[i](x, training=training)
            # 각 layer마다 나온 벡터 값에 활성화 함수와 dropout을 적용시켜 비선형성 구조와 과적합을 방지합니다.
            x = self.activation(x)
            x = self.dropout(x, training=training)
        return x

#### 1.3 멀티 헤드 셀프 어텐션 레이어

In [72]:
class MultiHeadSelfAttention(Layer):  
    '''
    멀티 헤드 셀프 어텐션 레이어
    - 위에 작성한 수식과 같이 동작
    - 필요에 따라 잔차 연결(residual connection)도 진행
    '''
    def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs):
        if head_num <= 0:
            raise ValueError('head_num must be a int > 0')
        self.att_embedding_size = att_embedding_size
        self.head_num = head_num
        self.use_res = use_res
        self.seed = seed
        self.scaling = scaling
        super(MultiHeadSelfAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
        embedding_size = int(input_shape[-1])
        # 쿼리에 해당하는 매트릭스
        self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed))
        # 키에 해당되는 매트릭스
        self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                     dtype=tf.float32,
                                     initializer=TruncatedNormal(seed=self.seed + 1))
        # 값(value)에 해당되는 매트릭스
        self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed + 2))
        # 필요하다면 잔차 연결도 할 수 있음
        if self.use_res:
            self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                         dtype=tf.float32,
                                         initializer=TruncatedNormal(seed=self.seed))

        super(MultiHeadSelfAttention, self).build(input_shape)

    def call(self, inputs, **kwargs):
        if K.ndim(inputs) != 3:
            raise ValueError("Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))
        
        # 입력이 들어오면 쿼리, 키, 값(value)에 매칭되어 각각의 값을 가져옴
        querys = tf.tensordot(inputs, self.W_Query, axes=(-1, 0))  
        keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0))
        values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0))

        # 헤드 개수에 따라 데이터를 분리
        querys = tf.stack(tf.split(querys, self.head_num, axis=2))
        keys = tf.stack(tf.split(keys, self.head_num, axis=2))
        values = tf.stack(tf.split(values, self.head_num, axis=2))
        
        # 쿼리와 키를 먼저 곱해줌 -> 위 이미지의 식 (5)와 같음
        inner_product = tf.matmul(querys, keys, transpose_b=True)
        if self.scaling:
            inner_product /= self.att_embedding_size ** 0.5
        self.normalized_att_scores =  tf.nn.softmax(inner_product)
        
        # 쿼리와 키에서 나온 어텐션 값을 값(value)에 곱해줌 -> 식 (6)과 같음
        result = tf.matmul(self.normalized_att_scores, values)
        # 식 (7)과 같이 쪼개어진 멀테 헤드를 모아줌
        result = tf.concat(tf.split(result, self.head_num, ), axis=-1)
        result = tf.squeeze(result, axis=0) 

        if self.use_res:
            result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0))
        result = tf.nn.relu(result)
        
        # 그 결과 값 리턴
        return result

    def compute_output_shape(self, input_shape):

        return (None, input_shape[1], self.att_embedding_size * self.head_num)

    def get_config(self, ):
        config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, 'use_res': self.use_res,'seed': self.seed}
        base_config = super(MultiHeadSelfAttention, self).get_config()
        base_config.update(config)
        return base_config

<br>

## 2. AutoInt+ 모델

In [73]:
class AutoIntMLP(Layer): 
    '''
     AutoInt 본체
     앞서서 정의한 layer를 가져와서 계산을 수행
    '''
    def __init__(self, field_dims, embedding_size, att_layer_num=3, att_head_num=2, att_res=True, dnn_hidden_units=(32, 32), dnn_activation='relu',
                 l2_reg_dnn=0, l2_reg_embedding=1e-5, dnn_use_bn=False, dnn_dropout=0.4, init_std=0.0001):
        super(AutoIntMLP, self).__init__()
        self.embedding = FeaturesEmbedding(field_dims, embedding_size)
        self.num_fields = len(field_dims)
        self.embedding_size = embedding_size

        self.final_layer = Dense(1, use_bias=False, kernel_initializer=tf.random_normal_initializer(stddev=init_std))
        
        self.dnn = tf.keras.Sequential()
        for units in dnn_hidden_units:
            self.dnn.add(Dense(units, activation=dnn_activation,
                               kernel_regularizer=tf.keras.regularizers.l2(l2_reg_dnn),
                               kernel_initializer=tf.random_normal_initializer(stddev=init_std)))
            if dnn_use_bn:
                self.dnn.add(BatchNormalization())
            self.dnn.add(Activation(dnn_activation))
            if dnn_dropout > 0:
                self.dnn.add(Dropout(dnn_dropout))
        self.dnn.add(Dense(1, kernel_initializer=tf.random_normal_initializer(stddev=init_std)))

        self.int_layers = [MultiHeadSelfAttention(att_embedding_size=embedding_size, head_num=att_head_num, use_res=att_res) for _ in range(att_layer_num)]

    def call(self, inputs):
        embed_x = self.embedding(inputs)
        dnn_embed = tf.reshape(embed_x, shape=(-1, self.embedding_size * self.num_fields))

        att_input = embed_x
        for layer in self.int_layers:
            att_input = layer(att_input)

        att_output = Flatten()(att_input)
        att_output = self.final_layer(att_output)
        
        dnn_output = self.dnn(dnn_embed)
        y_pred = tf.keras.activations.sigmoid(att_output + dnn_output)
        
        return y_pred

<br>

## 3. 평가 지표

In [74]:
def get_DCG(ranklist, y_true):
    dcg = 0.0
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item in y_true:
            dcg += 1.0 / math.log(i + 2)
    return  dcg

def get_IDCG(ranklist, y_true):
    idcg = 0.0
    i = 0
    for item in y_true:
        if item in ranklist:
            idcg += 1.0 / math.log(i + 2)
            i += 1
    return idcg

def get_NDCG(ranklist, y_true):
    '''NDCG 평가 지표'''
    ranklist = np.array(ranklist).astype(int)
    y_true = np.array(y_true).astype(int)
    dcg = get_DCG(ranklist, y_true)
    idcg = get_IDCG(y_true, y_true)
    if idcg == 0:
        return 0
    return round( (dcg / idcg), 5)

def get_hit_rate(ranklist, y_true):
    '''hitrate 평가 지표'''
    c = 0
    for y in y_true:
        if y in ranklist:
            c += 1
    return round( c / len(y_true), 5 )

<br>

## 4. 모델 테스트

In [75]:
def test_model(model, test_df):
    '''모델 테스트'''
    user_pred_info = defaultdict(list)
    total_rows = len(test_df)
    for i in range(0, total_rows, batch_size):
        features = test_df.iloc[i:i + batch_size, :-1].values
        y_pred = model.predict(features, verbose=False)
        for feature, p in zip(features, y_pred):
            u_i = feature[:2]
            user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
    return user_pred_info

<br>

## 5. 데이터 불러오기 및 세팅

In [76]:
project_path = os.path.abspath(os.getcwd())
data_dir_nm = 'data'
movielens_dir_nm = 'ml-1m'
model_dir_nm = 'model'
data_path = f"{project_path}/{data_dir_nm}"
movielen_path = f"{project_path}/{movielens_dir_nm}"
model_path = f"{project_path}/{model_dir_nm}"

In [77]:
# 1. 데이터 불러오기
movielens_rcmm = pd.read_csv(f"{data_path}/movielens_rcmm_v2.csv", dtype=str)
print(movielens_rcmm.shape)
movielens_rcmm.head()

(1000209, 15)


Unnamed: 0,user_id,movie_id,movie_decade,movie_year,rating_year,rating_month,rating_decade,genre1,genre2,genre3,gender,age,occupation,zip,label
0,1,1193,1970s,1975,2000,12,2000s,Drama,no,no,F,1,10,48067,1
1,1,661,1990s,1996,2000,12,2000s,Animation,Children's,Musical,F,1,10,48067,0
2,1,914,1960s,1964,2000,12,2000s,Musical,Romance,no,F,1,10,48067,0
3,1,3408,2000s,2000,2000,12,2000s,Drama,no,no,F,1,10,48067,1
4,1,2355,1990s,1998,2001,1,2000s,Animation,Children's,Comedy,F,1,10,48067,1


In [78]:
# 2. 라벨 인코더(label encoder)
label_encoders = {col: LabelEncoder() for col in movielens_rcmm.columns[:-1]} # label은 제외

for col, le in label_encoders.items():
    movielens_rcmm[col] = le.fit_transform(movielens_rcmm[col])

In [79]:
movielens_rcmm.head()

Unnamed: 0,user_id,movie_id,movie_decade,movie_year,rating_year,rating_month,rating_decade,genre1,genre2,genre3,gender,age,occupation,zip,label
0,0,189,6,55,0,3,0,7,17,15,0,0,2,1588,1
1,0,3374,8,76,0,3,0,2,2,8,0,0,2,1588,0
2,0,3615,5,44,0,3,0,11,12,15,0,0,2,1588,0
3,0,2503,9,80,0,3,0,7,17,15,0,0,2,1588,1
4,0,1374,8,78,1,0,0,2,2,2,0,0,2,1588,1


In [80]:
movielens_rcmm['label'] = movielens_rcmm['label'].astype(np.float32)

In [81]:
# 3. 학습 데이터와 테스트데이터로 분리
train_df, test_df = train_test_split(movielens_rcmm, test_size=0.2, random_state=42)

In [82]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 800167 entries, 416292 to 121958
Data columns (total 15 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   user_id        800167 non-null  int64  
 1   movie_id       800167 non-null  int64  
 2   movie_decade   800167 non-null  int64  
 3   movie_year     800167 non-null  int64  
 4   rating_year    800167 non-null  int64  
 5   rating_month   800167 non-null  int64  
 6   rating_decade  800167 non-null  int64  
 7   genre1         800167 non-null  int64  
 8   genre2         800167 non-null  int64  
 9   genre3         800167 non-null  int64  
 10  gender         800167 non-null  int64  
 11  age            800167 non-null  int64  
 12  occupation     800167 non-null  int64  
 13  zip            800167 non-null  int64  
 14  label          800167 non-null  float32
dtypes: float32(1), int64(14)
memory usage: 94.6 MB


In [83]:
# 필요 컬럼들과 레이블 정의
# 필드의 각 고유 개수를 정의하는 field_dims를 정의 -> 임베딩 때 활용
u_i_feature = ['user_id', 'movie_id']
meta_features = ['movie_decade', 'movie_year', 'rating_year', 'rating_month', 'rating_decade', 'genre1','genre2', 'genre3', 'gender', 'age', 'occupation', 'zip']
label = 'label'
field_dims = np.max(movielens_rcmm[u_i_feature + meta_features].astype(np.int64).values, axis=0) + 1
field_dims

array([6040, 3706,   10,   81,    4,   12,    1,   18,   18,   16,    2,
          7,   21, 3439])

<br>

## 6. 훈련 환경 및 모델 세팅

In [84]:
# 모델 훈련 설정 세팅
epochs=5               # 에포크
learning_rate= 0.0001  # 학습률
dropout= 0.4           # 드롭아웃
batch_size = 2048      # 배치사이즈
embed_dim= 16          # 임베딩 크기

고칠 부분
이부분도 AutoIntMLP를 가져와주고 DNN 레이어가 붙은 파라미터를 추가해줍니다

In [85]:
# AutoInt 레이어를 가지고 있는 모델 본체
# 해당 모델을 활용해 훈련 진행
class AutoIntModel(Model):
    def __init__(self, field_dims, embedding_size, att_layer_num=3, att_head_num=2,
                 att_res=True, dnn_hidden_units=(32, 32), dnn_activation='relu',
                 l2_reg_dnn=0, l2_reg_embedding=1e-5, dnn_use_bn=False,
                 dnn_dropout=0.4, init_std=0.0001):
        super(AutoIntModel, self).__init__()
        self.autoInt_layer = AutoIntMLP(
            field_dims=field_dims,
            embedding_size=embedding_size,
            att_layer_num=att_layer_num,
            att_head_num=att_head_num,
            att_res=att_res,
            dnn_hidden_units=dnn_hidden_units,
            dnn_activation=dnn_activation,
            l2_reg_dnn=l2_reg_dnn,
            l2_reg_embedding=l2_reg_embedding,
            dnn_use_bn=dnn_use_bn,
            dnn_dropout=dnn_dropout,
            init_std=init_std
        )

    def call(self, inputs, training=False):
        return self.autoInt_layer(inputs, training=training)

In [86]:
# 모델 정의
autoInt_model = AutoIntModel(
    field_dims=field_dims,
    embedding_size=embed_dim,
    att_layer_num=3,
    att_head_num=2,
    att_res=True,
    dnn_hidden_units=(32, 32),   # DNN 은닉층 구조
    dnn_activation='relu',       # 활성화 함수
    l2_reg_dnn=0,
    l2_reg_embedding=1e-5,
    dnn_use_bn=False,
    dnn_dropout=dropout,
    init_std=0.0001
)

In [87]:
autoInt_model.summary()

In [88]:
# 옵티마이저, 오차함수 정의
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy(from_logits=False)

In [89]:
autoInt_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

<br>

## 7. 훈련 및 평가

In [90]:
history = autoInt_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 39ms/step - binary_crossentropy: 0.6871 - loss: 0.6871 - val_binary_crossentropy: 0.6453 - val_loss: 0.6453
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 35ms/step - binary_crossentropy: 0.6298 - loss: 0.6298 - val_binary_crossentropy: 0.5892 - val_loss: 0.5892
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 35ms/step - binary_crossentropy: 0.5743 - loss: 0.5743 - val_binary_crossentropy: 0.5494 - val_loss: 0.5494
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 36ms/step - binary_crossentropy: 0.5433 - loss: 0.5433 - val_binary_crossentropy: 0.5434 - val_loss: 0.5434
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 40ms/step - binary_crossentropy: 0.5364 - loss: 0.5364 - val_binary_crossentropy: 0.5416 - val_loss: 0.5416


In [91]:
# 사용자에게 예측된 정보를 저장하는 딕셔너리
user_pred_info = {}
# top10개
top = 10

# 테스트 값 가져오기
mymodel_user_pred_info = test_model(autoInt_model, test_df)

# 사용자마다 돌면서 예측 데이터 중 가장 높은 top 10만 가져오기
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist

# 원본 테스트 데이터에서 label이 1인 사용자별 영화 정보 가져오기
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|██████████| 6038/6038 [00:00<00:00, 91154.59it/s]


In [92]:
mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

# 모델 예측값과 원본 테스트 데이터를 비교해서
# 어느정도 성능이 나왔는지 NDCG와 Hitrate를 비교

# NDCG
for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    # NDCG 값 구하기
    user_ndcg = get_NDCG(mymodel_pred, testset)

    mymodel_ndcg_result[user] = user_ndcg

# Hitrate
for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    # hitrate 값 구하기
    user_hitrate = get_hit_rate(mymodel_pred, testset)

    # 사용자 hitrate 결과 저장
    mymodel_hitrate_result[user] = user_hitrate

100%|██████████| 5994/5994 [00:00<00:00, 18786.50it/s]
100%|██████████| 5994/5994 [00:00<00:00, 110303.78it/s]


In [93]:
print("mymodel ndcg: ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print("mymodel hitrate: ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

mymodel ndcg:  0.6617
mymodel hitrate:  0.63023


<br>

## 8. 모델 저장

학습된 모델과 가중치를 저장한다.

In [44]:
# 저장할 경로 설정
save_path = f"{data_path}/field_dims2.npy"

# 저장
np.save(save_path, field_dims)

In [101]:
# 모델의 가중치(weight) 저장
# 최근 Keras 버전(특히 TensorFlow 2.15+ 또는 Keras 3+)에서는 확장자 .weights.h5 필수
autoInt_model.save_weights(f"{model_path}/autoIntMLP_model_weights2.weights.h5")

In [48]:
import joblib

# 모델 객체를 pickled binary file 형태로 저장
joblib.dump(label_encoders, f"{data_path}/label_encoders2.pkl")

['/Users/jul_e/Documents/DS/recommender_system_autoint_project/data/label_encoders2.pkl']

<br>

## AutoInt+ 모델 성능 향상 실험

In [97]:
import itertools, gc, numpy as np, pandas as pd, tensorflow as tf
from tqdm.auto import tqdm

# 1. 탐색할 파라미터 그리드
param_grid = {
    "embed_dim" : [8, 16],        # 임베딩 차원
    "dropout"   : [0.2, 0.4],     # 드롭아웃
    "lr"        : [1e-4],         # 학습률
    "batch"     : [1024, 2048],   # 배치사이즈
    "epochs"    : [3]             # 에포크
}

search_space = [
    dict(zip(param_grid.keys(), vals))
    for vals in itertools.product(*param_grid.values())
]

In [98]:
# 2. 단일 실험 실행 함수
def run_single_exp(cfg: dict) -> dict:
    """cfg = {'embed_dim':…, 'dropout':…, …}"""
    # 메모리 초기화
    tf.keras.backend.clear_session()
    gc.collect()
    
    # (1) 모델 생성
    model = AutoIntModel(
        field_dims     = field_dims,
        embedding_size = cfg["embed_dim"],
        dnn_dropout    = cfg["dropout"]
    )
    
    # (2) compile
    model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate = cfg["lr"]),
        loss      = tf.keras.losses.BinaryCrossentropy(from_logits = False)
    )
    
    # (3) 학습
    model.fit(
        train_df[u_i_feature + meta_features],
        train_df[label],
        epochs           = cfg["epochs"],
        batch_size       = cfg["batch"],
        validation_split = 0.1,
        verbose          = 0
    )
    
    # (4) 평가
    user_pred_info = {}
    top = 10
    
    pred_results = test_model(model, test_df)
    for user, data_info in pred_results.items():
        ranklist = sorted(data_info, key=lambda s: s[1], reverse=True)[:top]
        user_pred_info[str(user)] = list(dict.fromkeys(r[0] for r in ranklist))
    
    test_data = (test_df[test_df['label'] == 1]
                 .groupby('user_id')['movie_id']
                 .apply(list))
    
    ndcg_list, hr_list = [], []
    for user, truths in test_data.items():
        preds  = user_pred_info.get(str(user), [])[:top]
        truths = list(set(map(int, truths)))
        ndcg_list.append(get_NDCG(preds, truths))
        hr_list.append(get_hit_rate(preds, truths))
    
    return {
        "embed_dim" : cfg["embed_dim"],
        "dropout"   : cfg["dropout"],
        "lr"        : cfg["lr"],
        "batch"     : cfg["batch"],
        "epochs"    : cfg["epochs"],
        "NDCG@10"   : round(np.mean(ndcg_list), 4),
        "HitRate@10": round(np.mean(hr_list), 4)
    }

In [99]:
# 3. 그리드 탐색 실행
results = []
for cfg in tqdm(search_space, desc="AutoInt+ experiments"):
    results.append(run_single_exp(cfg))

AutoInt+ experiments:   0%|          | 0/8 [00:00<?, ?it/s]

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))


In [100]:
# 4. 결과 요약 표 출력
results_df = (pd.DataFrame(results)
              .sort_values(by="NDCG@10", ascending=False)
              .reset_index(drop=True))

print("\nAutoInt+ 실험 결과")
display(results_df)


AutoInt+ 실험 결과


Unnamed: 0,embed_dim,dropout,lr,batch,epochs,NDCG@10,HitRate@10
0,16,0.2,0.0001,1024,3,0.6619,0.6301
1,16,0.4,0.0001,1024,3,0.6617,0.6301
2,16,0.2,0.0001,2048,3,0.6615,0.6298
3,16,0.4,0.0001,2048,3,0.6609,0.6294
4,8,0.2,0.0001,1024,3,0.6602,0.6279
5,8,0.4,0.0001,1024,3,0.6598,0.6277
6,8,0.2,0.0001,2048,3,0.6592,0.6276
7,8,0.4,0.0001,2048,3,0.6589,0.6282
