 # <font color='red'>GPU Setting</font>

In [1]:
# Pin the process to one physical GPU. This is done before TensorFlow is
# imported so the device selection is already in the environment when
# TensorFlow starts up.
import os
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
import tensorflow as tf
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)


# Report the library versions this notebook was run with.
print('tensorflow version : ' , tf.__version__)
print('keras version : ' , tf.keras.__version__)

# `tf.test.is_gpu_available` is deprecated; listing the physical GPU devices
# is the replacement TensorFlow itself recommends.
print('GPU available ? : ', len(tf.config.list_physical_devices('GPU')) > 0)

tensorflow version :  2.3.1
keras version :  2.4.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU available ? :  True


 # <font color='red'>Import</font>

In [2]:
import pickle
import tqdm
import math
import itertools
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
pd.options.display.max_columns = 50

from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, MultiLabelBinarizer

from deepctr.models import DeepFM
from deepctr.layers import custom_objects
from deepctr.feature_column import SparseFeat,DenseFeat, get_feature_names, VarLenSparseFeat

# 경로설정

In [3]:
# All outputs are written relative to the notebook's working directory.
root_path = './'

# Directory that receives the id mappings and the recommendation list.
rec_list_dir = os.path.join(root_path, 'rec_list')

# Create the output directory on first run; leave it untouched otherwise.
rec_list_dir_missing = not os.path.exists(rec_list_dir)
if rec_list_dir_missing:
    os.makedirs(rec_list_dir)

 # <font color='red'>데이터</font>

# history

In [10]:
# Viewing-history table used as the raw input for inference.
history_csv_path = '../../../../../../../ipr/data/tb_ipr_m_seamless_2nd_iptv.csv'
data = pd.read_csv(history_csv_path)

  interactivity=interactivity, compiler=compiler, result=result)


In [11]:
# Keep only the rows whose `meta` value is 'movie'.
is_movie = data['meta'].eq('movie')
data = data[is_movie]

* training 과 동일하게 조작

In [12]:
# Columns that are not fed to the model. 'buy_history_price' is deliberately
# NOT listed here because it is still used as a dense feature.
drops = ['sum_watch_duration', 's_time', 'e_time', 'album_name', 'gubun',
         'vod_s_point', 'vod_e_point', 'agree_yn', 'min_s_time',
         'view_no', 'fod', 'buy_1_2', 'buy_3', 'buy_seg', 'amt_r_gubun',
         'weekdays', 'weekends', 'dawn', 'morning', 'afternoon', 'evening',
         'watch_ratio', 'current_rate', 're_watch', 'continue_watch',
         'meta_genre', 'genre_large', 'genre_mid', 'genre_small',
         'pr_info_desc', 'meta',
         ]

# `data` is a filtered slice at this point; `drop` without `inplace` returns a
# new frame, so the column assignments below do not write into a view.
data = data.drop(columns=drops)

# Missing categories get the placeholder label 'n'.
data['category_id'] = data['category_id'].fillna('n')

# Missing prices are treated as zero.
price_cols = ['movie_meta_price', 'buy_history_price', 'i30_meta_price']
data[price_cols] = data[price_cols].fillna(0)

# Drop rows without `amt_1_4`; `.copy()` detaches the result from the parent
# frame so later assignments are unambiguous (no SettingWithCopyWarning).
data = data[data['amt_1_4'].notnull()].copy()

# Ids are handled as strings before label encoding.
data['sa_id'] = data['sa_id'].astype(str)
data['album_id'] = data['album_id'].astype(str)

# Fill missing ratings with the column mean of the rows that remain.
# NOTE(review): the means come from this inference data, not from the
# training set - confirm this matches what training did.
data['watcha_avg_rating'] = data['watcha_avg_rating'].fillna(data['watcha_avg_rating'].mean())
data['ncf_rating'] = data['ncf_rating'].fillna(data['ncf_rating'].mean())

In [13]:
# Categorical inputs, label-encoded before being fed to the model.
# The trailing comma keeps a future entry from being silently concatenated
# onto 'pr_info'.
sparse_features = [
    'sa_id',
    'album_id',
    'category_id',
    'pr_info',
]

# Numeric inputs, min-max scaled before being fed to the model.
dense_features = [
    'release_date', 'run_time',
    'movie_meta_price', 'i30_meta_price', 'buy_history_price', 'buy_tot',
    'amt_1_4', 'amt_2_4', 'amt_3_4', 'amt_4_4',
    'watcha_avg_rating',
]

# Label column.
target = ['ncf_rating']

# <font color='red'>label encoding </font>

In [14]:
# Load the per-feature encoders (looked up by feature name further down).
# NOTE: unpickling executes code from the file - only load a trusted artifact.
with open('./lbe_dict.pickle', mode='rb') as f:
    lbe_dict = pickle.load(f)

In [15]:
# Replace every categorical column with the codes from its saved encoder.
for feat in sparse_features:
    encoder = lbe_dict[feat]
    data[feat] = encoder.transform(data[feat])

# Rescale the numeric columns to [0, 1].
# NOTE(review): the scaler is fitted on this inference data rather than loaded
# from training - confirm training used the same data range.
mms = MinMaxScaler(feature_range=(0, 1))
scaled_dense = mms.fit_transform(data[dense_features])
data[dense_features] = scaled_dense

In [16]:
model_pred_dir = './rec_list/'


def _save_id_mapping(encoder, file_name):
    """Write one `original_id,encoded_index` row per class of `encoder`.

    The file is opened in write mode so re-running the cell replaces the
    mapping instead of appending a duplicate copy of every row.
    """
    classes = encoder.classes_
    mapping = np.vstack((classes, list(range(len(classes))))).T
    with open(os.path.join(model_pred_dir, file_name), 'w') as out_file:
        np.savetxt(out_file, mapping, delimiter=',', fmt=['%s', '%i'])


# User id mapping and album id mapping.
_save_id_mapping(lbe_dict['sa_id'], 'unique_uid.csv')
_save_id_mapping(lbe_dict['album_id'], 'unique_sid.csv')

# sa_id & album_id meta

In [17]:
# Columns aggregated per user (sa_id).
sa_feats = [
    'buy_tot',
    'amt_1_4', 'amt_2_4', 'amt_3_4', 'amt_4_4',
]

# Columns aggregated per album (album_id).
album_feats = [
    'category_id',
    'release_date',
    'run_time',
    'movie_meta_price',
    'i30_meta_price',
    'buy_history_price',
    'watcha_avg_rating',
    'pr_info',
]

In [18]:
# One row per encoded id, holding the column-wise maximum of its features.
rows_by_user = data.groupby('sa_id')
rows_by_album = data.groupby('album_id')

sa_id_meta = rows_by_user[sa_feats].max()
album_id_meta = rows_by_album[album_feats].max()

# model load

In [19]:
# Restore the saved model; DeepCTR's custom layers are resolved through
# `custom_objects`.
model = tf.keras.models.load_model('./model/', custom_objects=custom_objects)

# Prediction

In [20]:
# Candidate space for scoring: the encoded user and album ids present in `data`.
user_ids = data['sa_id']
album_ids = data['album_id']

nsa_id = user_ids.nunique()
nalbum_id = album_ids.nunique()
album_array = np.sort(album_ids.unique())  # ascending encoded album ids
topk = 300  # number of recommendations kept per user

In [21]:
def batch_user(num_users, batch_size=1, user_list=None):
    """Yield consecutive slices of user ids, `batch_size` ids at a time.

    Parameters
    ----------
    num_users : int
        Number of leading entries of the user list to iterate over.
    batch_size : int, default 1
        Maximum number of ids per yielded slice; the last slice may be shorter.
    user_list : array-like, optional
        Ids to batch. When omitted, the sorted unique `sa_id` values of the
        notebook-level `data` frame are used (the original behaviour).

    Yields
    ------
    A slice of `user_list` holding at most `batch_size` ids.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if user_list is None:
        # Fall back to the global frame so existing two-argument calls still work.
        user_list = np.sort(data['sa_id'].unique())
    for start in range(0, num_users, batch_size):
        yield user_list[start:min(start + batch_size, num_users)]

In [22]:
# Score every (user, album) pair batch by batch and write each user's top-k
# albums to rec.csv as `encoded_user_id,encoded_album_id,score` rows.
batch_size = 300  # users scored per forward pass

# Album-side model inputs for ONE user, in `album_array` order.
album_inputs_one_user = {'album_id': album_array}
for temp_col in album_id_meta.columns:
    album_inputs_one_user[temp_col] = album_id_meta.loc[album_array, temp_col].values


def _tile_album_inputs(n_users):
    """Repeat the per-album inputs once for each user in a batch."""
    return {col: np.tile(values, n_users) for col, values in album_inputs_one_user.items()}


# Every batch except possibly the last has `batch_size` users, so build that
# tiling once and reuse it.
full_batch_album_inputs = _tile_album_inputs(batch_size)

# Never ask for more recommendations than there are candidate albums.
k = min(topk, nalbum_id)
row_fmt = ['%i', '%i', '%1.5f']

# Write mode: a re-run replaces the file instead of appending a second copy.
with open('./rec_list/rec.csv', 'w') as out_file:
    with tqdm.tqdm(total=math.ceil(nsa_id / batch_size)) as pbar:
        for batch_u in batch_user(nsa_id, batch_size):
            n_batch_users = len(batch_u)

            # The last batch may be shorter and needs its own tiling.
            if n_batch_users == batch_size:
                temp_batch_input = dict(full_batch_album_inputs)
            else:
                temp_batch_input = _tile_album_inputs(n_batch_users)

            # Each user id is repeated once per candidate album.
            temp_batch_input['sa_id'] = np.repeat(batch_u, nalbum_id)

            # User-side features: look up one row per user, then repeat it
            # across that user's candidate albums.
            for temp_col in sa_id_meta.columns:
                user_values = sa_id_meta.loc[batch_u, temp_col].values
                temp_batch_input[temp_col] = np.repeat(user_values, nalbum_id)

            pred = model(temp_batch_input).numpy()
            pred_mat = pred.reshape(-1, nalbum_id)  # one row of scores per user

            for user_id, user_scores in zip(batch_u, pred_mat):
                top_pos = np.argsort(user_scores)[::-1][:k]
                # Map the position back to the encoded album id; the two only
                # coincide when every encoder class occurs in `data`.
                rows = np.column_stack((np.full(k, user_id),
                                        album_array[top_pos],
                                        user_scores[top_pos]))
                # savetxt ends every row with a newline, so one user's last
                # row can no longer run into the next user's first row.
                np.savetxt(out_file, rows, delimiter=',', fmt=row_fmt)

            pbar.update(1)

100%|██████████| 3830/3830 [52:52<00:00,  1.21it/s]
