In [2]:
"""
*** About this recommendation model ***

このレコメンドではパラメータ５つに関して、類似した商品を抽出するモデル
パラメータ＝＞カテゴリ、モデル、価格帯、性別、高評価

まず「協調フィルタリング」で4つのパラメータ（カテゴリ、モデル、価格帯、性別）で類似した商品を抽出

そのあと「RankNet」で高評価パラメータを使い、高評価順に並べ替える
"""

import numpy as np
import pandas as pd
import collections 
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from gensim import models
from gensim.models.doc2vec import LabeledSentence
import tensorflow as tf
from keras.utils import print_summary
from keras import layers
from keras.models import Model, save_model, Sequential
from keras.layers import Activation, Dense, Input, Lambda, Dropout
from keras import backend as K
%matplotlib inline

"""
# 対象ユーザーのメタデータ予測
"""
#　対象ユーザーのデータ読み込み
df=pd.read_csv('dataset.csv')
dele_col = ['model_id', 'brand_id', 'brand_name','kaiin_id', 'tori_id', 'cate_name',
           'syo_id', 'man', 'women', 'age80th', 'age90th', 'age_20th', 'suryo', 'tanka', 'bad',
       'safety_tesuryo', 'popularity_boost', 'thm_id', 'minyuka_tag', 'model',
       'line', 'series', 'kigata']
df=df.drop(dele_col, axis=1).fillna(0)
user_data=df[:1]
print(user_data.shape)
user_data.head()

(1, 6)


Unnamed: 0,cate_1_id,cate_3_id,dispflg,cate_id,category_level,label
0,1001,3112.0,1,3112,3,2114


In [3]:
# 協調フィルタリング

def c_filtering(INPUT_DIM, weight_path):
    classes=91
    model = Sequential()
    model.add(Dense(2048, input_dim=INPUT_DIM, activation='relu'))
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(classes, activation='softmax'))
    model.load_weights(weight_path)
    model.compile(optimizer='Adam',
                    loss='categorical_crossentropy', 
                    metrics=['accuracy'])
    return model

# 標準化
u_columns=list(user_data.columns)

sc = StandardScaler()
user_data = sc.fit_transform(user_data)
user_data=pd.DataFrame(user_data)
user_data.columns=u_columns

In [4]:
# ターゲットユーザーのメタデータ上位５　(labels)の予測
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
K.set_session(sess)

cfil_weight='/home/ubuntu/c_point/cfil_model.h5'
model=c_filtering(6, cfil_weight)
pred = model.predict(user_data, verbose=1)

top_k=sess.run(tf.nn.top_k(pred,k=5,sorted=True))
labels=list(top_k[1][0])
print(labels)

[60, 59, 70, 58, 69]


In [12]:
def kbn_type(target_df, dataset):
    kbn_colmuns=['model', 'line','series','kigata']
    kbn_type=[]
    for kbn in kbn_colmuns:
        for value in target_df[kbn]:
            if value==1:
                kbn_type.append(kbn)
    kbn_type=','.join(kbn_type)
    return dataset[dataset[kbn_type]==1]


def tanka(target_df, dataset):
    price=int(target_df['tanka'])
    yen=2000
    tankset=dataset[(dataset['tanka']>=(price-yen)) & (dataset['tanka']<=(price+yen))]
    return pd.DataFrame(tankset)


def sex(target_df, dataset):
    sex_colmuns=['man', 'women']
    sex_type=[]
    for sex in sex_colmuns:
        for value in target_df[sex]:
            if value==1:
                sex_type.append(sex)
    sex_type=','.join(sex_type)
    return dataset[dataset[sex_type]==1]

In [5]:
"""
用意してた商品のデータセット(ddf)のメタデータ予測
"""

# データセット (ddf) の読み込み
ddf=pd.read_csv('Rankdf.csv')
ddf=ddf.drop('Unnamed: 0', axis=1)
ddf=ddf.fillna(0)
print(ddf.shape)
ddf.head()

(884350, 30)


Unnamed: 0,kaiin_id,tori_id,syo_id,cate_id1,cate_id2,cate_id3,cate_level,dispflg,model_id,tanka,...,teikei_cd,model,line,series,kigata,minyuka_tag,ps,view_detail,add_fav,rank
0,1582246,12855050,34375600,1002,2201,3263.0,3,1,256,34990.0,...,0,1,0,0,0,0,0,0,1,0.87875
1,1582246,12855050,34375600,1002,2201,3263.0,3,1,256,34990.0,...,0,1,0,0,0,0,0,1,0,0.75375
2,1582246,12855050,34375600,1002,2201,3263.0,3,1,256,34990.0,...,0,1,0,0,0,0,0,0,0,0.37875
3,1582246,12855050,34375600,1002,2201,3263.0,3,1,256,34990.0,...,0,1,0,0,0,0,0,1,0,0.75375
4,1582246,12855050,34375600,1002,2201,3263.0,3,1,256,34990.0,...,0,1,0,0,0,0,0,1,0,0.755


In [6]:
dcol = ['kaiin_id', 'tori_id', 'syo_id','model_id', 'man', 'women','tanka', 'brand_id',
       'rating', 'brand_name', 'cate_name', 'good', 'safety_tesuryo',
       'popularity_boost', 'teikei_cd', 'model', 'line', 'series', 'kigata',
       'minyuka_tag', 'ps', 'view_detail', 'add_fav', 'rank']
dee=ddf.drop(dcol, axis=1).fillna(0)

# 標準化
f1_columns=list(dee.columns)

sc = StandardScaler()
dee = sc.fit_transform(dee)
dee=pd.DataFrame(dee)
dee.columns=f1_columns
dee.head()

Unnamed: 0,cate_id1,cate_id2,cate_id3,cate_level,dispflg,cate_id
0,0.95417,0.308145,0.204376,0.12981,0.0,0.167486
1,0.95417,0.308145,0.204376,0.12981,0.0,0.167486
2,0.95417,0.308145,0.204376,0.12981,0.0,0.167486
3,0.95417,0.308145,0.204376,0.12981,0.0,0.167486
4,0.95417,0.308145,0.204376,0.12981,0.0,0.167486


In [7]:
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
K.set_session(sess)

#  データセットのメタデータ予測 & データセット加工

cfil_weight='/home/ubuntu/c_point/cfil_model.h5'
model=c_filtering(6, cfil_weight)
metas = model.predict(dee, verbose=1)

metass=sess.run(tf.nn.top_k(metas,k=1,sorted=True))
meta_label=np.array(metass[1])
print(meta_label.shape)


f=pd.DataFrame(meta_label, columns=['meta_label'])
ddf=pd.concat([ddf, f], axis=1)
print(ddf.shape)

(884350, 1)
(884350, 31)


In [8]:
"""
用意してた商品のデータセット(ddf)から、類似商品抽出
"""

# 対象ユーザーデータの読み込み

df=pd.read_csv('dataset.csv')
target_user=df[:1]
target_user

Unnamed: 0,kaiin_id,syo_id,tori_id,model_id,cate_1_id,cate_3_id,dispflg,man,women,age80th,...,model,line,series,kigata,cate_id,brand_id,brand_name,cate_name,category_level,label
0,4945115,28199387,10835063,98,1001,3112.0,1,1,0,0,...,0,1,0,0,3112,158,CHANEL(シャネル),コインケース・小銭入れ,3,2114


In [14]:
# 同じメタデータ同士を抽出 ＆ 結合

la1=ddf[ddf['meta_label']==labels[0]]
la2=ddf[ddf['meta_label']==labels[1]]
la3=ddf[ddf['meta_label']==labels[2]]
la4=ddf[ddf['meta_label']==labels[3]]
la5=ddf[ddf['meta_label']==labels[4]]

print(la1.shape, la2.shape, la3.shape, la4.shape, la5.shape)
labelset=pd.concat([la1, la2, la3, la4, la5])
print(labelset.shape)

# メタデータ内で同じcate_idを削除

labelset=ddf[ddf['cate_id']!=int(target_user['cate_id'])]
print(labelset.shape)

(671514, 31) (0, 31) (29525, 31) (0, 31) (0, 31)
(701039, 31)
(879348, 31)


In [15]:
# 類似モデル抽出

kbnset=kbn_type(target_user, labelset)
print(kbnset.shape)

(102013, 31)


In [16]:
# 類似価格帯の抽出

tankaset=tanka(target_user, kbnset)
print(tankaset.shape)


# 同性の抽出
c_filset=sex(target_user, tankaset)
c_filsets=c_filset.reset_index()
print(c_filsets.shape)
c_filsets.head()


# 取引IDの重複削除
#c_filset=sexset[~sexset['tori_id'].duplicated()]

(1597, 31)
(401, 32)


Unnamed: 0,index,kaiin_id,tori_id,syo_id,cate_id1,cate_id2,cate_id3,cate_level,dispflg,model_id,...,model,line,series,kigata,minyuka_tag,ps,view_detail,add_fav,rank,meta_label
0,45716,5943998,13257294,30492756,1002,2208,3408.0,3,1,89,...,0,1,0,0,0,0,1,0,1.13125,48
1,45717,5943998,13257294,30492756,1002,2208,3408.0,3,1,89,...,0,1,0,0,0,0,0,1,1.25625,48
2,45718,5943998,13257294,30492756,1002,2208,3408.0,3,1,89,...,0,1,0,0,0,0,0,0,0.75625,48
3,45719,5943998,13257294,30492756,1002,2208,3408.0,3,1,89,...,0,1,0,0,0,0,1,0,1.13125,48
4,45720,5943998,13257294,30492756,1002,2208,3408.0,3,1,89,...,0,1,0,0,0,0,1,0,1.13125,48


In [19]:
"""
「RankNet」で高評価パラメータを使い、高評価順に並べ替える
"""

# 正規化し、Ranknet用にデータ加工

rank_drop_col = ['index', 'kaiin_id', 'tori_id', 'syo_id', 'model_id', 'tanka', 'cate_id',
       'brand_id', 'cate_name', 'brand_name', 'man', 'women', 'model',
       'line', 'series', 'kigata', 'safety_tesuryo','cate_id1', 'cate_id2', 
        'cate_id3', 'cate_level', 'dispflg','view_detail', 'rank']
rankset=c_filsets.drop(rank_drop_col, axis=1)



rankset_columns=list(rankset.columns)
sc = StandardScaler()
dff = sc.fit_transform(rankset)
rankset=pd.DataFrame(dff)
rankset.columns=rankset_columns

print('data_size:{}'.format(rankset.shape))
rankset.head()

data_size:(401, 8)


Unnamed: 0,rating,good,popularity_boost,teikei_cd,minyuka_tag,ps,add_fav,meta_label
0,0.396169,1.307463,0.0,1.577009,-0.19713,-0.25231,-0.447883,0.377694
1,0.396169,1.307463,0.0,1.577009,-0.19713,-0.25231,2.232728,0.377694
2,0.396169,1.307463,0.0,1.577009,-0.19713,-0.25231,-0.447883,0.377694
3,0.396169,1.307463,0.0,1.577009,-0.19713,-0.25231,-0.447883,0.377694
4,0.396169,1.307463,0.0,1.577009,-0.19713,-0.25231,-0.447883,0.377694


In [20]:
# 評価値(rank)の予測

def Ranknet(INPUT_DIM, weight_path):
    model = Sequential()
    model.add(Dense(INPUT_DIM, input_dim=INPUT_DIM, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    model.load_weights(weight_path)
    model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

rank_weight_path='/home/ubuntu/c_point/rank_model.h5'
model=Ranknet(8,rank_weight_path)
rank = model.predict(rankset, verbose=1, steps=1)
print(rank.shape)
print(rank.min())
print(rank.max())

(401, 1)
-1.954536
2.8852162


In [21]:
# レコメンド商品を高評価順に上位12こ抽出

def recommend(rank, rankset, c_filsets):
    topN_items=12
    rank=[i for i in rank]
    f=pd.DataFrame(rank, columns=['rank'])
    dataset=pd.concat([rankset, f], axis=1)
    dataset=dataset.sort_values(by='rank', ascending=False)

    idxs=dataset.index[:topN_items]
    recommend=[v for i, v in c_filsets.iterrows() if i in idxs]
    return pd.DataFrame(recommend)


recommendation=recommend(rank, rankset, c_filsets)
d_col = ['index', 'popularity_boost', 'teikei_cd', 'good', 'safety_tesuryo',
         'model', 'line', 'series', 'kigata', 'rank']
recommendations=recommendation.drop(d_col, axis=1)
recommendations

Unnamed: 0,kaiin_id,tori_id,syo_id,cate_id1,cate_id2,cate_id3,cate_level,dispflg,model_id,tanka,...,rating,brand_name,cate_name,man,women,minyuka_tag,ps,view_detail,add_fav,meta_label
103,5416825,11940563,28748742,1001,2114,3111.0,3,1,1566,70200.0,...,5,GUCCI(グッチ),折りたたみ財布,1,0,1,0,0,1,60
179,5416825,11936206,28748742,1001,2114,3111.0,3,1,1566,70200.0,...,5,GUCCI(グッチ),折りたたみ財布,1,0,1,0,0,0,60
188,5416825,11936206,28748742,1001,2114,3111.0,3,1,1566,70200.0,...,5,GUCCI(グッチ),折りたたみ財布,1,0,1,0,1,0,60
197,5416825,11936206,28748742,1001,2114,3111.0,3,1,1566,70200.0,...,5,GUCCI(グッチ),折りたたみ財布,1,0,1,0,0,1,60
214,5416825,11936206,28748742,1001,2114,3111.0,3,1,1566,70200.0,...,5,GUCCI(グッチ),折りたたみ財布,1,0,1,0,0,0,60
255,3778391,11137624,24808411,1001,2114,3169.0,3,1,89,69800.0,...,5,Louis Vuitton(ルイヴィトン),長財布,1,0,0,1,0,1,60
263,3778391,11137624,24808411,1001,2114,3169.0,3,1,89,69800.0,...,5,Louis Vuitton(ルイヴィトン),長財布,1,0,0,1,0,1,60
379,4478163,11806994,28887645,1001,2114,4114.0,3,1,98,69500.0,...,5,CHANEL(シャネル),キーケース,1,0,0,1,1,0,70
384,4478163,11806994,28887645,1001,2114,4114.0,3,1,98,69500.0,...,5,CHANEL(シャネル),キーケース,1,0,0,1,1,0,70
390,4478163,11806994,28887645,1001,2114,4114.0,3,1,98,69500.0,...,5,CHANEL(シャネル),キーケース,1,0,0,1,1,0,70


In [22]:
# 対象ユーザーのデータとレコメンドデータの内容比較
target_user

Unnamed: 0,kaiin_id,syo_id,tori_id,model_id,cate_1_id,cate_3_id,dispflg,man,women,age80th,...,model,line,series,kigata,cate_id,brand_id,brand_name,cate_name,category_level,label
0,4945115,28199387,10835063,98,1001,3112.0,1,1,0,0,...,0,1,0,0,3112,158,CHANEL(シャネル),コインケース・小銭入れ,3,2114
