# Related Products Recommendation

확장 기능 로드

In [1]:
%reload_ext sql
%run libdb.py

데이터베이스 접속

In [2]:
# postgresql+psycopg2://사용자ID:password@127.0.0.1:5432/데이터베이스
%sql postgresql+psycopg2://dj.lee@127.0.0.1:5432/postgres

'Connected: dj.lee@postgres'

## Related Products based on TF-IDF Cosine Similarity
Create item view count table

In [3]:
%%sql
drop table if exists item_view_cnt;

create table item_view_cnt as
select item_id, uid, count(*) cnt
from view_log
group by item_id, uid;

create index idx_item_view_cnt_1 on item_view_cnt (item_id, uid);

create index idx_item_view_cnt_2 on item_view_cnt (uid, item_id);

Done.
523626 rows affected.
Done.
Done.


[]

Count items and set it to `total_item`

In [4]:
rows = %sql select count(distinct item_id) from item_view_cnt;
total_item = rows[0][0]
total_item

1 rows affected.


51481

Compute idf on user dimensions

In [5]:
%%sql 
drop table if exists user_idf;

create table user_idf as
select uid, log(:total_item/count(distinct item_id)) idf
from item_view_cnt
group by uid;

create index idx_user_idf_1 on user_idf (uid, idf);

Done.
187437 rows affected.
Done.


[]

Create item weight table using tf-idf weighting scheme

In [6]:
%%sql
drop table if exists item_weight;

create table item_weight as
select t1.item_id, t1.uid, t1.cnt * t2.idf weight
from item_view_cnt t1
    join user_idf t2 on t1.uid = t2.uid;
    
create index idx_item_weight_1 on item_weight (item_id, uid, weight);

Done.
523626 rows affected.
Done.


[]

Check item weights

In [7]:
%%sql
select *
from item_weight
order by item_id
limit 10;

10 rows affected.


item_id,uid,weight
0001223,ffa2096,3.56549362986886
00018bb,42a839c,4.71164697432916
0001ec5,358d1e5,3.53554727917667
0001ec5,4105ae6,7.4232770764647
0001ec5,be0cba6,7.86697457569741
0002841,ef68161,4.01266853389633
00028a5,39f3827,3.34986008219233
00028a5,9731902,3.36921585741014
00028a5,e838454,3.45636603312904
00052d7,cdd2b20,9.42329394865832


Compute item l2 norm 

In [8]:
%%sql
drop table if exists item_l2norm;

create table item_l2norm as
select item_id, pow(w2sum, 1.0/2.0) l2norm
from (
    select item_id, sum(weight*weight) w2sum
    from item_weight
    group by item_id) t;

create index idx_item_l2norm_1 on item_l2norm (item_id, l2norm);

Done.
51481 rows affected.
Done.


[]

Normalize weights

In [9]:
%%sql
drop table if exists item_nw;

create table item_nw as
select t1.item_id, t1.uid, t1.weight/t2.l2norm nw
from item_weight t1
    join item_l2norm t2 on t2.item_id = t1.item_id;

create index idx_item_nw_1 on item_nw (uid, item_id, nw);

Done.
523626 rows affected.
Done.


[]

Compute consine similarity using inner product

In [10]:
%%sql
drop table if exists item_item_sim_view_cos;

create table item_item_sim_view_cos as
select t1.item_id item_id, t2.item_id sim_item_id, sum(t1.nw * t2.nw) sim
from item_nw t1
    join item_nw t2 on t2.uid = t1.uid
where t1.item_id <> t2.item_id
group by t1.item_id, t2.item_id;

create index idx_item_item_sim_view_cos_1 on item_item_sim_view_cos (item_id);

Done.
2578000 rows affected.
Done.


[]

In [11]:
item_id = 'ac616a3'
rows1 = %sql select * from product where item_id = :item_id;
displayImageUrls(getImageUrls(rows1, 'item_image'))
rows2 = %sql select * from item_item_sim_view_cos a inner join product b on a.sim_item_id = b.item_id where a.item_id = :item_id order by a.sim desc limit 20;
displayImageUrls(getImageUrls(rows2, 'item_image'))

1 rows affected.


20 rows affected.


## Personalized Recommendation using User Log and Related Products

In [12]:
%%sql
select a.item_id, score, b.item_image
from (
    select t2.sim_item_id item_id, sum(t1.recency_weight * t2.sim) score
    from (
        select item_id, (1.0/row_number() over (order by server_time desc)) recency_weight
        from view_log t1
        where t1.uid = '4d78ccd'
    ) t1
    join item_item_sim_view_cos t2 on t2.item_id = t1.item_id
    group by t2.sim_item_id) a
    inner join product b on b.item_id = a.item_id
-- where a.item_id not in (select item_id from view_log where uid = '4d78ccd') -- filter user viewed item
order by score desc
limit 20;

20 rows affected.


item_id,score,item_image
104ac3e,0.948552968566644,http://fc-img.recobell.io/images/goods/400/12261400_h.jpg
a66685c,0.942694343902795,http://fc-img.recobell.io/images/goods/215/12235215_h.jpg
25fbf65,0.942518905306304,http://fc-img.recobell.io/images/goods/597/12233597_h.jpg
ccac050,0.912373977770072,http://fc-img.recobell.io/images/goods/863/12285863_h.jpg
84d8fed,0.776566029866605,http://fc-img.recobell.io/images/goods/803/12292803_h.jpg
f1f5cc7,0.605570153311117,http://fc-img.recobell.io/images/goods/615/12287615_h.jpg
f47ca6e,0.570024162804115,http://fc-img.recobell.io/images/goods/523/12134523_h.jpg
bf620ae,0.532248958714295,http://fc-img.recobell.io/images/goods/512/12185512_h.jpg
36a3186,0.503325727143331,http://fc-img.recobell.io/images/goods/564/12274564_h.jpg
999ebd8,0.502757736370948,http://fc-img.recobell.io/images/goods/822/12240822_h.jpg


In [13]:
rows3 = _
displayImageUrls(getImageUrls(rows3, 'item_image'))