# Graph-based Recommendation

확장 기능 로드

In [1]:
# Load (or reload) the ipython-sql extension, which provides the SQL magics used in the cells below.
%reload_ext sql
# Execute libdb.py inside this notebook's namespace.
# NOTE(review): presumably this is where getImageUrls / displayImageUrls come from - confirm.
%run libdb.py

데이터베이스 접속

In [2]:
# Connection URL format: postgresql+psycopg2://<user id>:<password>@127.0.0.1:5432/<database>
%sql postgresql+psycopg2://postgres:@127.0.0.1:5432/postgres

'Connected: postgres@postgres'

## Build Transition Matrix
* User 인덱스 생성
* Item 인덱스 생성
* Item 에 기반하여 각종 스코어 생성 및 Graph 생성

In [3]:
%%sql
-- Build the user index: one row per distinct uid found in view_log, each
-- paired with a dense integer user_index starting at 1.
drop table if exists tmp_user_index;

create table tmp_user_index as
select
    uid,
    -- Number the rows in an explicit order. With an empty window the numbering
    -- order is unspecified, so rebuilding from the same view_log could hand out
    -- a different uid -> user_index mapping each time.
    row_number() over (order by uid) as user_index
from (
    select distinct uid
    from view_log
) a;

-- Index on uid, the column used when joining view_log rows to their user_index.
drop index if exists tmp_user_index_idx1;
create index tmp_user_index_idx1 on tmp_user_index(uid);

Done.
187437 rows affected.
Done.
Done.


[]

In [4]:
%%sql
-- Build the item index: one row per distinct item_id found in view_log, each
-- paired with a dense integer item_index starting at 1.
drop table if exists tmp_item_index;

create table tmp_item_index as
select
    item_id,
    -- Number the rows in an explicit order. With an empty window the numbering
    -- order is unspecified, so rebuilding from the same view_log could hand out
    -- a different item_id -> item_index mapping each time.
    row_number() over (order by item_id) as item_index
from (
    select distinct item_id
    from view_log
) a;

-- Index on item_id, the column used when joining to product and view_log.
drop index if exists tmp_item_index_idx1;
create index tmp_item_index_idx1 on tmp_item_index(item_id);

Done.
51481 rows affected.
Done.
Done.


[]

In [5]:
%%sql
-- Build user-item scores. A decay function is applied so that recently viewed
-- items weigh more: each (uid, item_id, day) bucket contributes
-- 0.95 ^ (whole days before the newest server_time in view_log) * views that day.
drop table if exists tmp_item_recency;

create table tmp_item_recency as
with daily_views as (
    -- Number of views per user, item and calendar day.
    select
        uid,
        item_id,
        server_time::date as view_date,
        count(*) as view_count
    from view_log
    group by uid, item_id, server_time::date
),
latest as (
    -- Reference point of the decay: the newest server_time in the log.
    select max(server_time)::timestamp as max_end_time
    from view_log
)
select
    u.user_index,
    i.item_index,
    pow(0.95, date_part('day', l.max_end_time - d.view_date::timestamp)) * d.view_count as score
from daily_views d
cross join latest l
inner join tmp_user_index u
    on d.uid = u.uid
inner join tmp_item_index i
    on d.item_id = i.item_id;

Done.
523626 rows affected.


[]

In [6]:
%%sql
-- Turn the recency-aware scores into values with 0 <= prob <= 1 and attach
-- tf / idf based weights, one row per (user_index, item_index) pair.
drop table if exists tmp_item_score;

create table tmp_item_score as
with pair_totals as (
    -- Collapse the per-day rows: total decayed score and number of rows (tf)
    -- for every user-item pair.
    select
        user_index,
        item_index,
        sum(score) as score,
        count(*) as tf
    from tmp_item_recency
    group by user_index, item_index
),
item_idf as (
    -- idf per item: log(total users / users who viewed the item + 1).
    -- The argument is a float, so this is the single-argument PostgreSQL log (base 10).
    select
        item_index,
        log( (select count(distinct user_index) from tmp_user_index)::float / count(distinct user_index)::float + 1 ) as idf
    from tmp_item_recency
    group by item_index
),
scored as (
    select
        p.user_index,
        p.item_index,
        p.score as recency_tf,
        -- Share of the item's total score that comes from this user.
        p.score / sum(p.score) over (partition by p.item_index) as prob,
        -- Share of the user's total score that goes to this item.
        p.score / sum(p.score) over (partition by p.user_index) as user_prob,
        p.tf,
        f.idf,
        p.tf * f.idf as tf_idf
    from pair_totals p
    left join item_idf f
        on p.item_index = f.item_index
)
select
    s.*,
    -- Euclidean norms of each item's tf_idf and tf vectors over its users.
    sqrt(sum(pow(s.tf_idf, 2)) over (partition by s.item_index)) as tf_idf_norm,
    sqrt(sum(pow(s.tf, 2)) over (partition by s.item_index)) as tf_norm
from scored s;


Done.
523626 rows affected.


[]

## Cosine Similarity based Recommendation

In [7]:
%%sql
-- Build the cosine similarity based recommendation table: for every ordered
-- pair of different items sharing at least one user, the dot product of their
-- tf_idf vectors divided by the product of their tf_idf norms.
drop table if exists tmp_item_similarity;

create table tmp_item_similarity as
with co_viewed as (
    -- One row per (target item, cross item, shared user).
    select
        t.item_index as target_item_index,
        c.item_index as cross_item_index,
        t.tf_idf * c.tf_idf as prod,
        t.tf_idf_norm as target_norm,
        c.tf_idf_norm as cross_norm
    from tmp_item_score t
    inner join tmp_item_score c
        on t.user_index = c.user_index
        and t.item_index <> c.item_index
)
select
    target_item_index,
    cross_item_index,
    -- The norms are constant per item, so max() just picks that single value.
    sum(prod) / (max(target_norm) * max(cross_norm)) as score
from co_viewed
group by target_item_index, cross_item_index;

Done.
2578000 rows affected.


[]

In [8]:
# Item whose recommendations are inspected; referenced as :item_id in the queries below.
item_id = '146cde7'
# Fetch the product row of the target item and display its image.
result = %sql select * from product where item_id = :item_id;
# NOTE(review): getImageUrls / displayImageUrls are not defined in this notebook - presumably provided by libdb.py; confirm.
displayImageUrls(getImageUrls(result, 'item_image'))
# Top 20 cross items by cosine similarity score for the target item, joined to product.
# tmp_item_index is joined twice: b resolves the target item_id, c maps each cross item_index back to an item_id.
result2 = %sql select * from tmp_item_similarity a join tmp_item_index b on a.target_item_index = b.item_index join tmp_item_index c on a.cross_item_index = c.item_index join product d on c.item_id = d.item_id where b.item_id = :item_id order by score desc limit 20;
displayImageUrls(getImageUrls(result2, 'item_image'))

1 rows affected.


20 rows affected.


## P3 based Recommendation

In [9]:
%%sql
-- Build the P3 graph: for every item keep at most the 100 rows of
-- tmp_item_score with the highest prob. The rank column is kept in the table.
drop table if exists tmp_p3_graph;

create table tmp_p3_graph as
with ranked_edges as (
    select
        s.*,
        row_number() over (partition by s.item_index order by s.prob desc) as rank
    from tmp_item_score s
)
select *
from ranked_edges
where rank <= 100;

-- Indexes on both join keys used by the iteration steps.
drop index if exists tmp_p3_graph_idx1;
create index tmp_p3_graph_idx1 on tmp_p3_graph(user_index);

drop index if exists tmp_p3_graph_idx2;
create index tmp_p3_graph_idx2 on tmp_p3_graph(item_index);


Done.
273819 rows affected.
Done.
Done.
Done.
Done.


[]

In [10]:
%%sql
-- Step 1: combine the item-to-user graph with the user-to-item graph to get an
-- item-to-item graph, keeping the 100 strongest neighbours of each item.
-- NOTE(review): both legs are weighted with prob (share within the item),
-- whereas tmp_p3_iter3 weights its second leg with user_prob - confirm that
-- this difference is intended.
drop table if exists tmp_p3_iter1;

create table tmp_p3_iter1 as
with item_pairs as (
    -- Sum the edge products over every user shared by two different items.
    select
        g1.item_index as item_index1,
        g2.item_index as item_index2,
        sum(g1.prob * g2.prob) as prob
    from tmp_p3_graph g1
    inner join tmp_p3_graph g2
        on g1.user_index = g2.user_index
        and g1.item_index <> g2.item_index
    group by g1.item_index, g2.item_index
),
ranked_pairs as (
    select
        item_index1,
        item_index2,
        prob,
        row_number() over (partition by item_index1 order by prob desc) as rank
    from item_pairs
)
select
    item_index1,
    item_index2,
    prob
from ranked_pairs
where rank <= 100;

drop index if exists tmp_p3_iter1_idx1;
create index tmp_p3_iter1_idx1 on tmp_p3_iter1(item_index2);

drop index if exists tmp_p3_iter1_idx2;
create index tmp_p3_iter1_idx2 on tmp_p3_iter1(item_index1, item_index2);


-- Step 2: combine the item-to-item graph (tmp_p3_iter1) with the item-to-user
-- graph to get an item-to-user graph, keeping the 100 strongest users per item.
-- NOTE(review): the item-to-user leg is weighted with user_prob (share within
-- the user) rather than prob (share within the item) - confirm this is intended.
drop table if exists tmp_p3_iter2;

create table tmp_p3_iter2 as
with item_users as (
    -- Sum the path products over every intermediate item.
    select
        p.item_index1 as item_index,
        g.user_index as user_index,
        sum(p.prob * g.user_prob) as prob
    from tmp_p3_iter1 p
    inner join tmp_p3_graph g
        on p.item_index2 = g.item_index
    group by p.item_index1, g.user_index
),
ranked_users as (
    select
        item_index,
        user_index,
        prob,
        row_number() over (partition by item_index order by prob desc) as rank
    from item_users
)
select
    item_index,
    user_index,
    prob
from ranked_users
where rank <= 100;

drop index if exists tmp_p3_iter2_idx1;
create index tmp_p3_iter2_idx1 on tmp_p3_iter2(user_index);

drop index if exists tmp_p3_iter2_idx2;
create index tmp_p3_iter2_idx2 on tmp_p3_iter2(item_index, user_index);


-- Step 3: combine the item-to-user graph (tmp_p3_iter2) with the user-to-item
-- graph to get the final item-to-item recommendation table, keeping the 100
-- highest scoring cross items for each target item.
drop table if exists tmp_p3_iter3;

create table tmp_p3_iter3 as
with item_pairs as (
    -- Sum the path products over every user that links two different items.
    select
        w.item_index as item_index1,
        g.item_index as item_index2,
        sum(w.prob * g.user_prob) as prob
    from tmp_p3_iter2 w
    inner join tmp_p3_graph g
        on w.user_index = g.user_index
        and w.item_index <> g.item_index
    group by w.item_index, g.item_index
),
ranked_pairs as (
    select
        item_index1,
        item_index2,
        prob,
        row_number() over (partition by item_index1 order by prob desc) as rank
    from item_pairs
)
select
    item_index1 as target_item_index,
    item_index2 as cross_item_index,
    prob as score
from ranked_pairs
where rank <= 100;

Done.
1307512 rows affected.
Done.
Done.
Done.
Done.
Done.
3694654 rows affected.
Done.
Done.
Done.
Done.
Done.
4317406 rows affected.


[]

In [11]:
# Item whose recommendations are inspected; referenced as :item_id in the queries below.
item_id = '146cde7'
# Fetch the product row of the target item and display its image.
result = %sql select * from product where item_id = :item_id;
# NOTE(review): getImageUrls / displayImageUrls are not defined in this notebook - presumably provided by libdb.py; confirm.
displayImageUrls(getImageUrls(result, 'item_image'))
# Top 20 cross items by P3 score (tmp_p3_iter3) for the target item, joined to product.
# tmp_item_index is joined twice: b resolves the target item_id, c maps each cross item_index back to an item_id.
result2 = %sql select * from tmp_p3_iter3 a join tmp_item_index b on a.target_item_index = b.item_index join tmp_item_index c on a.cross_item_index = c.item_index join product d on c.item_id = d.item_id where b.item_id = :item_id order by score desc limit 20;
displayImageUrls(getImageUrls(result2, 'item_image'))

1 rows affected.


20 rows affected.
