<a href="https://colab.research.google.com/github/panghanwu/tibame_project/blob/main/recommendation_oop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 模糊搜尋功能
---

### 會使用到的套件
- gensim
- py2neo
- numpy

In [1]:
# 下載fastText繁體中文模型並解壓縮
!wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zh.300.bin.gz
!gunzip cc.zh.300.bin.gz


--2020-12-03 04:11:39--  https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zh.300.bin.gz
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.74.142, 172.67.9.4, 104.22.75.142, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.74.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4478681770 (4.2G) [application/octet-stream]
Saving to: ‘cc.zh.300.bin.gz’


2020-12-03 04:14:40 (23.6 MB/s) - ‘cc.zh.300.bin.gz’ saved [4478681770/4478681770]



In [2]:
# py2neo是python控制neo4j的套件
# Colab並未內建需要另外安裝
!pip install py2neo

Collecting py2neo
[?25l  Downloading https://files.pythonhosted.org/packages/4f/86/4cb8118794ab5965335bc8f3315c414a05cbbe5d9f978f8fcbed1bc819af/py2neo-2020.1.1-py2.py3-none-any.whl (185kB)
[K     |█▊                              | 10kB 16.8MB/s eta 0:00:01[K     |███▌                            | 20kB 15.2MB/s eta 0:00:01[K     |█████▎                          | 30kB 9.4MB/s eta 0:00:01[K     |███████                         | 40kB 8.0MB/s eta 0:00:01[K     |████████▉                       | 51kB 4.4MB/s eta 0:00:01[K     |██████████▋                     | 61kB 4.8MB/s eta 0:00:01[K     |████████████▍                   | 71kB 5.0MB/s eta 0:00:01[K     |██████████████▏                 | 81kB 5.3MB/s eta 0:00:01[K     |███████████████▉                | 92kB 5.2MB/s eta 0:00:01[K     |█████████████████▋              | 102kB 5.7MB/s eta 0:00:01[K     |███████████████████▍            | 112kB 5.7MB/s eta 0:00:01[K     |█████████████████████▏          | 122kB 5.7MB/s 

In [3]:
# Load the Facebook-format fastText binary downloaded above.
# `FastText.load_fasttext_format` is deprecated in newer gensim releases and
# removed in gensim 4, where `load_facebook_model` is the replacement. Try
# the new loader first and fall back to the old one on gensim versions that
# do not provide it yet. Either way `word2vec.wv[word]` yields the vector.
try:
    from gensim.models.fasttext import load_facebook_model
except ImportError:
    from gensim.models.fasttext import FastText

    word2vec = FastText.load_fasttext_format('cc.zh.300.bin')
else:
    word2vec = load_facebook_model('cc.zh.300.bin')

In [11]:
import numpy as np
import py2neo as neo


class Neo4jRecomBot():
    """Product recommender backed by a Neo4j graph and fastText word vectors.

    Methods:
        1. __init__:     connect to the Neo4j server and cache the product nodes
        2. fussy_search: fuzzy (keyword-similarity) search
        3. same_search:  look up the "same style" product of a product
        4. fit_search:   look up the products that match (outfit) a product
    """

    # Dimensionality of the keyword vectors and of the stored product vectors.
    VECTOR_DIM = 300
    # First character of a product's 'sn' for each supported gender filter.
    _GENDER_PREFIX = {'man': 'M', 'woman': 'F'}

    def __init__(self, sever_link, password, word2vec):
        """Connect to the graph database and load every 'Product' node.

        Args:
            sever_link: URI passed to ``py2neo.Graph`` (e.g. a bolt:// address).
            password: password passed to ``py2neo.Graph``.
            word2vec: model object whose ``wv[word]`` returns the word's vector.
        """
        # Open the graph database.
        self.graph = neo.Graph(sever_link, password=password)
        # Materialise the list of product nodes once, up front.
        self.product_list = list(neo.NodeMatch(self.graph, labels=frozenset(['Product'])))
        # Keep the word-vector model used to embed search keywords.
        self.word2vec = word2vec

    @staticmethod
    def _describe(node):
        """Return the (node, sn, name, image_url) tuple used by the search results."""
        return (node, node['sn'], node['name'], node['image_url'])

    def fussy_search(self, keyword, gender=None):
        """Return the product whose stored vector is closest to the keywords.

        The keyword vectors are summed and compared with every candidate's
        'vector' property by cosine similarity.

        Args:
            keyword: iterable of words; each item is looked up in
                ``self.word2vec.wv``. A plain ``str`` is iterated character
                by character.
            gender: 'man' keeps products whose 'sn' starts with 'M', 'woman'
                those starting with 'F', None keeps every product.

        Returns:
            Tuple ``(node, sn, name, image_url)`` of the best match.

        Raises:
            AssertionError: if ``gender`` is not 'man', 'woman' or None.
            ValueError: if there is no candidate product, or if the keywords
                or all candidates have a zero vector (similarity undefined).
        """
        assert gender in ['man', 'woman', None], \
            "gender must be 'man', 'woman' or None"

        # Narrow the candidate list according to the requested gender.
        prefix = self._GENDER_PREFIX.get(gender)
        if prefix is None:
            search_list = self.product_list
        else:
            # str(... or '') tolerates products with a missing/empty 'sn'.
            search_list = [x for x in self.product_list
                           if str(x['sn'] or '').startswith(prefix)]
        if not search_list:
            raise ValueError('no product available for gender=%r' % (gender,))

        # Embed the description: sum the word vectors of all keywords.
        key_vec = np.zeros(self.VECTOR_DIM)
        for d in keyword:
            key_vec += self.word2vec.wv[d]
        key_norm = np.linalg.norm(key_vec)
        if key_norm == 0:
            # Empty keyword list (or vectors cancelling out): cosine is undefined.
            raise ValueError('keyword must contain at least one word with a non-zero vector')

        # Parse the whitespace-separated product vectors into one matrix.
        pro_vec = np.empty((len(search_list), self.VECTOR_DIM))
        for i, n in enumerate(search_list):
            pro_vec[i] = np.fromstring(n['vector'], sep=' ')

        # Pick the smallest angle (largest cosine). Products with a zero
        # vector keep -inf so they can never win; dividing by their zero norm
        # would give NaN, and argmax returns the first NaN it meets.
        dot = np.dot(key_vec, pro_vec.T)
        norm = key_norm * np.linalg.norm(pro_vec, axis=1)
        cos = np.full(len(search_list), -np.inf)
        np.divide(dot, norm, out=cos, where=norm > 0)
        if not np.isfinite(cos).any():
            raise ValueError('no candidate product has a usable vector')
        idx = int(np.argmax(cos))

        return self._describe(search_list[idx])

    # Given a product node, return its "same style" product.
    def same_search(self, product):
        """Return the product linked to ``product`` by a 'SAME' relationship.

        Args:
            product: product node used as the start of the relationship match.

        Returns:
            Tuple ``(node, sn, name, image_url)`` of the first match, or the
            string '無同款' when ``product`` has no 'SAME' relationship.
        """
        same_relate = list(neo.RelationshipMatch(self.graph,
                                                 nodes=[product],
                                                 r_type='SAME'))
        # Check for "no match" BEFORE indexing; indexing an empty list first
        # raised IndexError and made the fallback below unreachable.
        if not same_relate:
            return '無同款'
        return self._describe(same_relate[0].end_node)

    def fit_search(self, product, top=1):
        """Return the ``top``-th best matching ('FIT') product for ``product``.

        Args:
            product: product node used as the start of the relationship match.
            top: 1-based rank among the 'FIT' relationships ordered by their
                'score' property, highest first.

        Returns:
            Tuple ``(node, score, sn, name, image_url)``.

        Raises:
            ValueError: if ``top`` is smaller than 1.
            IndexError: if ``product`` has fewer than ``top`` 'FIT' relationships.
        """
        if top < 1:
            # top-1 would become a negative index and silently pick from the
            # low-score end of the ranking.
            raise ValueError('top must be >= 1, got %r' % (top,))

        fit_relate = neo.RelationshipMatch(self.graph, nodes=[product], r_type='FIT')
        fit_rank = list(fit_relate.order_by('_.score DESC'))
        if top > len(fit_rank):
            raise IndexError('requested top=%d but only %d FIT relationship(s) exist'
                             % (top, len(fit_rank)))

        best = fit_rank[top - 1]
        fit_node = best.end_node
        return (fit_node, best['score']) + self._describe(fit_node)[1:]


### 範例

In [12]:
# Create the recommender object.
# The database password is read from the NEO4J_PASSWORD environment variable
# (or prompted for) instead of being committed in the notebook; the server
# address can be overridden with NEO4J_URI.
import os
from getpass import getpass

NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://54.234.73.102:33059')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD') or getpass('Neo4j password: ')
recommdation = Neo4jRecomBot(NEO4J_URI, NEO4J_PASSWORD, word2vec)

recommdation.graph

Graph('bolt://neo4j@54.234.73.102:33059')

### 模糊搜尋

In [13]:
# Fuzzy search: describe the wanted item with a list of keywords
# ('拼接' patchwork, '很熱' very hot, '素色' plain colour, '短袖' short sleeves).
keyword = ['拼接','很熱','素色','短袖']

# gender='man' keeps only products whose 'sn' starts with 'M';
# the returned tuple is displayed as the cell output.
recommdation.fussy_search(keyword, gender='man')



(Node('Product', description='男裝防潑水保暖HEAT PADDED夾層外套 棕色', image_url='https://im.uniqlo.com/images/tw/gu/pc/goods/326106/item/36_326106.jpg', name='防潑水保暖夾層外套', sn='MU10', vector='-0.40450905263423920.33090162929147482.7399329561740160.69817805290222170.09941582195460796-0.06349645555019379-0.3571976562961936-1.34178243018686772.274991698563099-0.5456850528717041-1.3174986348021775-0.240445189177989960.8712516594678164-0.5117278490215541.1381994886323810.9088711404183414-0.540621554479003-0.004292753525078297-0.074846197851002221.0640525212511420.3405560222454369-0.041701512411236760.27526096859946847-0.3702615776564926-0.0148667264729738241.1011846736073494-1.8057769453153014-0.7327896218630485-0.3526333924382925-0.0576067883521318440.076538130640983580.47523917164653540.220774346031248570.8698887107893825-0.075629606842994691.2496846909634770.7198170758783817-0.8391554597765207-0.92060064338147640.5164873488247395-0.3034529108554125-0.60474186902865770.48201547190546990.489960625767707

In [14]:
# The return value is a tuple,
# so it can be unpacked straight into separate variables like this.
main_recom, main_sn, main_name, main_url = recommdation.fussy_search(keyword, gender='woman')
# Note that the gender is 'woman' here, so the recommendation differs from the previous cell.
main_name



'雙口袋工作襯衫連身裙'

### 同款搜尋

In [15]:
# Look up the product linked to main_recom by a 'SAME' relationship and display the result.
recommdation.same_search(main_recom)

(Node('Product', description='女裝雙口袋工作襯衫連身裙(長袖) 雙口袋設計，充滿時尚感的襯衫連身裙。可搭配上衣或開襟外套，很百搭的設計。在換季時很方便的商品。 咖啡色', image_url='https://im.uniqlo.com/images/tw/gu/pc/goods/328287/item/39_328287.jpg', name='雙口袋工作襯衫連身裙', sn='FF17', vector='-0.7370765311643481-0.2216735910624265717.714856041595344.660980170592666-0.45854356768541040.7518688887357712-0.8187373052351177-5.8448508725268779.247508107684553-2.2224747203290462-2.465726841473952-0.0195867054862901572.1381499427370727-1.43807160481810571.46271036055986772.6171819996088743-0.23933079533162527-2.7847795598790982.07226206967607142.090256851428421-2.004897393664578-0.58688578963483452.5685903880512340.70247421722160653.2791940739843994-0.6865977077977732-0.8982895771041512-1.158790784975281-3.6193320583552120.3099144248990342-0.119845787528902290.76334361732006073.39235265401657671.54274689918383960.052332062972709542.6591584056150171.8577556253876537-5.666199726052582-0.081005488522350791.92755912995926340.8117517458740622-1.45562927448190751.80048

In [9]:
# Same-style search for the main recommendation.
# This cell previously called fussy_search again (copy-paste slip), so the
# same_* variables just duplicated main_*.
same_result = recommdation.same_search(main_recom)
if isinstance(same_result, tuple):
    same_recom, same_sn, same_name, same_url = same_result
else:
    # same_search reports "no same-style product" with a plain string.
    same_recom = same_sn = same_url = None
    same_name = same_result

same_name



'雙口袋工作襯衫連身裙'

### 穿搭推薦

In [16]:
# First-level outfit match: the highest-scoring 'FIT' product for the main recommendation.
fit_recom_first, fit_score_f, fit_sn_f, fit_name_f, fit_url_f = recommdation.fit_search(main_recom, top=1)

# Prints the match's name and its score.
print('一階穿搭:', fit_name_f, '分數:', fit_score_f)

一階穿搭: 羅紋針織坦克背心 分數: 0.845


In [18]:
# Second-level outfit match: the highest-scoring 'FIT' product for the first-level match.
fit_recom_second, fit_score_s, fit_sn_s, fit_name_s, fit_url_s = recommdation.fit_search(fit_recom_first, top=1)

# Prints the match's name and its score.
print('二階穿搭:', fit_name_s, '分數:', fit_score_s)

二階穿搭: 經典款風衣 分數: 0.934


### 測試區

In [None]:
# Scratch area: reuse the connection already opened by the recommender
# instead of repeating the server address and password in the notebook.
graph = recommdation.graph

graph

In [None]:
# Same-style lookup done by hand (the same steps as Neo4jRecomBot.same_search).
same_relate = list(neo.RelationshipMatch(graph, nodes=[main_recom], r_type='SAME'))
# NOTE(review): raises IndexError when main_recom has no 'SAME' relationship.
same_node = same_relate[0].end_node

same_node['sn']

In [None]:
# First-level outfit match done by hand: all 'FIT' relationships of main_recom,
# ordered by their 'score' property, highest first.
fit_relate = neo.RelationshipMatch(graph, nodes=[main_recom], r_type='FIT')
fit_rank = list(fit_relate.order_by('_.score DESC'))

# Print every relationship to inspect the ranking.
for r in fit_rank:
    print(r)

In [None]:
# Pick the top-th entry (1-based) of the ranking built in the previous cell.
top = 1
fit_score = fit_rank[top-1]['score']
fit_node = fit_rank[top-1].end_node

# Prints the matched product's name and its score.
print('一階穿搭:', fit_node['name'], '分數:', fit_score)