In [1]:
from pymongo import MongoClient

In [2]:
# Connect to the local MongoDB server and select the 'pn' database.
client = MongoClient(host='localhost', port=27017)
db = client['pn']

In [3]:
# 'items.1' exists only when the items array has a second element, so this
# keeps transactions with at least two line items; only 'items' (plus the
# default '_id') is projected.
purchase_list = list(
    db['transactions'].find(
        {'items.1': {'$exists': True}},
        projection=['items'],
    )
)

In [4]:
# Number of transactions returned by the query above.
len(purchase_list)

23288

In [50]:
# Peek at one fetched document to see the shape of its 'items' entries.
print(purchase_list[1])

{'_id': ObjectId('5ceb55f7227c2680ca884f72'), 'items': [{'品號-品名稱': '19-吐司蛋糕', '群號-群名稱': '192-蛋糕', '銷售單價': 35.0, '單品名稱': '雞蛋牛奶捲', 'amount': 70.0}, {'品號-品名稱': '00-傳統代收', '群號-群名稱': '18-代碼繳費', '銷售單價': 0.0, '單品名稱': '代碼繳費１', 'amount': 0.0}]}


# Network Analysis

In [161]:
import json
class ProductNerwork:
    """Wrap an igraph product graph with community and centrality helpers.

    On construction the vertices are clustered with igraph's fast-greedy
    community detection (using the ``weight`` edge attribute) and every vertex
    is tagged with two attributes: ``community`` (its cluster index) and ``id``
    (its vertex index).

    The class name keeps its historical misspelling because other code in this
    notebook instantiates it by that name; ``ProductNetwork`` (defined right
    after the class) is a correctly spelled alias.
    """

    def __init__(self, graph):
        """Detect communities on ``graph`` and annotate its vertices in place.

        Args:
            graph: igraph graph whose edges carry a ``weight`` attribute and
                whose vertices carry a ``name`` attribute.
        """
        self.graph = graph
        self.communities = graph.community_fastgreedy(weights='weight').as_clustering()
        membership = self.communities.membership
        for index, vertex in enumerate(self.graph.vs):
            vertex.update_attributes({'community': membership[index], 'id': index})

    def get_communities(self, sort=True):
        """Summarise every detected community.

        Args:
            sort: when true (default) communities are ordered by descending
                score; otherwise they keep the clustering's own order.

        Returns:
            A list of dicts ``{'weight': score, 'items': [vertex names]}`` where
            ``score`` is the sum of the community's internal edge weights
            divided by ``n + 1`` (``n`` = number of vertices in the community).
        """
        summaries = []
        for subgraph in self.communities.subgraphs():
            nums = len(subgraph.vs)
            total_weight = sum(edge['weight'] for edge in subgraph.es)
            summaries.append({
                # The former `* nums / nums` factor cancelled out, so the score
                # is simply total / (nums + 1).
                'weight': total_weight / (nums + 1),
                'items': [node['name'] for node in subgraph.vs],
            })
        if sort:
            summaries.sort(key=lambda summary: summary['weight'], reverse=True)
        return summaries

    def get_connectors(self):
        """Return vertices with positive betweenness, highest first.

        Returns:
            A list of dicts ``{'name': vertex name, 'betweeness': score}``.
            The misspelled ``'betweeness'`` key is kept because callers read it.
        """
        # NOTE(review): igraph appears to treat `weights` as path lengths when
        # computing betweenness, so heavier edges count as "longer" — confirm
        # this is the intended meaning for co-purchase weights.
        scores = self.graph.betweenness(weights='weight')
        items = [
            {'name': self.graph.vs[index]['name'], 'betweeness': value}
            for index, value in enumerate(scores)
            if value > 0
        ]
        items.sort(key=lambda item: item['betweeness'], reverse=True)
        return items

    def normalizer(self, max_degree):
        """Build a function that rescales a degree relative to ``max_degree``.

        The returned callable maps ``value`` to ``(value - 1) / max_degree + 1``,
        so a degree of 1 maps to 1.0. Calling it raises ``ZeroDivisionError``
        when ``max_degree`` is 0.
        """
        max_value = max_degree
        min_value = 1

        def normalize(value):
            return (value - min_value) / max_value + 1

        return normalize

    def to_json(self, ensure_ascii=True, indent=4):
        """Serialise the graph to a JSON string with ``nodes`` and ``edges``.

        Each edge becomes ``{'from', 'to', 'weight'}`` (endpoints taken from
        ``edge.tuple``); each node becomes a dict of all its vertex attributes
        plus its ``degree``.

        Args:
            ensure_ascii: forwarded to ``json.dumps``. The default (True)
                escapes non-ASCII characters as ``\\uXXXX``; pass False to keep
                product names readable when writing a UTF-8 file.
            indent: forwarded to ``json.dumps`` (default 4).

        Returns:
            The JSON document as a ``str``.
        """
        edges = []
        for edge in self.graph.es:
            source, target = edge.tuple
            edges.append({'from': source, 'to': target, 'weight': edge['weight']})
        nodes = []
        for node in self.graph.vs:
            node_attr = {key: node[key] for key in node.attributes()}
            node_attr['degree'] = node.degree()
            nodes.append(node_attr)
        return json.dumps(
            {'nodes': nodes, 'edges': edges},
            indent=indent,
            ensure_ascii=ensure_ascii,
        )


# Correctly spelled alias; the original name stays for backward compatibility.
ProductNetwork = ProductNerwork

In [166]:
import igraph
from itertools import filterfalse, combinations

class NetworkConverter:
    """Turn purchase transactions into a product co-purchase network.

    Each transaction is a mapping with an ``'items'`` list. Every item is a
    mapping that provides ``'單品名稱'`` (used as the vertex name) and, for the
    ``'degree-price'`` method, ``'amount'``.
    """

    def __init__(self, purchase_list):
        """Store the transactions that ``convert`` will process."""
        self.purchase_list = purchase_list

    def convert(self, method = 'degree-price', support=0.002):
        """Build the co-purchase graph from the stored transactions.

        Every unordered pair of items bought in the same transaction is an
        edge; ``(a, b)`` and ``(b, a)`` are merged under whichever orientation
        was seen first.

        Args:
            method: how edge weights are computed.
                ``'degree-price'`` accumulates (sum of the two items'
                ``'amount'``) / (number of pairs in the transaction).
                ``'adjust-degree'`` accumulates 1 / (number of pairs in the
                transaction).
                Any other value (e.g. ``'degree'``) keeps every edge weight
                fixed at 1 and only counts co-occurrences.
            support: minimum share of transactions; edges seen in fewer than
                ``int(len(purchase_list) * support)`` transactions are dropped.

        Returns:
            Whatever ``to_graph`` returns for the surviving nodes and edges.
        """
        min_count = int(len(self.purchase_list) * support)
        accumulate_weight = method in ('degree-price', 'adjust-degree')
        result = {}
        # Iterate the instance's own data, not a same-named notebook global.
        for transaction in self.purchase_list:
            itemsets = transaction['items']
            if len(itemsets) <= 1:
                continue
            pairs = list(self.find_edges_in_list(itemsets))
            pair_count = len(pairs)
            for pair in pairs:
                edge = tuple(item['單品名稱'] for item in pair)
                if method == 'degree-price':
                    weight = sum(item['amount'] for item in pair) / pair_count
                elif method == 'adjust-degree':
                    weight = 1 / pair_count
                else:
                    weight = 1

                reverse = (edge[1], edge[0])
                if edge in result:
                    stats = result[edge]
                elif reverse in result:
                    stats = result[reverse]
                else:
                    result[edge] = {'count': 1, 'weight': weight}
                    continue
                stats['count'] += 1
                if accumulate_weight:
                    stats['weight'] += weight

        frequent = {
            edge: stats
            for edge, stats in result.items()
            if stats['count'] >= min_count
        }
        # Sorted so vertex ids do not depend on per-process string hashing.
        nodes = sorted({name for edge in frequent for name in edge})
        return self.to_graph(nodes, frequent)

    def find_edges_in_list(self, itemsets):
        """Return an iterator over all 2-element combinations of ``itemsets``."""
        return combinations(itemsets, 2)

    def to_graph(self, nodes, edges):
        """Create the igraph graph and wrap it in ``ProductNerwork``.

        Args:
            nodes: iterable of vertex names, added in iteration order.
            edges: mapping ``(name_a, name_b) -> {'count': ..., 'weight': ...}``.
                Non-positive weights are replaced by 1.
        """
        g = igraph.Graph()
        for node in nodes:
            g.add_vertex(node)
        for (source, target), attrs in edges.items():
            weight = attrs['weight'] if attrs['weight'] > 0 else 1
            g.add_edge(source, target, weight=weight)
        return ProductNerwork(g)

In [167]:
# Build the converter over the transactions loaded from MongoDB.
converter = NetworkConverter(purchase_list)

In [168]:
# 'degree' is neither 'degree-price' nor 'adjust-degree', so convert() takes
# its fallback branch: every edge weight is 1 and only co-occurrences are
# counted. Pairs seen in fewer than int(len(purchase_list) * 0.001)
# transactions are dropped.
product_network = converter.convert(method='degree', support=0.001)

In [169]:
# Report how many vertices and edges survived the support filter.
print('Node number: {}\nEdge number: {}'.format(
    len(product_network.graph.vs),
    len(product_network.graph.es),
))

Node number: 94
Edge number: 116


# 排序Community

In [170]:
# List every community, highest score first: score, member names, separator.
communities = product_network.get_communities()
for community in communities:
    print(
        community['weight'],
        ' '.join(community['items']),
        '==============',
        sep='\n',
    )

3.4285714285714284
黃金魚豆腐 千層玉子燒 手工高麗菜捲 白玉蘿蔔 究極味付蛋 海鮮魚卵棒 日式黑輪 旗魚黑輪 蟹肉糰子 關東煮本舖拉麵 讚岐烏龍麵 特級花枝丸 黃金厚切魚板
1.2857142857142858
台灣電力 玉山信用卡 台灣大哥大 國泰世華卡 花旗信用卡 欣林瓦斯費 健保費代收 台新信用卡 中華電信 台灣自來水 勞保費代收 國民年金代 代收手續費４
1.0
蕃薯（１５元） 鮮奶茶 肉鬆飯糰 全家熱狗麵包 光泉米漿 蕃薯（２０元） 鮪魚飯糰 蕃薯（２５元） 經典原味熱狗 統一陽光無糖高纖豆漿 簡單點無加糖優酪乳 茶葉蛋（銷售用）
0.875
麥香奶茶ＴＰ３００ （新）銷售用購物袋１８號袋 麥香紅茶ＴＰ３００ 伯朗咖啡 金牌台啤罐裝（６入） 金牌台灣啤酒 鹼性離子水
0.6666666666666666
雅虎拍賣繳費 合庫代１５ 中信外１５ 代收手續費１５ 玉山淘寶款
0.6
奶香綠茶３３０ＭＬ 代收折價卷 優格軟糖（Ｏｒａｎｇｅ） 優格軟糖（Ｐｅａｃｈ）
0.6
頑皮滷蛋－原味 促銷券０６ 伊藤園蘋果紅茶 Ｃｒｅａｍ－Ｏ黑巧克力三明治餅
0.5
自由時報 聯合報 蘋果日報
0.5
代收手續費５ 通行繳費 ｅＴａｇ繳費
0.3333333333333333
戰禍邪神第１２章 戰禍邪神第１１章
0.3333333333333333
促銷券１０ ＭＭ花生巧克力
0.3333333333333333
辣味ＹＵＲＯＣＫ魚薯條無 原味ＹＵＲＯＣＫ魚薯條無
0.3333333333333333
空瓶回收（銷售用） 紅標料理米酒
0.3333333333333333
蝦皮寄件Ｆ 商店街寄件Ｆ
0.3333333333333333
店到店雅虎拍賣線上手續費 雅虎線上寄件
0.3333333333333333
冰拿鐵大杯 熱拿鐵大杯
0.3333333333333333
台中裁罰單 代收手續費６
0.3333333333333333
寶物交易代 代收手續費２５
0.3333333333333333
商店街取件 蝦皮取件Ｃ
0.3333333333333333
店到店雅虎拍賣手續費 雅虎拍賣寄件
0.3333333333333333
ＦＰ店到店 店到店ＦＰ手續費
0.3333333333333333
台鐵取票 台鐵手續費
0.333333333333333

# 利用Betweenness找出可能是connector的節點

In [123]:
# Vertices with positive betweenness, highest first (see ProductNerwork.get_connectors).
connectors = product_network.get_connectors()

In [124]:
# One row per connector: betweenness score, then product name.
for connector in connectors:
    line = '{:<5}: {:<5}'.format(connector['betweeness'], connector['name'])
    print(line)

26.0 : 茶葉蛋（銷售用）
25.0 : 關東煮本舖拉麵
12.0 : 代收手續費４
8.0  : 中華電信 
7.0  : 蕃薯（２０元）
7.0  : 海鮮魚卵棒
7.0  : 經典原味熱狗
6.0  : 代收手續費１５
5.0  : 台灣自來水
1.0  : 促銷券０６


# 更新Vertex的attribute(Community)並匯出成JSON

In [125]:
# Serialise the graph's nodes and edges to a JSON string.
data = product_network.to_json()

In [126]:
# Write the JSON string to default.json (relative to the current working
# directory) as UTF-8 text.
with open('default.json', 'w', encoding='utf-8') as file:
    file.write(data)