In [4]:
from pymongo import MongoClient

In [5]:
# Connect to the MongoDB server on localhost:27017 and select the 'pn' database.
client = MongoClient(host='localhost', port=27017)
db = client['pn']

In [6]:
# Load only the 'items' field of every transaction that has an element at
# index 1 of 'items' (i.e. at least two items, assuming 'items' is an array).
purchase_list = list(
    db['transactions'].find(
        {'items.1': {'$exists': True}},
        projection=['items'],
    )
)

In [7]:
# How many transactions were loaded into purchase_list.
len(purchase_list)

23288

# Network Analysis

In [121]:
import json
class ProductNerwork:
    """Weighted product graph bundled with its detected communities.

    The class name keeps its original spelling because other cells in this
    notebook refer to it by that name.
    """

    def __init__(self, graph):
        """Cluster the graph and tag every vertex with its community and index.

        Args:
            graph: igraph-style graph whose edges carry a ``'weight'``
                attribute and whose vertices carry a ``'name'`` attribute.
        """
        self.graph = graph
        # community_fastgreedy takes the name of the edge attribute to use as weight.
        self.communities = graph.community_fastgreedy('weight').as_clustering()
        for index, vertex in enumerate(self.graph.vs):
            vertex.update_attributes({
                'community': self.communities.membership[index],
                'id': index,
            })

    def get_communities(self, sort=True):
        """Summarise each community as a score plus the names of its vertices.

        Args:
            sort: when true, return the summaries ordered by score, highest
                first; otherwise keep the order of ``communities.subgraphs()``.

        Returns:
            A list of dicts with the keys ``'weight'`` (the score) and
            ``'items'`` (list of vertex names).
        """
        dics = []
        for subgraph in self.communities.subgraphs():
            nums = len(subgraph.vs)
            total = sum(edge['weight'] for edge in subgraph.es)
            # NOTE(review): algebraically this is total / (nums + 1). The
            # expression is kept exactly as written so previously reported
            # scores stay bit-for-bit reproducible; confirm the intended
            # normalisation.
            weight_sum = total * nums / nums / (nums + 1)
            dics.append({
                'weight': weight_sum,
                'items': [node['name'] for node in subgraph.vs],
            })
        if sort:
            return sorted(dics, key=lambda dic: dic['weight'], reverse=True)
        return dics

    def get_connectors(self):
        """List vertices with positive weighted betweenness, highest first.

        Returns:
            A list of dicts with the keys ``'name'`` and ``'betweeness'``
            (the key spelling is kept because callers read it).
        """
        scores = self.graph.betweenness(weights='weight')
        items = [
            {'name': self.graph.vs[index]['name'], 'betweeness': value}
            for index, value in enumerate(scores)
            if value > 0
        ]
        items.sort(key=lambda item: item['betweeness'], reverse=True)
        return items

    def normalizer(self, max_degree):
        """Build a function that rescales a value relative to ``max_degree``.

        Args:
            max_degree: divisor of the rescaling; must be non-zero, otherwise
                the returned function raises ``ZeroDivisionError``.

        Returns:
            A one-argument function computing ``(value - 1) / max_degree + 1``.
        """
        max_value = max_degree
        min_value = 1

        def normalize(value):
            return (value - min_value) / max_value + 1

        return normalize

    def to_json(self, indent=4, ensure_ascii=True):
        """Serialise the graph to a JSON string with ``nodes`` and ``edges``.

        Each edge becomes ``{'from', 'to', 'weight'}`` using the values of
        ``edge.tuple``; each node carries all of its vertex attributes plus
        its ``'degree'``.

        Args:
            indent: indentation passed to ``json.dumps`` (default 4, as before).
            ensure_ascii: passed to ``json.dumps``. The default ``True`` keeps
                the previous output (non-ASCII names are ``\\u``-escaped); pass
                ``False`` to emit them verbatim, e.g. for a UTF-8 file.

        Returns:
            The JSON document as a ``str``.
        """
        edges = []
        for edge in self.graph.es:
            source, target = edge.tuple
            edges.append({
                'from': source,
                'to': target,
                'weight': edge['weight'],
            })
        nodes = []
        for node in self.graph.vs:
            node_attr = {key: node[key] for key in node.attributes()}
            node_attr['degree'] = node.degree()
            nodes.append(node_attr)
        return json.dumps({
            'nodes': nodes,
            'edges': edges,
        }, indent=indent, ensure_ascii=ensure_ascii)

In [122]:
import igraph
from itertools import filterfalse, combinations

class NetworkConverter:
    """Turn a list of purchase transactions into a product co-purchase network."""

    def __init__(self, purchase_list):
        """Remember the transactions to convert.

        Args:
            purchase_list: sized collection of transaction dicts. Each one has
                an ``'items'`` list whose entries are dicts with the keys
                ``'單品名稱'`` (used as the product name) and ``'amount'``.
        """
        self.purchase_list = purchase_list

    def convert(self, method='degree-price', support=0.001):
        """Build the network from the transactions stored on this instance.

        Every pair of items bought in the same transaction is one occurrence
        of an undirected edge. An occurrence adds the two items' ``'amount'``
        values, divided by the number of pairs in that transaction, to the
        edge weight. Edges seen fewer than
        ``int(len(purchase_list) * support)`` times are dropped.

        Args:
            method: currently unused; accepted for interface compatibility.
            support: minimum fraction of transactions an edge must occur in.

        Returns:
            Whatever ``to_graph`` returns for the surviving nodes and edges.
        """
        min_count = int(len(self.purchase_list) * support)
        result = {}
        # Iterate the instance's own data, not a notebook-level global.
        for transaction in self.purchase_list:
            itemsets = transaction['items']
            if len(itemsets) < 2:
                continue
            pairs = list(self.find_edges_in_list(itemsets))
            pair_total = len(pairs)
            for pair in pairs:
                edge = tuple(item['單品名稱'] for item in pair)
                weight = sum(item['amount'] for item in pair) / pair_total
                # Edges are undirected: reuse whichever orientation was
                # recorded first.
                reverse = (edge[1], edge[0])
                if edge in result:
                    key = edge
                elif reverse in result:
                    key = reverse
                else:
                    result[edge] = {'count': 1, 'weight': weight}
                    continue
                result[key]['count'] += 1
                result[key]['weight'] += weight
        frequent = {
            edge: stats
            for edge, stats in result.items()
            if stats['count'] >= min_count
        }
        # First-seen order (instead of set order) keeps vertex ids stable
        # from one run to the next.
        nodes = list(dict.fromkeys(name for edge in frequent for name in edge))
        return self.to_graph(nodes, frequent)

    def find_edges_in_list(self, itemsets):
        """Return an iterator over all 2-element combinations of ``itemsets``."""
        return combinations(itemsets, 2)

    def to_graph(self, nodes, edges):
        """Create the igraph graph and wrap it in a ``ProductNerwork``.

        Args:
            nodes: iterable of vertex names.
            edges: mapping ``(name_a, name_b) -> {'count', 'weight'}``.

        Returns:
            A ``ProductNerwork`` built on the new graph.
        """
        g = igraph.Graph()
        for node in nodes:
            g.add_vertex(node)
        for edge, attrs in edges.items():
            # Non-positive weights are replaced by 1.
            weight = attrs['weight'] if attrs['weight'] > 0 else 1
            g.add_edge(edge[0], edge[1], weight=weight)
        return ProductNerwork(g)

In [123]:
# Wrap the loaded transactions so they can be converted into a product network.
converter = NetworkConverter(purchase_list)

In [124]:
# Build the network with the default arguments (method='degree-price', support=0.001).
product_network = converter.convert()

In [125]:
# Report how many vertices and edges the resulting graph has.
print('Node number: {}\nEdge number: {}'.format(len(product_network.graph.vs), len(product_network.graph.es)))

Node number: 94
Edge number: 116


# 排序Community

In [126]:
# Communities come back ordered by score, highest first; print each one as
# its score, its space-separated item names, and a separator line.
communities = product_network.get_communities()
for community in communities:
    print(
        community['weight'],
        ' '.join(community['items']),
        '==============',
        sep='\n',
    )

14504.052910052918
戰禍邪神第１１章 戰禍邪神第１２章
2512.6666666666665
店到店ＦＰ手續費 ＦＰ店到店
2102.6444444444446
空瓶回收（銷售用） 紅標料理米酒
1832.2153812853812
伊藤園蘋果紅茶 Ｃｒｅａｍ－Ｏ黑巧克力三明治餅 促銷券０６ 頑皮滷蛋－原味
916.73667998668
蕃薯（１５元） 統一陽光無糖高纖豆漿 蕃薯（２５元） 全家熱狗麵包 鮮奶茶 光泉米漿 鮪魚飯糰 蕃薯（２０元） 茶葉蛋（銷售用） 經典原味熱狗 簡單點無加糖優酪乳 肉鬆飯糰
889.5
熱拿鐵大杯 冰拿鐵大杯
770.0
辣味ＹＵＲＯＣＫ魚薯條無 原味ＹＵＲＯＣＫ魚薯條無
639.3333333333334
雅虎拍賣寄件 店到店雅虎拍賣手續費
554.631746031746
麥香紅茶ＴＰ３００ 鹼性離子水 金牌台啤罐裝（６入） （新）銷售用購物袋１８號袋 麥香奶茶ＴＰ３００ 伯朗咖啡 金牌台灣啤酒
491.53304711161854
黃金厚切魚板 特級花枝丸 白玉蘿蔔 手工高麗菜捲 旗魚黑輪 黃金魚豆腐 究極味付蛋 日式黑輪 讚岐烏龍麵 關東煮本舖拉麵 千層玉子燒 蟹肉糰子 海鮮魚卵棒
432.5460317460317
台鐵手續費 台鐵取票
408.5
中信外１５ 雅虎拍賣繳費 合庫代１５ 代收手續費１５ 玉山淘寶款
385.8518518518519
高鐵取票 高鐵手續費
333.1198412698413
促銷券１０ ＭＭ花生巧克力
330.0
雅虎線上寄件 店到店雅虎拍賣線上手續費
305.8333333333333
自由時報 聯合報 蘋果日報
301.2962962962964
寶物交易代 代收手續費２５
286.87824632883456
優格軟糖（Ｏｒａｎｇｅ） 奶香綠茶３３０ＭＬ 優格軟糖（Ｐｅａｃｈ） 代收折價卷
109.50000000000001
通行繳費 代收手續費５ ｅＴａｇ繳費
101.54829931972786
欣林瓦斯費 台灣自來水 台灣大哥大 台灣電力 國泰世華卡 國民年金代 台新信用卡 代收手續費４ 勞保費代收 花旗信用卡 健保費代收 玉山信用卡 中華電信
47.55079365079365
代收手續費６ 台中裁罰單
0.3333333333333333
蝦皮寄件Ｗ 商

# 利用Betweenness找出可能是connector的節點

In [127]:
# Vertices with a positive weighted betweenness score, highest score first.
connectors = product_network.get_connectors()

In [128]:
# One line per connector: the betweenness score left-aligned to width 5,
# followed by the vertex name. The key is spelled 'betweeness' because that is
# what get_connectors() returns.
for connector in connectors:
    print('{:<5}: {:<5}'.format(connector['betweeness'], connector['name']))

127.0: 茶葉蛋（銷售用）
87.0 : （新）銷售用購物袋１８號袋
36.0 : 台灣自來水
33.0 : 中華電信 
32.0 : 鮪魚飯糰 
30.0 : 關東煮本舖拉麵
27.0 : 白玉蘿蔔 
27.0 : 台新信用卡
21.0 : 國泰世華卡
20.0 : 台灣大哥大
17.0 : 光泉米漿 
17.0 : 經典原味熱狗
11.0 : 國民年金代
11.0 : 代收手續費４
6.0  : 代收手續費１５
3.0  : 代收折價卷
3.0  : 促銷券０６
2.0  : 蟹肉糰子 
1.0  : 黃金厚切魚板
1.0  : 代收手續費５
1.0  : 蘋果日報 


# 更新Vertex的attribute(Community)並匯出成JSON

In [129]:
# Serialise the graph (nodes with their attributes and degree, plus weighted
# edges) into a JSON string.
data = product_network.to_json()

In [130]:
# Write the JSON string to data.json in the current working directory as UTF-8.
with open('data.json', 'w', encoding='utf-8') as file:
    file.write(data)