In [5]:
from pymongo import MongoClient

In [6]:
# Connect to the local MongoDB server and select the pruned transaction database.
MONGO_HOST, MONGO_PORT = 'localhost', 27017
client = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
db = client['pn-pruned']

In [7]:
# 'items.1' only exists when the items array has a second element, so this
# keeps transactions with at least two items; only the 'items' field is fetched.
multi_item_filter = {'items.1': {'$exists': True}}
transaction_cursor = db['transactions'].find(multi_item_filter, projection=['items'])
purchase_list = list(transaction_cursor)

In [8]:
# Number of transaction documents loaded into purchase_list.
len(purchase_list)

19633

# Network Analysis

In [9]:
import json
class ProductNerwork:
    """Wrapper around an igraph graph of co-purchased products.

    On construction the graph is clustered with igraph's fast-greedy
    community detection (using the ``'weight'`` edge attribute) and every
    vertex receives two attributes: ``'community'`` (its membership index in
    the clustering) and ``'id'`` (its vertex index).

    The class name is misspelt ("Nerwork"); it is kept because other cells
    refer to it under this name.
    """

    def __init__(self, graph):
        """Cluster ``graph`` and annotate its vertices.

        Args:
            graph: an igraph graph whose edges carry a ``'weight'`` attribute
                and whose vertices carry a ``'name'`` attribute.
        """
        self.graph = graph
        self.communities = graph.community_fastgreedy('weight').as_clustering()
        membership = self.communities.membership
        for index, vertex in enumerate(self.graph.vs):
            vertex.update_attributes({'community': membership[index], 'id': index})

    def get_communities(self, sort=True):
        """Summarise every detected community.

        Args:
            sort: when true, communities are returned by descending score.

        Returns:
            A list of dicts with keys ``'weight'`` (the community score) and
            ``'items'`` (the ``'name'`` of every vertex in the community).
        """
        summaries = []
        for subgraph in self.communities.subgraphs():
            nums = len(subgraph.vs)
            total_weight = sum(edge['weight'] for edge in subgraph.es)
            # NOTE(review): "* nums / nums" cancels out, so this is effectively
            # total_weight / (nums + 1). The expression is kept verbatim so the
            # floating-point results stay identical; confirm the intended
            # normalisation with the author.
            score = total_weight * nums / nums / (nums + 1)
            summaries.append({
                'weight': score,
                'items': [node['name'] for node in subgraph.vs],
            })
        if sort:
            return sorted(summaries, key=lambda summary: summary['weight'], reverse=True)
        return summaries

    def get_connectors(self):
        """Return vertices with positive weighted betweenness, highest first.

        Returns:
            A list of dicts with keys ``'name'`` and ``'betweeness'`` (the key
            spelling is part of the output format and is kept as is).
        """
        scores = self.graph.betweenness(weights='weight')
        items = [
            {'name': self.graph.vs[index]['name'], 'betweeness': value}
            for index, value in enumerate(scores)
            if value > 0
        ]
        items.sort(key=lambda item: item['betweeness'], reverse=True)
        return items

    def normalizer(self, max_degree):
        """Build a function mapping ``value`` to ``(value - 1) / max_degree + 1``.

        Args:
            max_degree: divisor of the scaling; a value of 0 makes the returned
                function raise ``ZeroDivisionError`` when called.

        Returns:
            A one-argument callable performing the scaling.
        """
        max_value = max_degree
        min_value = 1

        def normalize(value):
            return (value - min_value) / max_value + 1
        return normalize

    def to_json(self):
        """Serialise the graph to a JSON string.

        Returns:
            A JSON document (indented by 4 spaces) with two arrays:
            ``'nodes'`` - every vertex attribute plus ``'degree'``; and
            ``'edges'`` - ``'from'``/``'to'`` vertex indices plus ``'weight'``.
            ``json.dumps`` defaults apply, so non-ASCII text is escaped.
        """
        # The original built an unused normalizer from graph.maxdegree() here;
        # that dead computation has been removed.
        edges = []
        for edge in self.graph.es:
            source, target = edge.tuple
            edges.append({'from': source, 'to': target, 'weight': edge['weight']})
        nodes = []
        for node in self.graph.vs:
            node_attr = {key: node[key] for key in node.attributes()}
            node_attr['degree'] = node.degree()
            nodes.append(node_attr)
        return json.dumps({
            'nodes': nodes,
            'edges': edges,
        }, indent=4)

In [10]:
import igraph
from itertools import filterfalse, combinations

class NetworkConverter:
    """Build a product co-purchase network from transaction documents.

    Each transaction is a mapping with an ``'items'`` list; every item is a
    mapping with at least a ``'單品名稱'`` (item name) key and, for the
    ``'degree-price'`` method, an ``'amount'`` key.
    """

    # Weighting schemes understood by convert().
    METHODS = ('degree-original', 'degree-price', 'degree-mod')

    def __init__(self, purchase_list):
        """Store the transactions to convert.

        Args:
            purchase_list: sized iterable of transaction mappings.
        """
        self.purchase_list = purchase_list

    def convert(self, method='degree-price', support=0.001):
        """Turn the stored transactions into a product network.

        Every unordered pair of items bought together becomes an edge. The
        edge keeps a co-occurrence ``count`` and a ``weight`` that depends on
        ``method``:

        * ``'degree-original'`` - weight is always 1;
        * ``'degree-price'`` - sum over transactions of the pair's combined
          ``'amount'`` divided by the number of pairs in that transaction;
        * ``'degree-mod'`` - sum over transactions of 1 / number of pairs in
          that transaction.

        Args:
            method: one of ``NetworkConverter.METHODS``.
            support: fraction of the number of transactions; edges whose count
                is below ``int(len(purchase_list) * support)`` are dropped.

        Returns:
            Whatever ``to_graph`` returns for the surviving nodes and edges.

        Raises:
            ValueError: if ``method`` is not a known weighting scheme.
        """
        if method not in self.METHODS:
            raise ValueError('unknown method {!r}; expected one of: {}'.format(
                method, ', '.join(self.METHODS)))

        min_count = int(len(self.purchase_list) * support)
        accumulate = method != 'degree-original'
        result = {}
        # Iterate over the instance's own data, not a module-level global.
        for transaction in self.purchase_list:
            pairs = list(self.find_edges_in_list(transaction['items']))
            pair_count = len(pairs)
            for pair in pairs:
                edge = tuple(item['單品名稱'] for item in pair)
                if method == 'degree-price':
                    weight = sum(item['amount'] for item in pair) / pair_count
                elif method == 'degree-mod':
                    weight = 1 / pair_count
                else:
                    weight = 1

                # Edges are undirected: reuse the orientation stored first.
                key = edge if edge in result else edge[::-1]
                record = result.get(key)
                if record is None:
                    result[edge] = {'count': 1, 'weight': weight}
                    continue
                record['count'] += 1
                if accumulate:
                    record['weight'] += weight

        result = {
            edge: record for edge, record in result.items()
            if record['count'] >= min_count
        }
        # First-seen order (instead of a set) keeps vertex indices
        # reproducible between runs.
        nodes = list(dict.fromkeys(name for edge in result for name in edge))
        return self.to_graph(nodes, result)

    def find_edges_in_list(self, itemsets):
        """Return an iterator over all 2-combinations of ``itemsets``."""
        return combinations(itemsets, 2)

    def to_graph(self, nodes, edges):
        """Create the igraph graph and wrap it in a ``ProductNerwork``.

        Args:
            nodes: iterable of vertex names.
            edges: mapping ``(name_a, name_b) -> {'count': ..., 'weight': ...}``.

        Returns:
            A ``ProductNerwork`` built around the new graph.
        """
        g = igraph.Graph()
        for node in nodes:
            g.add_vertex(node)
        for edge, attrs in edges.items():
            # Non-positive weights are replaced by 1.
            weight = attrs['weight'] if attrs['weight'] > 0 else 1
            g.add_edge(edge[0], edge[1], weight=weight)
        return ProductNerwork(g)

In [12]:
# Wrap the loaded transactions in a converter that can build product networks.
converter = NetworkConverter(purchase_list)

In [14]:
# Build the network with the 'degree-original' weighting (every edge weight is 1)
# and the default support threshold.
product_network = converter.convert(method='degree-original')

In [15]:
# Report how many vertices and edges the network has.
node_count = len(product_network.graph.vs)
edge_count = len(product_network.graph.es)
summary_template = 'Node number: {}\nEdge number: {}'
print(summary_template.format(node_count, edge_count))

Node number: 74
Edge number: 113


# 排序Community

In [16]:
# Print each community (highest score first): score, member names, separator.
communities = product_network.get_communities()
for group in communities:
    print(group['weight'], ' '.join(group['items']), '==============', sep='\n')

4.0
白玉蘿蔔 手工高麗菜捲 旗魚黑輪 海鮮魚卵棒 黃金魚豆腐 蟹肉糰子 究極味付蛋 日式黑輪 讚岐烏龍麵 野菜多多魚餅 特級花枝丸 黃金厚切魚板 千層玉子燒 筊白筍 關東煮本舖拉麵
1.0
全家熱狗麵包 蕃薯（１５元） 統一陽光無糖高纖豆漿 茶葉蛋（銷售用） 鮪魚飯糰 肉鬆飯糰 特濃咖啡拿鐵 ＬＣＡ活菌原味發酵乳 爆濃起司熱狗 簡單點無加糖優酪乳 蕃薯（２５元） 光泉米漿 鮮奶茶 經典原味熱狗 蕃薯（２０元） 香蕉單入
0.9333333333333333
伯朗咖啡 藍山咖啡 （新）銷售用購物袋１８號袋 七星１０毫克硬盒香煙 義美奶茶 鹼性離子水 金牌台灣啤酒５００ＭＬ 天然水２．２Ｌ 麥香奶茶ＴＰ３００ 金牌台啤罐裝（６入） （新）４５號銷售用購物袋 金牌台灣啤酒 麥香紅茶ＴＰ３００ 天然水
0.6
優格軟糖（Ｏｒａｎｇｅ） 奶香綠茶３３０ＭＬ 代收折價卷 優格軟糖（Ｐｅａｃｈ）
0.6
頑皮滷蛋－原味 伊藤園蘋果紅茶 促銷券０６ Ｃｒｅａｍ－Ｏ黑巧克力三明治餅
0.5
聯合報 蘋果日報 自由時報
0.3333333333333333
促銷券１０ ＭＭ花生巧克力
0.3333333333333333
黑巧杏仁乳加 牛奶杏仁乳加
0.3333333333333333
葡萄冰茶 蘋果冰茶
0.3333333333333333
原味ＹＵＲＯＣＫ魚薯條無 辣味ＹＵＲＯＣＫ魚薯條無
0.3333333333333333
冰拿鐵大杯 熱拿鐵大杯
0.3333333333333333
大口法香烤雞飯糰 熱拿鐵中杯
0.3333333333333333
戰禍邪神第１２章 戰禍邪神第１１章
0.3333333333333333
空瓶回收（銷售用） 紅標料理米酒
0.3333333333333333
大口奶油蕈菇起司雞排飯糰 特濃抹茶拿鐵


In [26]:
# Build the JSON in this cell so the export does not depend on `data` being
# left over from a cell executed elsewhere in the notebook.
data = product_network.to_json()
# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences (the resulting path is unchanged).
with open(r'D:\MIS_Project\product-netowork-demo\src\default_pruned.json', 'w', encoding='utf-8') as file:
    file.write(data)

# 利用Betweenness找出可能是connector的節點

In [39]:
# Vertices with positive weighted betweenness, sorted from highest to lowest.
connectors = product_network.get_connectors()

In [40]:
# One line per connector: betweenness score, then product name, left-aligned.
row_template = '{:<5}: {:<5}'
for entry in connectors:
    print(row_template.format(entry['betweeness'], entry['name']))

127.0: 茶葉蛋（銷售用）
87.0 : （新）銷售用購物袋１８號袋
43.0 : 中華電信 
32.0 : 鮪魚飯糰 
27.0 : 台新信用卡
23.0 : 國民年金代
21.0 : 國泰世華卡
20.0 : 白玉蘿蔔 
17.0 : 經典原味熱狗
17.0 : 代收手續費４
17.0 : 光泉米漿 
13.0 : 台灣自來水
9.0  : 特級花枝丸
6.0  : 代收手續費１５
4.0  : 蟹肉糰子 
4.0  : 千層玉子燒
3.0  : 黃金厚切魚板
3.0  : 促銷券０６
3.0  : 代收折價卷
2.0  : 黃金魚豆腐
1.0  : 代收手續費５
1.0  : 蘋果日報 


# 更新Vertex的attribute(Community)

In [25]:
# Serialise the network (nodes with their attributes and degree, plus
# weighted edges) to a JSON string.
data = product_network.to_json()