In [1]:
from pymongo import MongoClient

In [2]:
# Connect to the MongoDB server on localhost:27017 and select the 'pn' database.
client = MongoClient('localhost', 27017)
db = client['pn']

In [3]:
# Load every transaction whose `items` array has an element at index 1
# (i.e. at least two items), projecting only the `items` field.
purchase_list = list(db['transactions'].find({'items.1': { '$exists': True } }, projection=['items']))

In [4]:
# Number of transactions returned by the query above.
len(purchase_list)

23288

In [5]:
# Show the second loaded transaction to inspect the document layout.
print(purchase_list[1])

{'_id': ObjectId('5ceb55f7227c2680ca884f72'), 'items': [{'品號-品名稱': '19-吐司蛋糕', '群號-群名稱': '192-蛋糕', '銷售單價': 35.0, '單品名稱': '雞蛋牛奶捲', 'amount': 70.0}, {'品號-品名稱': '00-傳統代收', '群號-群名稱': '18-代碼繳費', '銷售單價': 0.0, '單品名稱': '代碼繳費１', 'amount': 0.0}]}


# Network Analysis

In [127]:
import json
class ProductNerwork:
    """Wrap a weighted product graph and expose community / connector summaries.

    On construction the graph is clustered with the fast-greedy community
    algorithm (using the ``weight`` edge attribute) and every vertex is tagged
    with its ``community`` index and its positional ``id``.
    """

    def __init__(self, graph):
        """Cluster ``graph`` and annotate its vertices in place.

        Args:
            graph: Graph object providing ``community_fastgreedy``, ``vs`` and
                ``es``; edges are expected to carry a ``weight`` attribute.
        """
        self.graph = graph
        self.communities = graph.community_fastgreedy(weights='weight').as_clustering()
        # Read the membership list once instead of once per vertex.
        membership = self.communities.membership
        for index, vertex in enumerate(self.graph.vs):
            vertex.update_attributes({'community': membership[index], 'id': index})

    def get_communities(self, sort=True):
        """Summarise each detected community.

        Args:
            sort: When true, order the result by ``weight`` descending.

        Returns:
            A list of dicts with keys ``weight`` (scaled sum of the edge
            weights inside the community) and ``items`` (vertex names).
        """
        dics = []
        for subgraph in self.communities.subgraphs():
            nums = len(subgraph.vs)
            # NOTE(review): `* nums / nums` cancels out, so this is effectively
            # total_weight / (nums + 1). The expression is kept verbatim so that
            # previously recorded results are not perturbed - confirm the
            # intended normalisation.
            weight_sum = sum(edge['weight'] for edge in subgraph.es) * nums / nums / (nums + 1)
            dics.append({
                'weight': weight_sum,
                'items': [node['name'] for node in subgraph.vs],
            })
        if sort:
            return sorted(dics, key=lambda x: x['weight'], reverse=True)
        return dics

    def get_connectors(self):
        """Return vertices with positive weighted betweenness, highest first.

        Returns:
            A list of dicts with keys ``name`` and ``betweeness`` (the key
            spelling is kept because callers read it).
        """
        items = [
            {'name': self.graph.vs[index]['name'], 'betweeness': value}
            for index, value in enumerate(self.graph.betweenness(weights='weight'))
            if value > 0
        ]
        items.sort(key=lambda x: x['betweeness'], reverse=True)
        return items

    def normalizer(self, max_degree):
        """Build a function mapping a degree to ``(degree - 1) / max_degree + 1``.

        Args:
            max_degree: Largest degree in the graph, used as the divisor.

        Returns:
            A one-argument callable. When ``max_degree`` is 0 (a graph without
            edges) the callable returns the baseline ``1.0`` instead of
            dividing by zero.
        """
        min_value = 1

        if not max_degree:
            def normalize(value):
                return 1.0
            return normalize

        def normalize(value):
            return (value - min_value) / max_degree + 1
        return normalize

    def to_json(self):
        """Serialise the graph to a JSON string.

        Returns:
            A JSON document (indented by 4 spaces) with ``nodes`` (every vertex
            attribute plus ``degree``) and ``edges`` (``from``/``to`` vertex
            indices plus ``weight``).
        """
        edges = []
        for edge in self.graph.es:
            source, target = edge.tuple
            edges.append({'from': source, 'to': target, 'weight': edge['weight']})
        nodes = []
        for node in self.graph.vs:
            node_attr = dict(node.attributes())
            node_attr['degree'] = node.degree()
            nodes.append(node_attr)
        return json.dumps({
            'nodes': nodes,
            'edges': edges,
        }, indent=4)

In [6]:
import igraph
from itertools import filterfalse, combinations

class NetworkConverter:
    """Turn a list of purchase transactions into a product co-purchase network.

    Every unordered pair of items bought in the same transaction becomes an
    edge; edges that co-occur too rarely are dropped.
    """

    def __init__(self, purchase_list):
        """Store the transactions to convert.

        Args:
            purchase_list: Sequence of dicts with an ``items`` list; each item
                is a dict read via its '單品名稱' and 'amount' keys.
        """
        self.purchase_list = purchase_list

    def convert(self, method = 'degree-price', support=0.002):
        """Build the product network from the stored transactions.

        Args:
            method: ``'degree-price'`` weights a pair by the summed ``amount``
                of its two items divided by the number of pairs in the
                transaction; ``'adjust-degree'`` uses ``1 / number of pairs``;
                any other value gives every edge the constant weight 1.
            support: Fraction of the transaction count; an edge is kept only
                when its co-occurrence count is at least
                ``int(len(purchase_list) * support)``.

        Returns:
            Whatever ``to_graph`` returns for the surviving nodes and edges.
        """
        min_count = int(len(self.purchase_list) * support)
        result = {}
        # Iterate the instance's own transactions, not a notebook-level global.
        for transaction in self.purchase_list:
            itemsets = transaction['items']
            if len(itemsets) <= 1:
                continue
            edge_list = list(self.find_edges_in_list(itemsets))
            length = len(edge_list)
            for edge_dict_tuple in edge_list:
                edge = tuple(dic['單品名稱'] for dic in edge_dict_tuple)
                if method == 'degree-price':
                    weight = sum(dic['amount'] for dic in edge_dict_tuple) / length
                    accumulate = True
                elif method == 'adjust-degree':
                    weight = 1 / length
                    accumulate = True
                else:
                    # Plain co-occurrence: the weight stays at 1, only the count grows.
                    weight = 1
                    accumulate = False

                # Edges are undirected: reuse the entry stored in either orientation.
                reverse = (edge[1], edge[0])
                if edge in result:
                    key = edge
                elif reverse in result:
                    key = reverse
                else:
                    result[edge] = {'count': 1, 'weight': weight}
                    continue
                result[key]['count'] += 1
                if accumulate:
                    result[key]['weight'] += weight

        result = {key: attrs for key, attrs in result.items() if attrs['count'] >= min_count}
        nodes = {item for pair in result for item in pair}
        return self.to_graph(nodes, result)

    def find_edges_in_list(self, itemsets):
        """Return an iterator over all 2-item combinations of ``itemsets``."""
        return combinations(itemsets, 2)

    def to_graph(self, nodes, edges):
        """Create an igraph graph from node names and weighted edges.

        Args:
            nodes: Iterable of vertex names.
            edges: Mapping ``(name_a, name_b) -> {'weight': ..., ...}``.

        Returns:
            A ``ProductNerwork`` wrapping the constructed graph.
        """
        g = igraph.Graph()
        for node in nodes:
            g.add_vertex(node)
        for edge, attrs in edges.items():
            # Non-positive weights (e.g. pairs of zero-amount items) fall back to 1.
            weight = attrs['weight'] if attrs['weight'] > 0 else 1
            g.add_edge(edge[0], edge[1], weight=weight)
        return ProductNerwork(g)

In [135]:
#adjusting

import igraph
import numpy as np
from itertools import filterfalse, combinations

class NetworkConverter:
    """Turn purchase transactions into a product co-purchase network.

    Every unordered pair of items bought in the same transaction becomes an
    edge; only the edges whose accumulated weight reaches a percentile
    threshold are kept.
    """

    def __init__(self, purchase_list):
        """Store the transactions to convert.

        Args:
            purchase_list: Sequence of dicts with an ``items`` list; each item
                is a dict read via its '單品名稱' and 'amount' keys.
        """
        self.purchase_list = purchase_list

    def convert(self, method = 'degree-price', support = 0.0002):
        """Build the product network from the stored transactions.

        Args:
            method: ``'degree-price'`` weights a pair by the summed ``amount``
                of its two items divided by the number of pairs in the
                transaction; ``'adjust-degree'`` uses ``1 / number of pairs``;
                any other value adds 1 per co-occurrence.
            support: Fraction of edges to keep, taken from the top of the
                weight distribution: the cut-off is the
                ``(1 - support) * 100``-th percentile of the edge weights.

        Returns:
            Whatever ``to_graph`` returns for the surviving nodes and edges.
            The cut-off weight is printed as a side effect when at least one
            edge exists.
        """
        result = {}
        # Iterate the instance's own transactions, not a notebook-level global.
        for transaction in self.purchase_list:
            itemsets = transaction['items']
            if len(itemsets) <= 1:
                continue
            edge_list = list(self.find_edges_in_list(itemsets))
            length = len(edge_list)
            for edge_dict_tuple in edge_list:
                edge = tuple(dic['單品名稱'] for dic in edge_dict_tuple)
                if method == 'degree-price':
                    weight = sum(dic['amount'] for dic in edge_dict_tuple) / length
                elif method == 'adjust-degree':
                    weight = 1 / length
                else:
                    weight = 1

                # Edges are undirected: reuse the entry stored in either orientation.
                reverse = (edge[1], edge[0])
                if edge not in result and reverse in result:
                    edge = reverse
                entry = result.setdefault(edge, {'count': 0, 'weight': 0})
                entry['count'] += 1
                entry['weight'] += weight

        if not result:
            # No co-purchased pairs: skip the percentile, which is undefined
            # for an empty sample.
            return self.to_graph(set(), result)

        weight_support = np.percentile(
            [attrs['weight'] for attrs in result.values()], (1 - support) * 100
        )
        print(weight_support)
        result = {
            key: attrs for key, attrs in result.items()
            if attrs['weight'] >= weight_support
        }
        nodes = {item for pair in result for item in pair}
        return self.to_graph(nodes, result)

    def find_edges_in_list(self, itemsets):
        """Return an iterator over all 2-item combinations of ``itemsets``."""
        return combinations(itemsets, 2)

    def to_graph(self, nodes, edges):
        """Create an igraph graph from node names and weighted edges.

        Args:
            nodes: Iterable of vertex names.
            edges: Mapping ``(name_a, name_b) -> {'weight': ..., ...}``.

        Returns:
            A ``ProductNerwork`` wrapping the constructed graph.
        """
        g = igraph.Graph()
        for node in nodes:
            g.add_vertex(node)
        for edge, attrs in edges.items():
            # Non-positive weights (e.g. pairs of zero-amount items) fall back to 1.
            weight = attrs['weight'] if attrs['weight'] > 0 else 1
            g.add_edge(edge[0], edge[1], weight=weight)
        return ProductNerwork(g)

In [136]:
# Wrap the loaded transactions. NetworkConverter is defined in two cells of this
# notebook, so the class used here is whichever definition was executed last.
converter = NetworkConverter(purchase_list)

In [152]:
# Build the product network using the 'degree-price' weighting.
product_network = converter.convert('degree-price')

2250.0960000006307


In [153]:
# Report how many vertices and edges the resulting graph has.
print('Node number: {}\nEdge number: {}'.format(len(product_network.graph.vs), len(product_network.graph.es)))

Node number: 18
Edge number: 10


# 排序Community

In [148]:
# List each community (already sorted by weight, highest first): its weight,
# its space-separated item names, then a separator line.
communities = product_network.get_communities()
for entry in communities:
    member_names = ' '.join(entry['items'])
    print(entry['weight'], member_names, '==============', sep='\n')

127.0
代收手續費４ 健保費代收 國民年金代
85.33333333333333
戰禍邪神第１２章 戰禍邪神第１１章
72.0
伊藤園蘋果紅茶 促銷券０６
43.333333333333336
ＦＰ店到店 店到店ＦＰ手續費
35.0
台鐵手續費 台鐵取票
34.666666666666664
紅標料理米酒 空瓶回收（銷售用）
29.0
中華電信 台灣自來水
28.333333333333332
高鐵手續費 高鐵取票
28.0
經典原味熱狗 全家熱狗麵包


# 利用Betweenness找出可能是connector的節點

In [149]:
# Vertices with positive weighted betweenness, sorted from highest to lowest.
connectors = product_network.get_connectors()

In [150]:
# Print one "score: name" line per connector, both fields left-aligned to width 5.
for entry in connectors:
    score, label = entry['betweeness'], entry['name']
    print('{:<5}: {:<5}'.format(score, label))

1.0  : 代收手續費４


# 更新Vertex的attribute(Community)並匯出為JSON

In [151]:
# Serialise the network (nodes with their attributes, edges with weights) to a JSON string.
data = product_network.to_json()

In [106]:
# Write the JSON string to default.json in the working directory, encoded as UTF-8.
with open('default.json', 'w', encoding='utf-8') as file:
    file.write(data)