In [1]:
from pymongo import MongoClient

In [2]:
# Connect to the MongoDB server on this machine and select the 'pn' database.
client = MongoClient(host='localhost', port=27017)
db = client['pn']

In [3]:
# Match only documents where `items.1` exists, and fetch just the `items`
# field. Presumably `items` is an array, so this keeps transactions with at
# least two entries -- verify against the collection schema.
multi_item_filter = {'items.1': {'$exists': True}}
transaction_cursor = db['transactions'].find(multi_item_filter, projection=['items'])
purchase_list = list(transaction_cursor)

In [5]:
# Number of transactions returned by the query above.
len(purchase_list)

23288

# Network Analysis

In [6]:
from itertools import filterfalse, combinations

def find_edges_in_list(itemsets):
    """Return every 2-element combination of the entries in ``itemsets``.

    Args:
        itemsets: Sequence of entries belonging to one transaction.

    Returns:
        An ``itertools.combinations`` iterator of 2-tuples. Each tuple keeps
        the relative order its entries have in ``itemsets``; inputs with
        fewer than two entries yield nothing.
    """
    return combinations(itemsets, 2)

def convert(purchase_list, support, undirected=False):
    """Build a co-purchase edge table from transactions and prune rare pairs.

    For every transaction with at least two items, each pair of items
    contributes one occurrence to the pair's ``count`` and adds
    ``(amount_a + amount_b) / number_of_pairs_in_that_transaction`` to the
    pair's ``weight``.

    Args:
        purchase_list: Iterable of transactions. Each transaction is a mapping
            with an ``'items'`` sequence whose entries are mappings providing
            the keys ``'單品名稱'`` (used as the node name) and ``'amount'``.
        support: Minimum ``count`` a pair needs in order to be kept.
        undirected: When False (the default), pairs are keyed in the order the
            items appear inside the transaction, so ``(A, B)`` and ``(B, A)``
            are counted separately and each must reach ``support`` on its own.
            When True, the two names are sorted first so both orders share a
            single entry. Use this when the edges feed an undirected graph.

    Returns:
        A ``(nodes, edges)`` tuple. ``edges`` maps each surviving
        ``(name_a, name_b)`` tuple to ``{'count': int, 'weight': number}``;
        ``nodes`` is the set of names that occur in at least one surviving
        pair.
    """
    result = {}
    for transaction in purchase_list:
        itemsets = transaction['items']
        if len(itemsets) < 2:
            # A single item cannot form a pair.
            continue
        pairs = list(combinations(itemsets, 2))
        pair_count = len(pairs)
        for pair in pairs:
            edge = tuple(dic['單品名稱'] for dic in pair)
            if undirected:
                # Canonical order so (A, B) and (B, A) accumulate together.
                edge = tuple(sorted(edge))
            # Spread the pair's combined amount over all pairs of the basket.
            weight = sum(dic['amount'] for dic in pair) / pair_count
            stats = result.get(edge)
            if stats is None:
                result[edge] = {'count': 1, 'weight': weight}
            else:
                stats['count'] += 1
                stats['weight'] += weight
    # Drop pairs seen fewer than `support` times.
    result = {
        edge: stats for edge, stats in result.items() if stats['count'] >= support
    }
    nodes = {item for edge in result for item in edge}
    return (nodes, result)

In [7]:
# Keep only item pairs whose occurrence count reaches MIN_SUPPORT.
MIN_SUPPORT = 8
nodes, edges_dict = convert(purchase_list, support=MIN_SUPPORT)

In [8]:
# Report how many nodes and edges survived the support filter.
node_count, edge_count = len(nodes), len(edges_dict)
print('Node number: {}\nEdge number: {}'.format(node_count, edge_count))

Node number: 194
Edge number: 453


In [9]:
import igraph

In [10]:
# Start from an empty graph; the undirected default is spelled out explicitly.
g = igraph.Graph(directed=False)

In [11]:
# One vertex per item name. Re-running this cell without rebuilding `g`
# would add the same names again.
for item_name in nodes:
    g.add_vertex(name=item_name)

In [12]:
# Sanity check: should equal the node count printed earlier.
g.vcount()

194

In [13]:
# Add one edge per surviving item pair, looked up by vertex name.
for (source_name, target_name), attrs in edges_dict.items():
    edge_weight = attrs['weight']
    if not edge_weight > 0:
        # Non-positive weights are replaced by 1 so every edge carries a
        # positive weight.
        edge_weight = 1
    g.add_edge(source_name, target_name, weight=edge_weight)

In [14]:
# Merged edges get the sum of their weights.
merge_weights = {"weight": "sum"}
# With its default arguments simplify() collapses parallel edges (e.g. A-B
# and B-A) and removes self-loops.
g.simplify(combine_edges=merge_weights)
# NOTE(review): `g` is created with igraph.Graph() in this notebook, which is
# presumably already undirected, so this call likely changes nothing -- confirm.
g.to_undirected(mode='mutual', combine_edges=merge_weights)

In [15]:
# Fast-greedy community detection using the 'weight' edge attribute.
communities = g.community_fastgreedy(weights='weight')

In [16]:
# Turn the community-detection result into a flat clustering. No cluster
# count is passed, so igraph chooses it (presumably its optimal-count hint --
# confirm against the igraph docs).
cluster = communities.as_clustering()

# 排序Community

In [17]:
# Score every community and print its member names, highest score first.
dics = []
for community_graph in cluster.subgraphs():
    member_count = len(community_graph.vs)
    edge_weight_total = sum(e['weight'] for e in community_graph.es)
    # NOTE(review): `* member_count / member_count` cancels out, so the score
    # is effectively total edge weight / (member_count + 1). Kept as written;
    # confirm the intended normalisation.
    score = edge_weight_total * (member_count) / member_count / (member_count + 1)
    member_names = ' '.join(v['name'] for v in community_graph.vs)
    dics.append({'weight': score, 'comms': member_names})
a = sorted(dics, key=lambda entry: entry['weight'], reverse=True)
for entry in a:
    print(entry['comms'])
    print("===========")

戰禍邪神第１１章 戰禍邪神第１２章
ＦＰ店到店 店到店ＦＰ手續費
紅標料理米酒 空瓶回收（銷售用）
頑皮滷蛋－原味 Ｃｒｅａｍ－Ｏ黑巧克力三明治餅 促銷券０６ 伊藤園蘋果紅茶
冰拿鐵大杯 熱拿鐵大杯
原味ＹＵＲＯＣＫ魚薯條無 辣味ＹＵＲＯＣＫ魚薯條無
店到店雅虎拍賣手續費 雅虎拍賣寄件
熱美式中杯 大口奶油蕈菇起司雞排飯糰 綠豆沙牛乳 特濃抹茶拿鐵 熱拿鐵中杯 大口法香烤雞飯糰
特濃黑可可 蕃薯（２０元） 商店街取件 簡單點原味優酪乳 全家熱狗麵包 黑胡椒熱狗 義美古早傳統無糖豆奶 茶葉蛋（銷售用） 蕃薯（１５元） 蝦皮取件Ｃ 統一陽光無糖高纖豆漿 特濃咖啡拿鐵 蕃薯（２５元） 香蕉單入 義美奶茶 簡單點無加糖優酪乳 蕃薯（３０元） 鮮奶茶 爆濃起司熱狗 養樂多 經典原味熱狗 頂級鮮奶優格－莓果穀物脆片 肉鬆起酥麵包 熱拿鐵小杯
千層玉子燒 手工高麗菜捲 鮮香菇 黃金魚豆腐 關東煮本舖冬粉 日式黑輪 白玉蘿蔔 野菜多多魚餅 關東煮本舖拉麵 究極味付蛋 蟹肉糰子 王子麵 杏鮑菇 特級花枝丸 黃金厚切魚板 讚岐烏龍麵 筊白筍 旗魚黑輪 海鮮魚卵棒
肉鬆飯糰 ＬＣＡ活菌原味發酵乳 光泉無加糖鮮豆漿 光泉米漿 林鳳營全脂鮮乳 雪花蛋糕 鮪魚飯糰
台鐵手續費 台鐵取票
合庫代１５ 雅虎拍賣繳費 代收手續費１５ 玉山淘寶款 中信外１５
高鐵手續費 高鐵取票
促銷券１０ ＭＭ花生巧克力
聯合報 蘋果日報 自由時報
代收手續費２５ 寶物交易代
優格軟糖（Ｏｒａｎｇｅ） 優格軟糖（Ｐｅａｃｈ） ＡＷ極酷嗆涼紫冰野莓口香糖 超涼口香糖（袋） 代收折價卷 奶香綠茶３３０ＭＬ 潤喉糖－蜂蜜檸檬
衛生冰 麥香阿薩姆奶茶 原萃日式綠茶 可口可樂ＰＥＴ 麥香綠茶ＴＰ３００ 飲冰室茶集紅奶茶 奶油雞排歐姆蛋燴飯 飲冰室茶集綠奶茶 紫米紅豆湯圓 純喫茶紅茶 蔥爆牛肉燴飯 純喫茶綠茶６５０ｍｌ 金牌台灣啤酒５００ＭＬ 麥香錫蘭奶茶 麥香紅茶ＴＰ３００ 藍山咖啡 純喫茶無糖綠茶６５０ｍｌ 茶裏王日式綠茶 統一大布丁（雞蛋口味） 金牌台啤罐裝（６入） 天然水２．２Ｌ 台鹽海洋鹼性離子水 （新）銷售用購物袋１８號袋 鹼性離子水 冷泡茶冷萃綠茶無糖 泡沫綠茶ＴＰ３００ 冷泡茶冰釀烏龍 天然水 金牌台灣啤酒 七星１０毫克硬盒香煙 雲絲頓紅１０毫克香煙 （新）４５號銷售用購物袋 伯朗咖啡 日式蒜香燒豚飯 金蘋果調

# 利用Betweenness找出可能是connector的節點

In [19]:
# Rank vertices with non-zero betweenness, highest first.
# NOTE(review): igraph's betweenness appears to interpret `weights` as edge
# lengths for shortest paths, so heavier co-purchase edges would count as
# longer -- confirm this is the intended meaning.
# The 'betweeness' key spelling is kept as-is because it is part of the output.
betweenness_scores = g.betweenness(weights='weight')
items = sorted(
    (
        {'name': g.vs[vertex_index]['name'], 'betweeness': score}
        for vertex_index, score in enumerate(betweenness_scores)
        if score > 0
    ),
    key=lambda entry: entry['betweeness'],
    reverse=True,
)

In [20]:
# Ten vertices with the highest betweenness.
items[:10]

[{'name': '（新）銷售用購物袋１８號袋', 'betweeness': 2875.0},
 {'name': '茶葉蛋（銷售用）', 'betweeness': 2560.0},
 {'name': '促銷券０６', 'betweeness': 726.0},
 {'name': '代收折價卷', 'betweeness': 506.0},
 {'name': '熱拿鐵中杯', 'betweeness': 419.0},
 {'name': '麥香奶茶ＴＰ３００', 'betweeness': 256.0},
 {'name': '經典原味熱狗', 'betweeness': 255.0},
 {'name': '肉鬆飯糰', 'betweeness': 248.0},
 {'name': '鮪魚飯糰', 'betweeness': 173.0},
 {'name': '黑胡椒熱狗', 'betweeness': 172.0}]

# 更新Vertex的attribute(Community)

In [21]:
# Store each vertex's community index and positional id as vertex attributes.
for vertex_id, community_id in enumerate(cluster.membership):
    g.vs[vertex_id].update_attributes({ 'community': community_id, 'id': vertex_id })

In [22]:
def normalizer(max_degree):
    """Create a function mapping ``value`` to ``(value - 1) / max_degree + 1``.

    Args:
        max_degree: Divisor used by the returned function.

    Returns:
        A one-argument callable. It raises ``ZeroDivisionError`` when called
        if ``max_degree`` is 0.
    """
    def normalize(value):
        # Shift by the fixed lower bound of 1, scale, then offset back to 1.
        shifted = value - 1
        return shifted / max_degree + 1
    return normalize

In [35]:
import json
def to_json(graph, cluster):
    """Serialize a graph and its community assignment to a JSON string.

    Side effect: every vertex of ``graph`` gets (or has overwritten) the
    attributes ``community`` (taken from ``cluster.membership``) and ``id``
    (its position in ``graph.vs``).

    Args:
        graph: Graph object exposing ``vs`` and ``es`` sequences. Each edge
            provides ``.tuple`` and a ``'weight'`` attribute; each vertex
            provides ``update_attributes``, ``attributes()`` and ``degree()``.
        cluster: Object whose ``membership`` sequence gives the community
            index of the vertex at the same position.

    Returns:
        A JSON document (indented by 4 spaces) with two keys. ``nodes`` lists
        all vertex attributes plus ``degree``; ``edges`` lists ``from``/``to``
        endpoints (as found in ``edge.tuple``) and ``weight``. ``json.dumps``
        runs with its default ``ensure_ascii=True``, so non-ASCII text is
        written as ``\\uXXXX`` escapes.
    """
    # Tag the vertices first so the attributes appear in the node records below.
    for index, vertex in enumerate(graph.vs):
        vertex.update_attributes({ 'community': cluster.membership[index], 'id': index })

    edges = []
    for edge in graph.es:
        source, target = edge.tuple
        edges.append({'from': source, 'to': target, 'weight': edge['weight']})

    nodes = []
    for node in graph.vs:
        node_attr = {key: node[key] for key in node.attributes()}
        node_attr['degree'] = node.degree()
        nodes.append(node_attr)

    return json.dumps({
        'nodes': nodes,
        'edges': edges,
    }, indent=4)

In [36]:
# Serialize the graph to a JSON string. to_json also (re)writes the
# 'community' and 'id' attributes on the vertices of g.
data = to_json(g, cluster)

In [37]:
# Write the serialized graph to data.json in the current working directory.
with open('data.json', mode='w', encoding='utf-8') as output_file:
    output_file.write(data)