In [1]:
# coding: utf-8
"""
サブグラフを抽出するためのコード
"""
from igraph import *
import csv
import collections
import pickle
import numpy as np
from openopt import QP

# Read a CSV file
def readcsv(path):
    """Read a CSV file and return all of its records.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list with one entry per record; each entry is the list of fields
        produced by ``csv.reader``.
    """
    # The file is opened in binary mode, as before. The ``with`` block
    # guarantees the handle is closed even when parsing raises, which the
    # previous version never did.
    with open(path, "rb") as f:
        return [row for row in csv.reader(f)]

def writecsv(arr, path):
    """Append rows to a CSV file.

    Args:
        arr: Iterable of rows; each row is an iterable of field values and
            is passed unchanged to ``csv.writer(...).writerows``.
        path: Path of the CSV file. It is opened in append-binary mode
            ("ab"), so existing content is kept.
    """
    # ``with`` closes (and flushes) the handle even if a row fails to
    # serialise; previously an exception in writerows leaked the handle.
    with open(path, "ab") as f:
        csv.writer(f).writerows(arr)

def readdump(path):
    """Load and return the object stored in a pickle file.

    Args:
        path: Path of the pickle file.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling; only call this on files from a trusted source.
    # Pickle data is binary, so the file is opened with "rb" (text mode
    # corrupts binary protocols on some platforms), and ``with`` closes the
    # handle even when unpickling fails.
    with open(path, "rb") as f:
        return pickle.load(f)

# Turn a directed edge list into a weighted undirected network
def cal_edgelist_to_network(list_edge):
    """Collapse a directed edge list into a weighted undirected network.

    Args:
        list_edge: Iterable of edges; each edge is a sequence of sortable,
            hashable node labels (e.g. one CSV row ``[source, target]``).

    Returns:
        A dict with three lists:
          * "vertex": every distinct node label (order unspecified),
          * "edge":   every distinct undirected edge as a sorted tuple,
          * "weight": how many times each edge occurred, aligned with "edge".
        An empty edge list yields three empty lists.
    """
    # Sorting the endpoints makes (a, b) and (b, a) the same undirected edge.
    list_undirected = [tuple(sorted(row)) for row in list_edge]
    # Node list
    list_vertices = list(set([word for row in list_undirected for word in row]))
    # Multiplicity of each undirected edge becomes its weight. keys() and
    # values() iterate in the same order, and unlike zip(*items()) they do
    # not fail when there are no edges at all.
    edge_counts = collections.Counter(list_undirected)
    return {"vertex": list_vertices,
            "edge": list(edge_counts.keys()),
            "weight": list(edge_counts.values())}

# Build the list of per-cluster sub-networks from a clustered network.
# Every sub-network keeps the full vertex list (needed to compute PageRank).
def cal_cluster_to_network(dict_network):
    """Split a clustered network into one sub-network per cluster.

    Args:
        dict_network: Dict with aligned lists "vertex" and "cluster"
            (cluster label of each vertex) and aligned lists "edge" and
            "weight". Vertex labels must be hashable.

    Returns:
        A list with one dict per cluster. Each dict has
          * "vertex": the complete vertex list of ``dict_network``,
          * "edge":   the edges with at least one endpoint in the cluster,
          * "weight": the weights of those edges, aligned with "edge".

    Raises:
        KeyError: if ``dict_network`` has no "cluster" entry.
    """
    if "cluster" not in dict_network:
        print("クラスタリングができていません")
        # Fail right here instead of a few lines later with an incidental
        # KeyError from the lookup below.
        raise KeyError("cluster")

    # Group the words by cluster
    dict_cluster = collections.defaultdict(list)
    for word, cluster in zip(dict_network["vertex"], dict_network["cluster"]):
        dict_cluster[cluster].append(word)

    list_dict_network = []
    for cluster_vertex in dict_cluster.values():
        # A set makes the per-edge membership test O(1) instead of a scan
        # over the cluster's vertex list.
        set_cluster_vertex = set(cluster_vertex)
        list_cluster_edge_one = []
        list_cluster_weight_one = []
        # An edge belongs to the cluster when at least one endpoint does
        for row, weight in zip(dict_network["edge"], dict_network["weight"]):
            # Switch between "and" and "or" here to compare performance
            if row[0] in set_cluster_vertex or row[1] in set_cluster_vertex:
                list_cluster_edge_one.append(row)
                list_cluster_weight_one.append(weight)
        list_dict_network.append({"vertex": dict_network["vertex"],
                                  "edge": list_cluster_edge_one,
                                  "weight": list_cluster_weight_one})

    return list_dict_network

# Compute purity, inverse purity and the F-measure
def _cal_majority_ratio(dict_groups):
    """Return (sum of each group's most frequent label count) / (total members)."""
    list_major_count = []
    list_group_size = []
    for list_label in dict_groups.values():
        # Size of the largest label within this group
        list_major_count.append(max(collections.Counter(list_label).values()))
        list_group_size.append(len(list_label))
    return float(np.sum(list_major_count)) / np.sum(list_group_size)


def cal_f_measure(list_predict_measure):
    """Print purity, inverse purity and their harmonic mean (F-value).

    Args:
        list_predict_measure: Re-iterable collection of pairs
            ``(predicted_cluster, original_class)``, one per item.

    Returns:
        None. The three scores are written to standard output.

    Raises:
        ValueError: if ``list_predict_measure`` is empty (the scores are
            undefined without data).
    """
    # Original classes seen inside each generated cluster
    dict_predict_cluster = collections.defaultdict(list)
    # Generated clusters seen inside each original class
    dict_measure_cluster = collections.defaultdict(list)
    for row in list_predict_measure:
        dict_predict_cluster[row[0]].append(row[1])
        dict_measure_cluster[row[1]].append(row[0])

    if not dict_predict_cluster:
        raise ValueError("list_predict_measure must contain at least one pair")

    # Purity: majority share measured per generated cluster
    purity = _cal_majority_ratio(dict_predict_cluster)
    # "%s" formatting keeps the output identical to the former Python 2
    # print statement (label, two spaces, value) while also being valid
    # Python 3.
    print("Purity:  %s" % purity)

    # Inverse purity: the same measure per original class
    inverse_purity = _cal_majority_ratio(dict_measure_cluster)
    print("Inverse Purity:  %s" % inverse_purity)

    # Harmonic mean of the two; both are > 0 for non-empty input
    print("F-value:  %s" % (2 / (1 / purity + 1 / inverse_purity)))
    
# Solve a convex quadratic program to obtain p(topic)
def _sorted_page_rank(dict_network):
    """Return the network's "page_rank" values as an array ordered by vertex label."""
    list_pair = sorted(zip(dict_network["vertex"], dict_network["page_rank"]),
                       key=lambda x: x[0])
    return np.array([row[1] for row in list_pair])


def cal_prob_topic(dict_network_master, list_dict_network_sub):
    """Fit mixture weights of the sub-network PageRanks to the master PageRank.

    Args:
        dict_network_master: Dict with aligned lists "vertex" and
            "page_rank" for the full network.
        list_dict_network_sub: Non-empty list of dicts with the same two
            keys, one per sub-network. Each is expected to list the same
            vertices as the master so the sorted vectors line up.

    Returns:
        ``r.xf`` of the solved problem, i.e. the solver's solution vector
        with one entry per sub-network.

    Notes:
        With S the (n_sub, n_vertex) matrix of sub-network PageRanks and p
        the master PageRank, the problem is set up with H = 2*S*S^T and
        f = -2*S*p. Assuming QP minimises 0.5*x'Hx + f'x, that is
        ||S^T x - p||^2 up to a constant; Aeq/beq/lb presumably encode
        sum(x) = 1 and x >= 0 -- TODO confirm against the OpenOpt docs.
    """
    prob_master = _sorted_page_rank(dict_network_master)

    # vstack always yields a 2-D (n_sub, n_vertex) matrix. The previous
    # incremental build left a single sub-network as a 1-D vector, which
    # collapsed H to a scalar.
    prob_sub = np.vstack([_sorted_page_rank(dict_network_sub)
                          for dict_network_sub in list_dict_network_sub])

    H = 2 * prob_sub.dot(prob_sub.T)
    f = -2 * prob_master.dot(prob_sub.T)
    Aeq = np.ones(len(list_dict_network_sub))
    beq = 1
    lb = np.zeros(len(list_dict_network_sub))

    p = QP(H, f, Aeq=Aeq, beq=beq, lb=lb)
    r = p.solve("cvxopt_qp")
    k_opt = r.xf
    return k_opt

In [2]:
# Load the edge list from the CSV file
list_edge = readcsv("./files/kaigo_honne/list_edgelist20160127.csv")
# Build the master network from it (undirected)
dict_network_master = cal_edgelist_to_network(list_edge)

In [14]:
# Build the sub-network around one target word: its nodes are the endpoints
# of every master edge that contains the word, and its edges are all master
# edges whose two endpoints are both among those nodes.
target_word = "楽しい"
list_edges_rev = []
list_weight_rev = []
list_nodes = []
for edges in dict_network_master['edge']:
    if target_word in edges:
        list_nodes.append(edges[0])
        list_nodes.append(edges[1])
list_nodes = list(set(list_nodes))
# Set copy used only for lookups, so the edge filter below does not scan
# the node list for every edge.
set_nodes = set(list_nodes)
for edges, weight in zip(dict_network_master['edge'], dict_network_master['weight']):
    if edges[0] in set_nodes and edges[1] in set_nodes:
        list_edges_rev.append(edges)
        list_weight_rev.append(weight)

dict_network_rev = {'vertex': list_nodes, 'edge': list_edges_rev, 'weight': list_weight_rev}

In [15]:
# Build an igraph Graph from the sub-network in dict_network_rev.
g_master = Graph()
# Vertices are added from the list of node labels (presumably igraph stores
# each label as the vertex name -- confirm against the igraph docs).
g_master.add_vertices(dict_network_rev["vertex"])
# Edges are given as pairs of node labels.
# NOTE(review): dict_network_rev["weight"] is not attached to the graph here.
g_master.add_edges(dict_network_rev["edge"])

In [19]:
# List the maximal cliques of g_master with at least 3 vertices
# (max=0 presumably means "no upper size limit" -- confirm against the igraph docs).
g_master.maximal_cliques(min=3, max=0)

[(49, 34, 36),
 (28, 34, 15),
 (28, 34, 29),
 (28, 34, 58),
 (72, 34, 31),
 (72, 34, 69),
 (0, 34, 61),
 (0, 34, 82),
 (79, 34, 4),
 (79, 34, 15),
 (79, 34, 84),
 (12, 29, 68, 34),
 (26, 34, 45),
 (26, 34, 48),
 (67, 29, 45, 34),
 (27, 34, 18),
 (27, 34, 45),
 (2, 34, 7),
 (2, 34, 29),
 (2, 34, 73),
 (37, 9, 34, 1),
 (37, 9, 34, 36),
 (10, 6, 34),
 (75, 23, 66, 47, 34),
 (30, 34, 9),
 (30, 34, 50),
 (30, 34, 55),
 (14, 34, 1),
 (14, 34, 18),
 (14, 34, 23),
 (55, 34, 77, 70, 45),
 (59, 34, 70, 40),
 (59, 34, 70, 45),
 (22, 34, 29, 20),
 (22, 34, 58),
 (11, 34, 9),
 (11, 34, 31),
 (11, 34, 69, 56),
 (13, 34, 66, 39),
 (13, 34, 69, 24),
 (13, 34, 69, 39),
 (44, 34, 31, 4),
 (44, 34, 53),
 (44, 34, 63),
 (57, 34, 52),
 (57, 34, 68, 20),
 (57, 34, 68, 53),
 (62, 34, 6),
 (62, 34, 9),
 (62, 34, 61, 39),
 (7, 34, 16, 45),
 (7, 34, 51),
 (7, 34, 70, 4),
 (7, 34, 70, 45),
 (7, 34, 76),
 (7, 34, 80),
 (8, 9, 34, 36),
 (8, 9, 34, 68),
 (8, 9, 34, 81),
 (51, 34, 50),
 (51, 34, 77, 61),
 (4, 34, 70