In [None]:
import numpy as np
import networkx as nx
import numpy.linalg as la
import scipy.cluster.vq as vq
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import statistics
import csv
import warnings
warnings.filterwarnings('ignore')

def block_planted_clustering_quality(labels, num_members_in_cluster):
  """Fraction of nodes whose label agrees with the planted two-block split.

  The first ``num_members_in_cluster`` entries of ``labels`` are treated as the
  first planted block and the following ``num_members_in_cluster`` entries as
  the second one. The label expected in the first block is picked by counting
  how many of its entries equal 1 (a tie resolves to 1); the second block is
  then expected to carry the other label.

  Args:
    labels: indexable sequence of cluster labels; only indices
      ``0 .. 2 * num_members_in_cluster - 1`` are read.
    num_members_in_cluster: size of each planted block.

  Returns:
    Number of matching entries divided by ``2 * num_members_in_cluster``.
  """
  block_size = num_members_in_cluster
  ones_in_first_block = sum(1 for idx in range(block_size) if labels[idx] == 1)
  # Exactly half ones counts as a majority for label 1.
  if ones_in_first_block >= block_size / 2:
    first_block_label, second_block_label = 1, 0
  else:
    first_block_label, second_block_label = 0, 1
  hits_first = sum(1 for idx in range(block_size) if labels[idx] == first_block_label)
  hits_second = sum(
    1 for idx in range(block_size, 2 * block_size) if labels[idx] == second_block_label
  )
  return (hits_first + hits_second) / (block_size * 2)

def spectral_clustering(graph, num_clusters, num_members_in_cluster):
  """Cluster ``graph`` with the unnormalized graph Laplacian and k-means.

  The Laplacian ``D - A`` is built from the adjacency matrix, its eigenvectors
  are computed with ``numpy.linalg.eigh``, and columns ``1 .. num_clusters - 1``
  of the eigenvector matrix are fed to ``scipy.cluster.vq.kmeans2``.

  Args:
    graph: networkx graph to cluster.
    num_clusters: number of clusters requested from k-means.
    num_members_in_cluster: planted block size, forwarded to
      ``block_planted_clustering_quality``.

  Returns:
    Tuple ``(score, labels)``: the planted-partition quality score and the
    label array returned by ``kmeans2``.
  """
  adjacency = nx.adjacency_matrix(graph)
  degrees = np.ravel(np.sum(adjacency, axis=1))
  laplacian = np.diag(degrees) - adjacency
  _eigenvalues, eigenvectors = la.eigh(laplacian)
  # Column 0 is skipped; the embedding uses the next num_clusters - 1 columns.
  embedding = eigenvectors[:, 1:num_clusters]
  _centroids, labels_ = vq.kmeans2(embedding, num_clusters)
  return block_planted_clustering_quality(labels_, num_members_in_cluster), labels_

def spectral_clustering_debug(graph, num_clusters, num_members_in_cluster):
  """Same pipeline as ``spectral_clustering`` but also returns the embedding.

  Args:
    graph: networkx graph to cluster.
    num_clusters: number of clusters requested from k-means.
    num_members_in_cluster: planted block size used for scoring.

  Returns:
    Tuple ``(score, labels, embedding)`` where ``embedding`` is the slice
    ``U[:, 1:num_clusters]`` of the Laplacian eigenvector matrix that was
    passed to ``kmeans2``.
  """
  adj = nx.adjacency_matrix(graph)
  degree_matrix = np.diag(np.ravel(np.sum(adj, axis=1)))
  eigvals, eigvecs = la.eigh(degree_matrix - adj)
  spectral_coords = eigvecs[:, 1:num_clusters]
  kmeans_result = vq.kmeans2(spectral_coords, num_clusters)
  labels_ = kmeans_result[1]
  #kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(U[:,1:num_clusters])
  #labels_ = kmeans.labels_
  score_ = block_planted_clustering_quality(labels_, num_members_in_cluster)
  return score_, labels_, eigvecs[:, 1:num_clusters]


def find_bad_p_in(p_out = 0.1, start_p_in = 1, num_groups = 2, num_members = 100, step = 0.001, num_repeats = 100):
  """Scan p_in downward from ``start_p_in`` to ``p_out`` and score spectral clustering.

  For every p_in on the grid ``start_p_in - k * step`` (k = 0, 1, ...) that is
  still >= ``p_out``, ``num_repeats`` planted-partition graphs are generated,
  clustered with ``spectral_clustering``, and the mean score is recorded.

  Args:
    p_out: between-block edge probability; also the lower bound of the sweep.
    start_p_in: first (largest) within-block edge probability tried.
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: positive decrement applied to p_in between grid points.
    num_repeats: graphs sampled per grid point.

  Returns:
    List of ``[p_in, mean_score]`` pairs in sweep order.

  Raises:
    ValueError: if the sweep would start but ``step`` is not positive
      (it would never terminate).
    statistics.StatisticsError: if ``num_repeats`` is 0.
  """
  if step <= 0 and start_p_in >= p_out:
    raise ValueError("step must be positive, got " + repr(step))
  # Grid points are derived from the index instead of repeated subtraction so
  # rounding error does not accumulate; a point that lands within `tolerance`
  # of the bound is snapped onto it so the endpoint is neither skipped nor
  # overshot by a few ulps.
  tolerance = abs(step) * 1e-9
  result = []
  grid_index = 0
  p_in = start_p_in
  while p_in >= p_out:
    scores = []
    for _ in range(num_repeats):
      graph_ = nx.planted_partition_graph(num_groups, num_members, p_in, p_out)
      score, _labels = spectral_clustering(graph_, num_groups, num_members)
      scores.append(score)
    result.append([p_in, statistics.mean(scores)])
    grid_index += 1
    p_in = start_p_in - grid_index * step
    if abs(p_in - p_out) <= tolerance:
      p_in = p_out
  return result

def find_bad_p_in_iterates(p_out = 0.1, start_p_in = 1, num_groups = 2, num_members = 50, step = 0.01, num_iterates = 1000, num_repeats = 100):
  """Scan p_in downward from ``start_p_in`` with a cap on the number of grid points.

  At each grid point ``start_p_in - k * step`` that is still >= ``p_out``,
  ``num_repeats`` planted-partition graphs are generated and clustered with
  ``spectral_clustering``; the mean score is recorded. The scan stops after
  ``num_iterates`` grid points at the latest. A progress line is printed
  after every 10th grid point.

  Args:
    p_out: between-block edge probability; also the lower bound of the sweep.
    start_p_in: first (largest) within-block edge probability tried.
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: decrement applied to p_in between grid points.
    num_iterates: maximum number of grid points evaluated.
    num_repeats: graphs sampled per grid point.

  Returns:
    List of ``[p_in, mean_score]`` pairs in sweep order.

  Raises:
    statistics.StatisticsError: if ``num_repeats`` is 0.
  """
  # Index-based grid avoids accumulating rounding error; values within
  # `tolerance` of the bound are snapped onto it so the endpoint is kept.
  tolerance = abs(step) * 1e-9
  result = []
  cur_i = 0
  p_in = start_p_in
  while (p_in >= p_out and cur_i < num_iterates):
    scores = []
    for _ in range(num_repeats):
      graph_ = nx.planted_partition_graph(num_groups, num_members, p_in, p_out)
      score, _labels = spectral_clustering(graph_, num_groups, num_members)
      scores.append(score)
    result.append([p_in, statistics.mean(scores)])
    cur_i += 1
    p_in = start_p_in - cur_i * step
    if abs(p_in - p_out) <= tolerance:
      p_in = p_out
    # Progress is keyed on the outer iteration counter, not the repeat index.
    if cur_i % 10 == 0:
      print('Iter: ' + str(cur_i))
  return result

def spectral_clustering_to_get_p_out(start_p_out = 0, p_in = 1, num_groups = 2, num_members = 100, step = 0.01, num_iterates = 10000, num_repeats = 250, file_name = "fixed_p_out.csv"):
  """Scan p_out upward at fixed ``p_in``, score spectral clustering, and save a CSV.

  At each grid point ``start_p_out + k * step`` that is still <= ``p_in``
  (and for at most ``num_iterates`` grid points), ``num_repeats``
  planted-partition graphs are generated and clustered with
  ``spectral_clustering``; the mean score is recorded. A progress line is
  printed after every grid point.

  The CSV written to ``file_name`` has the rows ``p_in,<p_in>``,
  ``p_out,score`` and then one ``<p_out>,<mean score>`` row per grid point.

  Args:
    start_p_out: first (smallest) between-block edge probability tried.
    p_in: fixed within-block edge probability; also the upper sweep bound.
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: increment applied to p_out between grid points.
    num_iterates: maximum number of grid points evaluated.
    num_repeats: graphs sampled per grid point.
    file_name: path of the CSV file to (over)write.

  Returns:
    List of ``[p_out, mean_score]`` pairs in sweep order.

  Raises:
    statistics.StatisticsError: if ``num_repeats`` is 0.
  """
  # Grid points come from the index rather than a running sum so rounding
  # error does not build up; a value within `tolerance` of p_in is snapped
  # onto it, otherwise the `<= p_in` endpoint is lost to a few-ulp overshoot.
  tolerance = abs(step) * 1e-9
  result = []
  cur_i = 0
  p_out = start_p_out
  while (p_out <= p_in and cur_i < num_iterates):
    scores = []
    for _ in range(num_repeats):
      graph_ = nx.planted_partition_graph(num_groups, num_members, p_in, p_out)
      score, _labels = spectral_clustering(graph_, num_groups, num_members)
      scores.append(score)
    result.append([p_out, statistics.mean(scores)])
    cur_i += 1
    p_out = start_p_out + cur_i * step
    if abs(p_out - p_in) <= tolerance:
      p_out = p_in
    print('Iter: ' + str(cur_i))
  with open(file_name, "w", newline ='') as f:
    writer = csv.writer(f)
    writer.writerow(['p_in', str(p_in)])
    writer.writerow(['p_out', 'score'])
    for item in result:
      writer.writerow([str(item[0]), str(item[1])])
  return result

def min_cut_clustering_to_get_p_out(start_p_out = 0.0005, p_in = 1, num_groups = 2, num_members = 100,
                                    step = 0.0005, num_iterates = 10000, num_repeats = 20, file_name = "fixed_p_out.csv",
                                    max_p_out = 0.005):
  """Scan p_out upward at fixed ``p_in``, score min-cut clustering, and save a CSV.

  At each grid point ``start_p_out + k * step`` that is still <= ``max_p_out``
  (and for at most ``num_iterates`` grid points), ``num_repeats``
  planted-partition graphs are generated and split with
  ``stoer_wagner_cut_clustering``; the mean score is recorded. A progress line
  is printed after every grid point.

  The CSV written to ``file_name`` has the rows ``p_in,<p_in>``,
  ``p_out,score`` and then one ``<p_out>,<mean score>`` row per grid point.

  Args:
    start_p_out: first (smallest) between-block edge probability tried.
    p_in: fixed within-block edge probability.
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: increment applied to p_out between grid points.
    num_iterates: maximum number of grid points evaluated.
    num_repeats: graphs sampled per grid point.
    file_name: path of the CSV file to (over)write.
    max_p_out: inclusive upper bound of the p_out sweep (previously a
      hard-coded 0.005).

  Returns:
    List of ``[p_out, mean_score]`` pairs in sweep order.

  Raises:
    statistics.StatisticsError: if ``num_repeats`` is 0.
  """
  # Index-based grid plus endpoint snapping: a running `p_out += step` drifts
  # a few ulps above the bound and silently drops the last grid point.
  tolerance = abs(step) * 1e-9
  result = []
  cur_i = 0
  p_out = start_p_out
  while (p_out <= max_p_out and cur_i < num_iterates):
    scores = []
    for _ in range(num_repeats):
      graph_ = nx.planted_partition_graph(num_groups, num_members, p_in, p_out)
      score, _labels = stoer_wagner_cut_clustering(graph_, num_groups, num_members)
      scores.append(score)
    result.append([p_out, statistics.mean(scores)])
    cur_i += 1
    p_out = start_p_out + cur_i * step
    if abs(p_out - max_p_out) <= tolerance:
      p_out = max_p_out
    print('Iter: ' + str(cur_i))
  with open(file_name, "w", newline ='') as f:
    writer = csv.writer(f)
    writer.writerow(['p_in', str(p_in)])
    writer.writerow(['p_out', 'score'])
    for item in result:
      writer.writerow([str(item[0]), str(item[1])])
  return result



# Runtime grows with num_repeats_p_in * num_iterates_p_in for every p_out value visited.
def find_p_in_depending_on_p_out(start_p_out = 0.5, num_groups = 2, num_members = 100, step = 0.1, step_p_in = 0.001, num_iterates_p_in = 500, num_repeats_p_in = 1000):
  """Run the spectral p_in scan for a descending series of p_out values.

  For each p_out on the grid ``start_p_out - k * step`` that is still > 0,
  ``find_bad_p_in_iterates`` is called with ``start_p_in = p_out + 0.5`` and
  its result is written to ``p_in_score_with_p_out_iter_<k>.csv`` in the
  current working directory (rows: ``p_out,<p_out>``, ``p_in,score``, then one
  ``<p_in>,<score>`` row per scan point). A completion line is printed per
  p_out value.

  Args:
    start_p_out: first (largest) between-block edge probability.
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: positive decrement applied to p_out between scans.
    step_p_in: p_in decrement forwarded to the inner scan.
    num_iterates_p_in: grid-point cap forwarded to the inner scan.
    num_repeats_p_in: graphs per grid point, forwarded to the inner scan.

  Returns:
    None; results are only written to the CSV files.

  Raises:
    ValueError: if the sweep would start but ``step`` is not positive
      (it would never terminate).
  """
  if start_p_out > 0 and step <= 0:
    raise ValueError("step must be positive, got " + repr(step))
  # Repeated `p_out -= step` leaves a tiny positive residue (e.g. 0.5 minus
  # 0.1 five times is about 2.8e-17), which would trigger one more full scan.
  # Deriving p_out from the index and snapping near-zero values prevents that.
  tolerance = step * 1e-9
  cur_i = 0
  p_out = start_p_out
  while p_out > 0:
    result = find_bad_p_in_iterates(p_out = p_out, start_p_in = p_out + 0.5, num_groups = num_groups, num_members = num_members, step = step_p_in, num_iterates = num_iterates_p_in, num_repeats = num_repeats_p_in)
    # newline='' is what the csv module expects for files it writes.
    with open("p_in_score_with_p_out_iter_" + str(cur_i) + ".csv", "w", newline = '') as f:
      writer = csv.writer(f)
      writer.writerow(['p_out', str(p_out)])
      writer.writerow(['p_in', 'score'])
      for item in result:
        writer.writerow([str(item[0]), str(item[1])])
    print("P_out " +str(p_out) +" done.")
    cur_i += 1
    p_out = start_p_out - cur_i * step
    if abs(p_out) <= tolerance:
      p_out = 0
 


def stoer_wagner_cut_clustering(graph, num_clusters, num_members_in_cluster):
    """Split ``graph`` with a Stoer-Wagner minimum cut and score the split.

    Nodes in the first part of the partition returned by ``nx.stoer_wagner``
    get label 0, nodes in the second part get label 1. Nodes are used directly
    as indices into a label list of length
    ``num_clusters * num_members_in_cluster``; positions not covered by the
    partition stay ``None``.

    Args:
        graph: networkx graph to split.
        num_clusters: number of planted blocks (only used to size the labels).
        num_members_in_cluster: planted block size.

    Returns:
        Tuple ``(score, labels)`` with the planted-partition quality score and
        the label list.
    """
    _cut_value, partition = nx.stoer_wagner(graph)
    labels_ = [None] * (num_clusters * num_members_in_cluster)
    for side in (0, 1):
        for node in partition[side]:
            labels_[node] = side
    return block_planted_clustering_quality(labels_, num_members_in_cluster), labels_

def stoer_wagner_cut_clustering_debug(graph, num_clusters, num_members_in_cluster):
    """Stoer-Wagner min-cut clustering that also reports the cut value.

    Labels are assigned as in ``stoer_wagner_cut_clustering``: 0 for nodes in
    the first part of the partition, 1 for nodes in the second part, ``None``
    for label positions that no node maps to.

    Args:
        graph: networkx graph to split.
        num_clusters: number of planted blocks (only used to size the labels).
        num_members_in_cluster: planted block size.

    Returns:
        Tuple ``(score, labels, cut_value)`` where ``cut_value`` is the first
        element returned by ``nx.stoer_wagner``.
    """
    cut_value, parts = nx.stoer_wagner(graph)
    total_nodes = num_clusters * num_members_in_cluster
    labels_ = [None for _ in range(total_nodes)]
    for label, members in enumerate((parts[0], parts[1])):
        for vertex in members:
            labels_[vertex] = label
    quality = block_planted_clustering_quality(labels_, num_members_in_cluster)
    return quality, labels_, cut_value


def stoer_wagner_find_bad_p_in_iterates(p_out = 0.1, start_p_in = 1, num_groups = 2, num_members = 50, step = 0.0001, num_iterates = 1000, num_repeats = 100):
  """Scan p_out downward towards 0 and score Stoer-Wagner min-cut clustering.

  Despite the name, the swept quantity is ``p_out``: at each grid point
  ``p_out - k * step`` that is still > 0 (and for at most ``num_iterates``
  grid points), ``num_repeats`` planted-partition graphs are generated with
  within-block probability ``start_p_in`` and split with
  ``stoer_wagner_cut_clustering``; the mean score is recorded. A progress line
  is printed after every 10th grid point.

  Args:
    p_out: first (largest) between-block edge probability tried.
    start_p_in: within-block edge probability used for every graph
      (default 1, the value that used to be hard-coded).
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: decrement applied to p_out between grid points.
    num_iterates: maximum number of grid points evaluated.
    num_repeats: graphs sampled per grid point.

  Returns:
    List of ``[p_out, mean_score]`` pairs in sweep order.

  Raises:
    statistics.StatisticsError: if ``num_repeats`` is 0.
  """
  p_in = start_p_in
  first_p_out = p_out
  # Index-based grid; near-zero rounding residue is snapped to 0 so it does
  # not produce a spurious extra grid point.
  tolerance = abs(step) * 1e-9
  result = []
  cur_i = 0
  while (p_out > 0 and cur_i < num_iterates):
    scores = []
    for _ in range(num_repeats):
      graph_ = nx.planted_partition_graph(num_groups, num_members, p_in, p_out)
      score, _labels = stoer_wagner_cut_clustering(graph_, num_groups, num_members)
      scores.append(score)
    result.append([p_out, statistics.mean(scores)])
    cur_i += 1
    p_out = first_p_out - cur_i * step
    if abs(p_out) <= tolerance:
      p_out = 0
    # Value comparison; `is 0` tests object identity and only works by accident.
    if cur_i % 10 == 0:
      print('Iter: ' + str(cur_i))
  return result

# Runtime grows with num_repeats_p_in * num_iterates_p_in for every p_out value visited.
def stoer_wagner_find_p_in_depending_on_p_out(start_p_out = 0.01, num_groups = 2, num_members = 100, step = 0.1, step_p_in = 0.001, num_iterates_p_in = 10000, num_repeats_p_in = 100):
  """Run the Stoer-Wagner scan for a descending series of starting p_out values.

  For each p_out on the grid ``start_p_out - k * step`` that is still > 0,
  ``stoer_wagner_find_bad_p_in_iterates`` is called with ``start_p_in = 1``
  and its result is written to
  ``stoer_wagner_p_in_score_with_p_out_iter_<k>.csv`` in the current working
  directory (rows: ``p_out,<p_out>``, ``p_in,score``, then one row per scan
  point). A completion line is printed per p_out value.

  Args:
    start_p_out: first (largest) between-block edge probability.
    num_groups: number of planted blocks.
    num_members: nodes per planted block.
    step: positive decrement applied to p_out between scans.
    step_p_in: step forwarded to the inner scan.
    num_iterates_p_in: grid-point cap forwarded to the inner scan.
    num_repeats_p_in: graphs per grid point, forwarded to the inner scan.

  Returns:
    List of ``[p_out, inner_scan_result]`` pairs, one per p_out value.

  Raises:
    ValueError: if the sweep would start but ``step`` is not positive
      (it would never terminate).
  """
  if start_p_out > 0 and step <= 0:
    raise ValueError("step must be positive, got " + repr(step))
  # Derive p_out from the index and snap near-zero values: repeated
  # subtraction can leave a tiny positive residue that starts an extra scan.
  tolerance = step * 1e-9
  cur_i = 0
  p_out = start_p_out
  global_result = []
  while p_out > 0:
    result = stoer_wagner_find_bad_p_in_iterates(p_out = p_out, start_p_in = 1, num_groups = num_groups, num_members = num_members, step = step_p_in, num_iterates = num_iterates_p_in, num_repeats = num_repeats_p_in)
    # newline='' is what the csv module expects for files it writes.
    with open("stoer_wagner_p_in_score_with_p_out_iter_" + str(cur_i)  + ".csv", "w", newline = '') as f:
      writer = csv.writer(f)
      writer.writerow(['p_out', str(p_out)])
      writer.writerow(['p_in', 'score'])
      for item in result:
        writer.writerow([str(item[0]), str(item[1])])
    global_result.append([p_out, result])
    print("P_out " +str(p_out) +" done.")
    cur_i += 1
    p_out = start_p_out - cur_i * step
    if abs(p_out) <= tolerance:
      p_out = 0
  return global_result

In [None]:
# Spectral sweep: for each p_out starting at 0.1, scan p_in downward in steps of 0.001
# (up to 100000 grid points per p_out); one CSV is written per p_out value.
find_p_in_depending_on_p_out(num_iterates_p_in=100000, start_p_out= 0.1, step_p_in= 0.001)


KeyboardInterrupt: 

In [None]:
# Stoer-Wagner sweep starting at p_out = 0.04; the inner scan lowers p_out by 0.0005 per grid point.
stoer_wagner_find_p_in_depending_on_p_out(start_p_out= 0.04, step_p_in= 0.0005 )

Iter: 10
Iter: 20
Iter: 30
Iter: 40
Iter: 50


KeyboardInterrupt: 

spectral_clustering_to_get_p_out(p_in = 1, file_name = "p_in_1")

In [None]:
# Spectral clustering score as a function of p_out at p_in = 1; results are saved to p_in_1.csv.
spectral_clustering_to_get_p_out(p_in = 1, file_name = "p_in_1.csv")

Iter: 1
Iter: 2
Iter: 3
Iter: 4
Iter: 5
Iter: 6
Iter: 7
Iter: 8
Iter: 9
Iter: 10
Iter: 11
Iter: 12
Iter: 13
Iter: 14
Iter: 15
Iter: 16
Iter: 17
Iter: 18
Iter: 19
Iter: 20
Iter: 21
Iter: 22
Iter: 23
Iter: 24
Iter: 25
Iter: 26
Iter: 27
Iter: 28
Iter: 29
Iter: 30
Iter: 31
Iter: 32
Iter: 33
Iter: 34
Iter: 35
Iter: 36
Iter: 37
Iter: 38
Iter: 39
Iter: 40
Iter: 41
Iter: 42
Iter: 43
Iter: 44
Iter: 45
Iter: 46
Iter: 47
Iter: 48
Iter: 49
Iter: 50
Iter: 51
Iter: 52
Iter: 53
Iter: 54
Iter: 55
Iter: 56
Iter: 57
Iter: 58
Iter: 59
Iter: 60
Iter: 61
Iter: 62
Iter: 63
Iter: 64
Iter: 65
Iter: 66
Iter: 67
Iter: 68
Iter: 69
Iter: 70
Iter: 71
Iter: 72
Iter: 73
Iter: 74
Iter: 75
Iter: 76
Iter: 77
Iter: 78
Iter: 79
Iter: 80
Iter: 81
Iter: 82
Iter: 83
Iter: 84
Iter: 85
Iter: 86
Iter: 87
Iter: 88
Iter: 89
Iter: 90
Iter: 91
Iter: 92
Iter: 93
Iter: 94
Iter: 95
Iter: 96
Iter: 97
Iter: 98
Iter: 99
Iter: 100


[[0, 0.968],
 [0.01, 0.972],
 [0.02, 0.978],
 [0.03, 0.974],
 [0.04, 0.97],
 [0.05, 0.978],
 [0.060000000000000005, 0.984],
 [0.07, 0.982],
 [0.08, 0.98],
 [0.09, 0.986],
 [0.09999999999999999, 0.992],
 [0.10999999999999999, 0.992],
 [0.11999999999999998, 0.988],
 [0.12999999999999998, 0.996],
 [0.13999999999999999, 0.99],
 [0.15, 1.0],
 [0.16, 0.984],
 [0.17, 0.99],
 [0.18000000000000002, 0.996],
 [0.19000000000000003, 0.984],
 [0.20000000000000004, 0.994],
 [0.21000000000000005, 0.994],
 [0.22000000000000006, 0.992],
 [0.23000000000000007, 0.992],
 [0.24000000000000007, 0.996],
 [0.25000000000000006, 0.99],
 [0.26000000000000006, 0.99],
 [0.2700000000000001, 0.996],
 [0.2800000000000001, 1.0],
 [0.2900000000000001, 0.996],
 [0.3000000000000001, 0.998],
 [0.3100000000000001, 0.998],
 [0.3200000000000001, 0.996],
 [0.3300000000000001, 0.996],
 [0.34000000000000014, 1.0],
 [0.35000000000000014, 1.0],
 [0.36000000000000015, 0.996],
 [0.37000000000000016, 1.0],
 [0.38000000000000017, 0.99

In [None]:
# Spectral clustering score as a function of p_out at p_in = 0.8; results are saved to p_in_08.csv.
spectral_clustering_to_get_p_out(p_in = 0.8, file_name = "p_in_08.csv")

Iter: 1
Iter: 2
Iter: 3
Iter: 4
Iter: 5
Iter: 6
Iter: 7
Iter: 8
Iter: 9
Iter: 10
Iter: 11
Iter: 12
Iter: 13
Iter: 14
Iter: 15
Iter: 16
Iter: 17
Iter: 18
Iter: 19
Iter: 20
Iter: 21
Iter: 22
Iter: 23
Iter: 24
Iter: 25
Iter: 26
Iter: 27
Iter: 28
Iter: 29
Iter: 30
Iter: 31
Iter: 32
Iter: 33
Iter: 34
Iter: 35
Iter: 36
Iter: 37
Iter: 38
Iter: 39
Iter: 40
Iter: 41
Iter: 42
Iter: 43
Iter: 44
Iter: 45
Iter: 46
Iter: 47
Iter: 48
Iter: 49
Iter: 50
Iter: 51
Iter: 52
Iter: 53
Iter: 54
Iter: 55
Iter: 56
Iter: 57
Iter: 58
Iter: 59
Iter: 60
Iter: 61
Iter: 62
Iter: 63
Iter: 64
Iter: 65
Iter: 66
Iter: 67
Iter: 68
Iter: 69
Iter: 70
Iter: 71
Iter: 72
Iter: 73
Iter: 74
Iter: 75
Iter: 76
Iter: 77
Iter: 78
Iter: 79
Iter: 80


[[0, 0.984],
 [0.01, 0.964],
 [0.02, 0.974],
 [0.03, 0.984],
 [0.04, 0.974],
 [0.05, 0.992],
 [0.060000000000000005, 0.978],
 [0.07, 0.984],
 [0.08, 0.986],
 [0.09, 0.992],
 [0.09999999999999999, 0.99],
 [0.10999999999999999, 0.996],
 [0.11999999999999998, 0.996],
 [0.12999999999999998, 0.988],
 [0.13999999999999999, 0.992],
 [0.15, 0.994],
 [0.16, 0.982],
 [0.17, 0.998],
 [0.18000000000000002, 0.994],
 [0.19000000000000003, 0.998],
 [0.20000000000000004, 0.988],
 [0.21000000000000005, 0.998],
 [0.22000000000000006, 0.994],
 [0.23000000000000007, 0.994],
 [0.24000000000000007, 1.0],
 [0.25000000000000006, 0.998],
 [0.26000000000000006, 1.0],
 [0.2700000000000001, 0.998],
 [0.2800000000000001, 1.0],
 [0.2900000000000001, 0.998],
 [0.3000000000000001, 1.0],
 [0.3100000000000001, 0.998],
 [0.3200000000000001, 0.994],
 [0.3300000000000001, 0.996],
 [0.34000000000000014, 1.0],
 [0.35000000000000014, 0.998],
 [0.36000000000000015, 1.0],
 [0.37000000000000016, 0.998],
 [0.38000000000000017, 0

In [None]:
# Spectral clustering score as a function of p_out at p_in = 0.6; results are saved to p_in_06.csv.
spectral_clustering_to_get_p_out(p_in = 0.6, file_name = "p_in_06.csv")

Iter: 1
Iter: 2
Iter: 3
Iter: 4
Iter: 5
Iter: 6
Iter: 7
Iter: 8
Iter: 9
Iter: 10
Iter: 11
Iter: 12
Iter: 13
Iter: 14
Iter: 15
Iter: 16
Iter: 17
Iter: 18
Iter: 19
Iter: 20
Iter: 21
Iter: 22
Iter: 23
Iter: 24
Iter: 25
Iter: 26
Iter: 27
Iter: 28
Iter: 29
Iter: 30
Iter: 31
Iter: 32
Iter: 33
Iter: 34
Iter: 35
Iter: 36
Iter: 37
Iter: 38
Iter: 39
Iter: 40
Iter: 41
Iter: 42
Iter: 43
Iter: 44
Iter: 45
Iter: 46
Iter: 47
Iter: 48
Iter: 49
Iter: 50
Iter: 51
Iter: 52
Iter: 53
Iter: 54
Iter: 55
Iter: 56
Iter: 57
Iter: 58
Iter: 59
Iter: 60


[[0, 0.96],
 [0.01, 0.978],
 [0.02, 0.98],
 [0.03, 0.99],
 [0.04, 0.988],
 [0.05, 0.988],
 [0.060000000000000005, 0.988],
 [0.07, 0.992],
 [0.08, 0.99],
 [0.09, 0.998],
 [0.09999999999999999, 0.984],
 [0.10999999999999999, 0.994],
 [0.11999999999999998, 0.99],
 [0.12999999999999998, 0.998],
 [0.13999999999999999, 0.99],
 [0.15, 1.0],
 [0.16, 0.996],
 [0.17, 0.996],
 [0.18000000000000002, 0.998],
 [0.19000000000000003, 1.0],
 [0.20000000000000004, 0.996],
 [0.21000000000000005, 1.0],
 [0.22000000000000006, 0.998],
 [0.23000000000000007, 1.0],
 [0.24000000000000007, 0.996],
 [0.25000000000000006, 1.0],
 [0.26000000000000006, 1.0],
 [0.2700000000000001, 1.0],
 [0.2800000000000001, 1.0],
 [0.2900000000000001, 0.99998],
 [0.3000000000000001, 0.99998],
 [0.3100000000000001, 0.99998],
 [0.3200000000000001, 0.99992],
 [0.3300000000000001, 0.99988],
 [0.34000000000000014, 0.99952],
 [0.35000000000000014, 0.997],
 [0.36000000000000015, 0.99266],
 [0.37000000000000016, 0.9904],
 [0.38000000000000

In [None]:
# Min-cut (Stoer-Wagner) score as a function of p_out at p_in = 1; results are saved to simple_mincut_p_in_1.csv.
min_cut_clustering_to_get_p_out(p_in = 1, file_name = "simple_mincut_p_in_1.csv")

Iter: 1
Iter: 2
Iter: 3
Iter: 4
Iter: 5
Iter: 6
Iter: 7
Iter: 8
Iter: 9


[[0.0005, 1.0],
 [0.001, 1.0],
 [0.0015, 1.0],
 [0.002, 1.0],
 [0.0025, 1.0],
 [0.003, 1.0],
 [0.0035, 1.0],
 [0.004, 1.0],
 [0.0045000000000000005, 1.0]]

In [None]:
# Min-cut (Stoer-Wagner) score as a function of p_out at p_in = 0.8; results are saved to simple_mincut_p_in_08.csv.
min_cut_clustering_to_get_p_out(p_in = 0.8, file_name = "simple_mincut_p_in_08.csv")

Iter: 1
Iter: 2
Iter: 3
Iter: 4
Iter: 5
Iter: 6
Iter: 7
Iter: 8
Iter: 9


KeyboardInterrupt: 

In [None]:
# Min-cut (Stoer-Wagner) score as a function of p_out at p_in = 0.6; results are saved to simple_mincut_p_in_06.csv.
min_cut_clustering_to_get_p_out(p_in = 0.6, file_name = "simple_mincut_p_in_06.csv")

In [None]:
# Sanity check for the spectral pipeline: two blocks of 10 nodes with
# p_in = 1 and p_out = 0. The edge count is always printed; the score,
# labels and embedding are dumped only when the score is below 1.
num_gr = 2
num_memb = 10
num_rep = 1
for i in range(num_rep):
    graph_2 = nx.planted_partition_graph(num_gr, num_memb, 1, 0)
    score_2, labels_2, u = spectral_clustering_debug(graph_2, num_gr, num_memb)
    print(f"number of edges: {nx.number_of_edges(graph_2)}")
    if score_2 < 1:
        print(score_2, labels_2, u, sep="\n")
      

number of edges: 90


In [None]:
# Sanity check for the min-cut pipeline: ten graphs with two blocks of
# 10 nodes, p_in = 1 and p_out = 0.1. Diagnostics (edge count, score,
# labels, cut value) are printed only for runs whose score is below 1.
num_gr = 2
num_memb = 10
num_rep = 10
for i in range(num_rep):
    graph_2 = nx.planted_partition_graph(num_gr, num_memb, 1, 0.1)
    score_2, labels_2, cut_size = stoer_wagner_cut_clustering_debug(graph_2, num_gr, num_memb)
    if score_2 < 1:
        print(f"number of edges: {nx.number_of_edges(graph_2)}")
        print(score_2, labels_2, cut_size, sep="\n")

number of edges: 102
0.55
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
9
number of edges: 101
0.55
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1]
9
number of edges: 103
0.55
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
9
number of edges: 100
0.55
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
9
number of edges: 105
0.55
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
9
number of edges: 99
0.55
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]
9
