In [None]:
# Visualise the first n users together with all of their direct neighbours in G.
n = 10
sampled_nodes = ['u' + str(i) for i in range(1, n + 1)]

G_vis = nx.Graph()
G_vis.add_nodes_from(sampled_nodes)

# Copy every edge incident to a sampled user into the visualisation graph.
# The loop variable is deliberately NOT called `n`: reusing that name would
# overwrite the sample size above and leave `n` bound to a node id once the
# cell has run.
for node in sampled_nodes:
    for neighbor in G.neighbors(node):
        G_vis.add_edge(node, neighbor)

# Render the sampled subgraph with pyvis and write it to an HTML file.
kanvas = Network(height=800, width=800, notebook=True) # pyvis
kanvas.from_nx(G_vis)
kanvas.show('test.html')

In [None]:
# Degree of every node, split by node type via the first character of the
# node id ('u' for the user lists, 'r' for the repos lists).
user_degree = [G.degree(node) for node in G.nodes() if node[0] == 'u']
repos_degree = [G.degree(node) for node in G.nodes() if node[0] == 'r']

# Sorted unique degree values and how many nodes have each of them.
user_degree, count_user_degree = np.unique(user_degree, return_counts=True)
repos_degree, count_repos_degree = np.unique(repos_degree, return_counts=True)

#CDF: P(k <= x). It is the running sum of the degree *frequencies*, not of the
# degree values themselves. It is built from the raw integer counts so that
# the last entry is exactly 1 (and the CCDF therefore ends in exactly 0).
cdf_user_degree = count_user_degree.cumsum() / count_user_degree.sum()
cdf_repos_degree = count_repos_degree.cumsum() / count_repos_degree.sum()

#Normalize: turn the counts into the empirical probability p(k).
count_user_degree = count_user_degree / count_user_degree.sum()
count_repos_degree = count_repos_degree / count_repos_degree.sum()

#CCDF: P(k > x) = 1 - P(k <= x)
ccdf_user_degree = 1 - cdf_user_degree
ccdf_repos_degree = 1 - cdf_repos_degree

#Plotting on a log-log scale
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(13, 12))

# One entry per panel: (axis, x values, y values, line format, title, x label, y label).
# Top row: degree distributions as points; bottom row: CCDFs as lines.
panels = [
    (axes[0][0], repos_degree, count_repos_degree, '.', 'Repos degree', 'Degree', 'p(k)'),
    (axes[0][1], user_degree, count_user_degree, '.', 'User degree', 'Degree', 'p(k)'),
    (axes[1][0], repos_degree, ccdf_repos_degree, '-', 'Repos CCDF', 'x', 'p(k>x)'),
    (axes[1][1], user_degree, ccdf_user_degree, '-', 'USER CCDF', 'x', 'p(k>x)'),
]

for panel_ax, degrees, probabilities, fmt, title, xlabel, ylabel in panels:
    panel_ax.plot(degrees, probabilities, fmt)
    panel_ax.set_title(title)
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_yscale('log')
    panel_ax.set_xscale('log')

plt.show()

In [None]:
#Power law fit
# A power law is a straight line in log-log space, so each distribution is
# log10-transformed and fitted with an ordinary linear regression.
x_data = [repos_degree,user_degree,repos_degree,user_degree]
y_data = [count_repos_degree,count_user_degree,ccdf_repos_degree,ccdf_user_degree]
titles = ['Repos degree','User degree','Repos CCDF','User CCDF']

#Transforming the scale and checking for 0 (where log is not defined)
# 0 comes from the CCDF because there is a 0 probability that x will be larger than the absolutely largest
# degree in the network.
# Such points are dropped rather than replaced by 0: a transformed value of 0
# means log10(1), i.e. a probability (or degree) of 1, and keeping it would
# add a bogus point to the regression.
x_trans_data, y_trans_data = [], []
for x_raw, y_raw in zip(x_data, y_data):
    x_raw = np.asarray(x_raw, dtype=float)
    y_raw = np.asarray(y_raw, dtype=float)
    keep = (x_raw > 0) & (y_raw > 0)  # log10 is only defined for positive values
    x_trans_data.append(np.log10(x_raw[keep]))
    y_trans_data.append(np.log10(y_raw[keep]))


fig, axes = plt.subplots(2,2, figsize=(10, 10), facecolor='w', edgecolor='k')
axes = axes.ravel()

for i in range(4):
    x, y = x_trans_data[i], y_trans_data[i]

    axes[i].plot(x, y, '.')
    axes[i].set_title(titles[i])
    axes[i].set_xlabel('log10(degree)')
    axes[i].set_ylabel('log10(p)')

    # A line needs at least two points; leave the panel without a fit otherwise.
    if x.size < 2:
        continue

    model = LinearRegression()
    model = model.fit(x.reshape((-1,1)), y)

    # Evaluate the fitted line slightly beyond the largest observed value.
    xs = np.linspace(np.min(x), np.max(x)+0.5, 100)
    ys = model.predict(xs.reshape((-1,1)))

    # The slope of the line is the exponent of the fitted power law.
    axes[i].plot(xs, ys, label=f'fit (slope={model.coef_[0]:.2f})')
    axes[i].legend()

plt.show()
    

## #0X Community discovery

We ran community discovery on the selected projected and backboned networks ("....") using random walk and label propagation. To evaluate how well-defined the resulting communities are, we used modularity, coverage and performance.

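Modularity
- The modularity of a partition compares the fraction of edges that fall inside communities with the fraction expected if the edges were placed at random while preserving the node degrees (see the NA book).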

Coverage
- The coverage of a partition is the ratio of the number of intra-community edges to the total number of edges in the graph.

Performance
- The performance of a partition is the number of intra-community edges plus inter-community non-edges divided by the total number of potential edges.

##### Initiate a graph for the backboned network

In [None]:
# Name (without extension) of the backboned edge-list file to load.
backboned_unipartite_name = 'df_table_simple_weight'

# Read the edge list from the backboning data folder and build an undirected
# graph from its 'src'/'trg' columns, keeping 'score' as an edge attribute.
backbone_csv_path = f"{PATH_TO['data']['backboning']}/{backboned_unipartite_name}.csv"
edge_list = pd.read_csv(backbone_csv_path)
G = nx.from_pandas_edgelist(
    edge_list,
    source='src',
    target='trg',
    edge_attr='score',
)

##### Find communities

In [None]:
from cscripts import community_discovery

# Community-discovery method to run. Options listed by the original author:
# 'label_prop_semi', 'label_prop_asyn', 'max_modularity', 'random_walk'
method_name = 'label_prop_semi'

# Run the chosen method on the backboned graph, using 'score' as the weight name.
communities = community_discovery.CD_unipartite(
    G,
    method=method_name,
    weight_name='score',
)

# Number of communities
print(len(communities))

#### Measure the existence of communities

In [None]:
# Partition measure to compute. Options listed by the original author:
# 'modularity', 'coverage', 'performance'
method_name = 'modularity'

# Evaluate the communities found above on the same graph, using 'score' as the weight name.
metric = community_discovery.Partition_measure(
    G,
    communities,
    method=method_name,
    weight_name='score',
)

print(metric)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=58d54238-eda4-4682-9c9f-301f49ceb237' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>