In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib import cm, colors

**1 -- data preparation**

In [2]:
# data IN
data = pd.read_csv("../data/languages.csv")

In [3]:
# filter functions
def drop_specifics_from_list(data, filter_list):
    """filter specific languages from list -- motivated by RM del Rio-Chanona et al 2023"""
    data = data[~data["language"].str.contains(filter_list, case=False, regex=True)]
    return data

def top_languages_filter(data, nr_languages):
    """keep top x number of languages ONLY"""
    top_languages = data.groupby(["language"])["num_pushers"].agg("sum").reset_index().sort_values(by="num_pushers", ascending=False)
    top_languages = list(top_languages["language"])[:nr_languages]
    data = data[data["language"].isin(top_languages)]
    return data
def drop_country_codes_from_list(data, country_list):
    data = data[~data["iso2_code"].isin(country_list)]
    return data

def dataframe_for_ecomplexity(data, focal_year, quarter_list):
    """aggregate and transform dataframe for ecomplexity functions"""
    data = data[(data["year"]==focal_year) & (data["quarter"].isin(quarter_list))]
    data = data\
        .groupby(["year", "iso2_code", "language"])["num_pushers"]\
        .agg("sum")\
        .reset_index()\
        .sort_values(by="num_pushers", ascending=False)    
    return data
    
def edgelist_for_github_space(data, key_columns):
    """transform the ecomplexity proximity output for visualization"""
    data = data[key_columns]
    # drop zero -- non-existing edges
    data = data[data[key_columns[2]] > 0]
    # drop self loops
    data = data[data[key_columns[0]] != data[key_columns[1]]]
    return data

In [4]:
prev_filter = "|".join(["yaml", "json", "text", "svg", "Markdown", "xml"])
df = drop_specifics_from_list(data, filter_list=prev_filter)
df = top_languages_filter(df, nr_languages=150)
df = drop_country_codes_from_list(df, country_list=["EU"])
df

Unnamed: 0,num_pushers,language,language_type,iso2_code,year,quarter
158,119186,CSS,markup,CN,2020,1
159,110283,CSS,markup,IN,2020,1
160,27000,CSS,markup,KR,2020,1
161,33798,Vue,markup,CN,2020,1
162,16465,Roff,markup,US,2020,1
...,...,...,...,...,...,...
81563,1251,Procfile,programming,PK,2023,1
81564,117,Sass,markup,EG,2022,4
81565,631,PLpgSQL,programming,PL,2023,1
81566,266,R,programming,CL,2023,1


In [5]:
top20_langs=top_languages_filter(df, nr_languages=20).language.unique()

In [6]:
df = dataframe_for_ecomplexity(df, focal_year=2021, quarter_list=[1,2,3,4])

# CoLoc

In [7]:
from CoLoc_class import CoLoc
# Dam, A., Gomez-Lievano, A., Neffke, F., & Frenken, K. (2023). 
# An information-theoretic approach to the analysis of location and colocation patterns. 
# Journal of Regional Science, 63, 173– 213. https://doi.org/10.1111/jors.12621
# https://github.com/aljevandam/Colocation

# make contingency table q
q = df.pivot(index = 'iso2_code', columns = 'language', values = 'num_pushers')

# define CoLoc class with a uniform prior, choosing the nr_prior_obs
Q = CoLoc(q, prior = 'uniform', nr_prior_obs = q.size)
#Q = CoLoc(q, prior = 'prop', nr_prior_obs = q.size)

In [8]:
Q.make_PMIpci()

language,AIDL,AMPL,ANTLR,ASP.NET,ActionScript,Ada,Apex,AppleScript,Assembly,Astro,...,VHDL,Verilog,Vim Script,Vim Snippet,Visual Basic .NET,Vue,XS,XSLT,Yacc,sed
iso2_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AE,,,,,,,,,,,...,,,,,,-0.260913,,,,
AF,,,,,,,,,,,...,,,,,,,,,,
AL,,,,,,,,,,,...,,,,,,-0.053588,,,,
AM,,,,,,,,,,,...,,,,,,0.418349,,,,
AO,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VN,,,,1.459178,,,,,-0.542411,,...,,-1.320843,-0.532669,,,0.110999,,-0.296097,-1.458452,
YE,,,,,,,,,,,...,,,,,,,,,,
ZA,,,,1.131228,,,,,-0.366482,,...,,,-0.392281,,,-0.163130,,,,
ZM,,,,,,,,,,,...,,,,,,,,,,


In [9]:
# compute location associations 
KLpc_i = Q.make_KLpc_i()
KLpc_i = KLpc_i.reset_index()
KLpc_i.columns = ['language', 'pci']
KLpc_i[["language", "pci"]].sort_values(by="pci", ascending=False).head(20)

Unnamed: 0,language,pci
9,Astro,1.874324
110,SQLPL,1.301462
105,RobotFramework,1.051735
85,OpenSCAD,1.031698
97,Puppet,0.978503
39,G-code,0.920369
66,Logos,0.904565
134,Twig,0.894201
6,Apex,0.86663
14,Brainfuck,0.861325


In [10]:
KLpc_i[["language", "pci"]].sort_values(by="pci", ascending=False).tail(20)

Unnamed: 0,language,pci
16,C#,0.096016
46,Go,0.091913
60,Jupyter Notebook,0.089861
71,Makefile,0.089336
128,Swift,0.088705
93,PowerShell,0.062868
12,Batchfile,0.050397
56,Java,0.047958
82,Objective-C,0.04787
15,C,0.039857


In [11]:
# non-significant associations are returned as NaN
PMIpci = Q.make_sigPMIpci()
PMIpci

language,AIDL,AMPL,ANTLR,ASP.NET,ActionScript,Ada,Apex,AppleScript,Assembly,Astro,...,VHDL,Verilog,Vim Script,Vim Snippet,Visual Basic .NET,Vue,XS,XSLT,Yacc,sed
iso2_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AE,,,,,,,,,,,...,,,,,,-0.260913,,,,
AF,,,,,,,,,,,...,,,,,,,,,,
AL,,,,,,,,,,,...,,,,,,,,,,
AM,,,,,,,,,,,...,,,,,,0.418349,,,,
AO,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VN,,,,1.459178,,,,,-0.542411,,...,,-1.320843,-0.532669,,,0.110999,,-0.296097,-1.458452,
YE,,,,,,,,,,,...,,,,,,,,,,
ZA,,,,1.131228,,,,,-0.366482,,...,,,-0.392281,,,-0.163130,,,,
ZM,,,,,,,,,,,...,,,,,,,,,,


In [12]:
PMIpij = Q.make_PMIpij()
PMIpij

language,AIDL,AMPL,ANTLR,ASP.NET,ActionScript,Ada,Apex,AppleScript,Assembly,Astro,...,VHDL,Verilog,Vim Script,Vim Snippet,Visual Basic .NET,Vue,XS,XSLT,Yacc,sed
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIDL,,,,,,,,,,,...,,,,,,,,,,
AMPL,,,,,,,,,,,...,,,,,,,,,,
ANTLR,,,,,,,,,,,...,,,,,,,,,,
ASP.NET,,,,,,,,,,,...,,,,,,,,,,
ActionScript,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vue,,,,,,,,,,,...,,,,,,,,,,
XS,,,,,,,,,,,...,,,,,,,,,,
XSLT,,,,,,,,,,,...,,,,,,,,,,
Yacc,,,,,,,,,,,...,,,,,,,,,,


In [13]:
# export
PMIpci.to_csv("../outputs/PMIpci_table.csv", sep=";", index=True)# export
PMIpij.to_csv("../outputs/PMIpij_table.csv", sep=";", index=True)

In [14]:
Q.make_PMIpij() 

language,AIDL,AMPL,ANTLR,ASP.NET,ActionScript,Ada,Apex,AppleScript,Assembly,Astro,...,VHDL,Verilog,Vim Script,Vim Snippet,Visual Basic .NET,Vue,XS,XSLT,Yacc,sed
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIDL,,,,,,,,,,,...,,,,,,,,,,
AMPL,,,,,,,,,,,...,,,,,,,,,,
ANTLR,,,,,,,,,,,...,,,,,,,,,,
ASP.NET,,,,,,,,,,,...,,,,,,,,,,
ActionScript,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vue,,,,,,,,,,,...,,,,,,,,,,
XS,,,,,,,,,,,...,,,,,,,,,,
XSLT,,,,,,,,,,,...,,,,,,,,,,
Yacc,,,,,,,,,,,...,,,,,,,,,,


In [15]:
Q.make_stdPMIpij()

language,AIDL,AMPL,ANTLR,ASP.NET,ActionScript,Ada,Apex,AppleScript,Assembly,Astro,...,VHDL,Verilog,Vim Script,Vim Snippet,Visual Basic .NET,Vue,XS,XSLT,Yacc,sed
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIDL,,,,,,,,,,,...,,,,,,,,,,
AMPL,,,,,,,,,,,...,,,,,,,,,,
ANTLR,,,,,,,,,,,...,,,,,,,,,,
ASP.NET,,,,,,,,,,,...,,,,,,,,,,
ActionScript,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vue,,,,,,,,,,,...,,,,,,,,,,
XS,,,,,,,,,,,...,,,,,,,,,,
XSLT,,,,,,,,,,,...,,,,,,,,,,
Yacc,,,,,,,,,,,...,,,,,,,,,,


In [16]:
Q.make_KLpj_i()

language
AIDL           -8.325555e-09
AMPL           -8.325555e-09
ANTLR          -8.325555e-09
ASP.NET        -8.325555e-09
ActionScript   -8.325555e-09
                    ...     
Vue            -8.325555e-09
XS             -8.325555e-09
XSLT           -8.325555e-09
Yacc           -8.325555e-09
sed            -8.325555e-09
Length: 149, dtype: float64

In [17]:
Q.make_stdKLpj_i()

language
AIDL           NaN
AMPL           NaN
ANTLR          NaN
ASP.NET        NaN
ActionScript   NaN
                ..
Vue            NaN
XS             NaN
XSLT           NaN
Yacc           NaN
sed            NaN
Length: 149, dtype: float64

In [18]:
Q.make_MIpij()

0.000762677188010118

In [None]:
# Old code

**3 -- github space visuals**

In [19]:
space_table
["language_1", "language_2", "proximity"]

NameError: name 'space_table' is not defined

In [None]:
def edgelist_for_github_space(data, key_columns):
    """transform the ecomplexity proximity output for visualization"""
    data = data[key_columns]

    # drop zero -- non-existing edges
    data = data[data[key_columns[2]] > 0]

    # drop self loops
    data = data[data[key_columns[0]] != data[key_columns[1]]]
    return data

space_table = edgelist_for_github_space(space_df, key_columns=["language_1", "language_2", "proximity"])

In [None]:
def maximum_spanning_tree(data, key_columns):
    table = data.copy()
    table["distance"] = 1.0 / table[key_columns[2]]
    G = nx.from_pandas_edgelist(table, source = key_columns[0], target = key_columns[1], edge_attr = ["distance", key_columns[2]])
    T = nx.minimum_spanning_tree(G, weight = "distance")
    table2 = nx.to_pandas_edgelist(T)
    table2 = table2[table2[key_columns[2]] > 0]
    table2.rename(columns = {"source": key_columns[0], "target": key_columns[1], key_columns[2]: "score"}, inplace = True)
    table = pd.merge(
        table,
        table2,
        on=key_columns[0:2]
    )  
    table["edge"] = table.apply(lambda x: "%s-%s" % (min(x[key_columns[0]], x[key_columns[1]]), max(x[key_columns[0]], x[key_columns[1]])), axis = 1)
    table = table.drop_duplicates(subset = ["edge"])
    table = table.drop("edge", axis=1)
    return table[key_columns]

In [None]:
# NEED to add edges above threshold
mst_el = maximum_spanning_tree(space_table, key_columns=["language_1", "language_2", "proximity"])

In [None]:
# network visual -- maximum spanning tree
mst_graph = nx.from_pandas_edgelist(mst_el, source="language_1", target="language_2")

fig, ax = plt.subplots(1,1, figsize=(15,9))
nx.draw(mst_graph, with_labels=True, ax=ax)

In [None]:
# figure out numbers
n_nodes = mst_graph.number_of_nodes()
n_edges = n_nodes * 2

In [None]:
def add_edges(mst_edges, all_edges, nr_edges_to_add):
    # drop mst edges from the full edgelist
    mst_edges["drop"] = 1
    all_edges = pd.merge(
        all_edges,
        mst_edges,
        on = ["language_1", "language_2", "proximity"],
        how="left"
    )
    all_edges = all_edges[all_edges["drop"] != 1].drop(columns="drop")

    # sort and select
    all_edges = all_edges.sort_values(by="proximity", ascending=False).iloc[:nr_edges_to_add]

    # add to mst edgelist
    software_space_el = pd.concat([mst_el, all_edges])
    return software_space_el

In [None]:
# network visual -- full software space
software_space_el = add_edges(mst_el, space_table, nr_edges_to_add=282)
ss_graph = nx.from_pandas_edgelist(software_space_el, source="language_1", target="language_2")

fig, ax = plt.subplots(1,1, figsize=(40,40))
pos = nx.drawing.nx_agraph.graphviz_layout(ss_graph,prog='neato')

nx.draw_networkx_edges(ss_graph,pos=pos,edge_color='black',width=.5,alpha=.6)

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='white',linewidths=1.2, alpha=1,edgecolors='black')

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='#56CBF9',linewidths=1.2, alpha=.5,edgecolors='black')

nx.draw_networkx_labels(ss_graph,
    pos,font_size=30)

plt.axis('off')

In [None]:
posh_list=['Stata','Julia','Mathematica','GAP','Elm','OCaml','VBA','Fortran','Cuda','Pascal','sed','Perl','awk','MATLAB','R',
'C','C++','Python','Java','Typescript','Javascript','HTML','CSS','C#','Swift','Kotlin','UnrealScript','Lisp']

In [None]:
# network visual -- full software space
software_space_el = add_edges(mst_el, space_table, nr_edges_to_add=282)
ss_graph = nx.from_pandas_edgelist(software_space_el, source="language_1", target="language_2")

fig, ax = plt.subplots(1,1, figsize=(17,10))
pos = nx.drawing.nx_agraph.graphviz_layout(ss_graph,prog='neato')

nx.draw_networkx_edges(ss_graph,pos=pos,edge_color='black',width=.5,alpha=.6)

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='white',linewidths=1.2, alpha=.1,edgecolors='black')

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='#56CBF9',linewidths=1.2, alpha=.5,edgecolors='black')

#nx.draw_networkx_labels(ss_graph.subgraph(posh_list),bbox = dict(facecolor = "white",boxstyle='round'),
#    pos=pos,font_size=15)

plt.axis('off')

In [None]:
# network visual -- full software space
software_space_el = add_edges(mst_el, space_table, nr_edges_to_add=282)
ss_graph = nx.from_pandas_edgelist(software_space_el, source="language_1", target="language_2")

fig, ax = plt.subplots(1,1, figsize=(17,10))
pos = nx.drawing.nx_agraph.graphviz_layout(ss_graph,prog='neato')

nx.draw_networkx_edges(ss_graph,pos=pos,edge_color='black',width=.5,alpha=.6)

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='white',linewidths=1.2, alpha=.1,edgecolors='black')

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='#56CBF9',linewidths=1.2, alpha=.5,edgecolors='black')

nx.draw_networkx_labels(ss_graph.subgraph(posh_list),bbox = dict(facecolor = "white",boxstyle='round'),
    pos=pos,font_size=15)

plt.axis('off')

In [None]:
# export for Herr Wachs
software_space_el.to_csv("../outputs/software_space_edgelist.csv", index=False, sep=";")

In [None]:
def software_space_of_country(el, complexity_table, country_code):
    # focal country
    ctable = complexity_table[complexity_table["iso2_code"]==country_code]
    
    # RCA filter
    ctable = ctable[ctable["rca"] >= 1]

    cel = pd.merge(
        el,
        ctable[["language", "rca"]].drop_duplicates(),
        left_on="language_1",
        right_on="language",
        how="left"
    )
    cel = pd.merge(
        cel,
        ctable[["language", "rca"]].drop_duplicates(),
        left_on="language_2",
        right_on="language",
        how="left",
        suffixes=["_i", "_j"]
    )

    # filter cel
    cel = cel.dropna(subset=["rca_i", "rca_j"])
    
    # clean up
    cel = cel[["language_1", "language_2", "proximity"]]

    return cel

In [None]:
# hungary
selected_el = software_space_of_country(software_space_el, cdf, country_code="HU")
selected_graph = nx.from_pandas_edgelist(selected_el, source="language_1", target="language_2")

fig, ax = plt.subplots(1,1, figsize=(17,10))

nx.draw_networkx_nodes(ss_graph, pos, node_size=250,
                       node_color='white',linewidths=1.2, alpha=.1,edgecolors='black')

nx.draw_networkx_edges(ss_graph,pos=pos,edge_color='black',width=.5,alpha=.6)


nx.draw_networkx_edges(selected_graph,pos=pos,edge_color='black',width=.5,alpha=.6)

nx.draw_networkx_nodes(selected_graph, pos, node_size=250,
                       node_color='white',linewidths=1.2, alpha=.1,edgecolors='black')

nx.draw_networkx_nodes(selected_graph, pos, node_size=250,
                       node_color='#56CBF9',linewidths=1.2, alpha=.5,edgecolors='black')
plt.axis('off')

In [None]:
# dataframe for regression


# Communities

In [None]:
# Adding community detection adding the default Girvan-Newman method. Alternative: Louvain
# Girvan, M., & Newman, M. E. (2002). Community structure in social and biological networks. 
# Proceedings of the National Academy of Sciences, 99(12), 7821-7826.

from networkx.algorithms import community
from scipy.spatial import ConvexHull
import matplotlib.patches as mpatches

# Community detection
software_space_el = add_edges(mst_el, space_table, nr_edges_to_add=282)
ss_graph = nx.from_pandas_edgelist(software_space_el, source="language_1", target="language_2")
communities_generator = community.girvan_newman(ss_graph)

# Define communities (number requires more justification, inter-rated or similar)
num_communities = 6
for communities in communities_generator:
    if len(communities) >= num_communities:
        break
sorted_communities = sorted(map(sorted, communities))

# Set up plot
%config InlineBackend.figure_format = 'retina'
fig, ax = plt.subplots(figsize=(18, 10))
pos = nx.drawing.nx_agraph.graphviz_layout(ss_graph, prog='neato')

# Define colors
community_colors = ['#FFF2CC', '#D5E8D4', '#FFE6CC', '#F5F5F5', '#DAE8FC', '#E1D5E7']
edge_colors = ['#D6B656', '#82B366', '#D79B00', '#666666', '#6C8EBF', '#9673A6']
node_community_color = {node: edge_colors[i] for i, community in enumerate(sorted_communities) for node in community}

# Community backgrounds
for i, community in enumerate(sorted_communities):
    community_pos = {k: pos[k] for k in community if k in pos}
    points = np.array(list(community_pos.values()))
    if len(points) > 2:
        hull = ConvexHull(points)
        ax.fill(points[hull.vertices, 0], points[hull.vertices, 1], community_colors[i], alpha=1.0)

for edge in ss_graph.edges():
    nx.draw_networkx_edges(ss_graph, pos, edgelist=[edge], edge_color=node_community_color.get(edge[0], 'black'), width=.5, alpha=.6)
nx.draw_networkx_nodes(ss_graph, pos, node_size=100, node_color='white', linewidths=1.0, edgecolors='black')
_ = nx.draw_networkx_labels(ss_graph.subgraph(posh_list), pos, bbox=dict(facecolor="white", boxstyle='round'), font_size=10)

# Used GPT4 to
group_names = {
    0: 'Scripting & Automation',
    1: 'Academic & Specialized',
    2: 'Web & Mobile Development',
    3: 'Systems & General Programming',
    4: 'Functional & Multi-Paradigm',
    5: 'Machine Learning & Embedded Systems'
}
legend_handles = [mpatches.Patch(color=community_colors[i], label=name) for i, name in group_names.items()]
plt.legend(handles=legend_handles, loc='lower left')

# Display the plot
plt.show()

In [None]:
# Betweenness

# Set up plot
%config InlineBackend.figure_format = 'retina'
fig, ax = plt.subplots(figsize=(18, 10))
pos = nx.drawing.nx_agraph.graphviz_layout(ss_graph, prog='neato')

# Calculate betweenness centrality for each node
betweenness = nx.betweenness_centrality(ss_graph)
threshold = 0.25

# Draw background community colors (convex hulls)
for i, community in enumerate(sorted_communities):
    community_pos = {k: pos[k] for k in community if k in pos}
    points = np.array(list(community_pos.values()))
    if len(points) > 2:
        hull = ConvexHull(points)
        plt.fill(points[hull.vertices, 0], points[hull.vertices, 1], community_colors[i % len(community_colors)], alpha=1.0)

bridge_languages = [node for node, centrality in betweenness.items() if centrality > threshold]

for node in bridge_languages:
    x, y = pos[node]
    plt.text(x, y, node, fontsize=10, bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.3'))
for edge in ss_graph.edges():
    nx.draw_networkx_edges(ss_graph, pos, edgelist=[edge], edge_color=node_community_color.get(edge[0], 'black'), width=.5, alpha=.6)

nx.draw_networkx_nodes(ss_graph, pos, node_size=100, node_color='white', linewidths=1.0, edgecolors='black')
nx.draw_networkx_nodes(ss_graph, pos, nodelist=bridge_languages, node_size=150, node_color='gold', linewidths=1.0, edgecolors='black')

legend_handles = [mpatches.Patch(color=community_colors[i], label=name) for i, name in group_names.items()]
plt.legend(handles=legend_handles, loc='lower left')

# Display the plot
plt.show()