# Extracting Microservices from Monolithic Systems using Deep Reinforcement Learning

This notebook includes all the code used to create the figures and the analysis of the evaluation section in the paper "Extracting Microservices from Monolithic Systems using Deep Reinforcement Learning" which has been submitted to the Empirical Software Engineering journal.

In [None]:
import os
import re
import itertools

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
from sklearn.preprocessing import StandardScaler
from scipy.stats import rankdata

In [None]:
# Apply seaborn's default theme to the figures produced below.
sns.set_theme()
# All paths are resolved relative to the current working directory.
project_dir = os.curdir
# Figures are written to <project_dir>/figures; the folder is created if it is missing.
fig_path = os.path.join(project_dir,"figures")
os.makedirs(fig_path, exist_ok=True)

# RQ1:

## Load and prepare data

In [None]:
# RQ1 configuration
# folder holding one sub-folder per approach, then one per application
logs_path = os.path.join(project_dir, "data", "mono")
# metrics whose best value is the minimum
ascending_metrics = ['icp', 'ifn', 'ned']
# metrics whose best value is the maximum
descending_metrics = ['chm', 'chd', 'smq', 'cmq', 'cov', "msn"]
# metric used to pick a single decomposition per group
order_metric = "chm"
groupby = ["application", "configuration"]
# grouping keys mapped to their expected values, in plotting order
groupby_data = {
    "application": ['plants', 'petclinic-legacy', 'acmeair', 'daytrader', 'roller'],
    "configuration": ['Sequential', 'Flattened', 'CombSequential'],
}
metrics = ["chm", "chd", "icp", "ned", "cov"]

In [None]:
# Read the RLDec results of every RQ1 application and stack them in one frame.
a = "rldec"
# the configuration name is whatever precedes "DQN<digits>/" in the experiment id
config_pattern = re.compile(r"(\S*)DQN\d*/.*")
dfs = list()
for app in groupby_data["application"]:
    app_df = pd.read_csv(os.path.join(logs_path, a, app, "results.csv"), index_col=0)
    app_df["application"] = app
    app_df["configuration"] = app_df.apply(
        lambda row: config_pattern.fullmatch(row["exp_id"])[1], axis=1
    )
    dfs.append(app_df)
df = pd.concat(dfs, axis=0, ignore_index=True)
df["approach"] = a

In [None]:
# preview the first rows of the combined RLDec results
df.head()

In [None]:
# From here on, restrict the RQ1 figures to these three metrics.
metrics = ["chm", "chd", "icp"]

In [None]:
# Keep, for each (application, configuration) pair, the decomposition with the best
# "order_metric" (default "chm"): the maximum when the metric is listed in
# descending_metrics, the minimum otherwise.
keys = list(groupby_data.keys())
grouped = df.groupby(keys)[order_metric]
df_m = (grouped.max() if order_metric in descending_metrics else grouped.min()).reset_index()
# An inner merge on keys + metric compares every row with the optimum of ITS OWN group.
# Checking each column independently against df_m would also keep rows whose value
# merely equals the optimum of some other group.
dff = df.merge(df_m, on=keys+[order_metric], how="inner")
# several rows may tie for the optimum; keep the first one of each group
df = dff.groupby(keys).first().reset_index()
df.head()

## Generate barplots

In [None]:
# Draw one grouped barplot per metric, side by side in a single figure.
# Set save to None to skip writing the PDF, or to a figure name to store it.
save = "fig_rq1_barplots"
sns.set_theme()
fig, axes = plt.subplots(1, len(metrics), figsize=(40,5))
for ax, m in zip(axes, metrics):
    sns.barplot(
        data=df,
        x="application",
        y=m,
        hue="configuration",
        order=groupby_data["application"],
        hue_order=groupby_data["configuration"],
        ax=ax,
        alpha=0.8,
    )
    ax.grid(False)
    ax.set_xlabel("")
    ax.set_ylabel(m.upper(), rotation=90, size='large')
    # keep the legend entries but drop the legend title
    ax.legend().set_title("")
plt.title("")
if save is not None:
    target = os.path.join(fig_path, f"{save}.pdf")
    plt.savefig(target, bbox_inches='tight')
plt.show()

In [None]:
# Draw each metric's grouped barplot in its own figure.
# To store the images, set save to a per-metric name, e.g. f"fig_rq1_barplots_{m}".
sns.set_theme()
for m in metrics:
    save = None
    fig, axes = plt.subplots(1, 1, figsize=(12,5))
    sns.barplot(
        data=df,
        x="application",
        y=m,
        hue="configuration",
        order=groupby_data["application"],
        hue_order=groupby_data["configuration"],
        ax=axes,
        alpha=0.8,
    )
    axes.grid(False)
    axes.set_xlabel("")
    axes.set_ylabel(m.upper(), rotation=90, size='large')
    axes.legend().set_title("")
    plt.title("")
    if save is not None:
        target = os.path.join(fig_path, f"{save}.pdf")
        plt.savefig(target, bbox_inches='tight')
    plt.show()

# RQ2

## Load and prepare data

In [None]:
# RQ2 configuration
logs_path = os.path.join(project_dir, "data", "mono")
# metrics whose best value is the minimum / the maximum
ascending_metrics = ['icp', 'ifn', 'ned', 'bcp']
descending_metrics = ['chm', 'chd', 'smq', 'cmq', 'cov', "msn", "score"]
order_metric = "chm"
# approaches and applications that appear in the RQ2 tables
baselines = [
    'rldec', 'cogcn', 'topicdecomp', 'hydec', 'mono2micro', 'msextractor',
    'random', 'boulder', 'grains',
]
projects = [
    'plants', 'petclinic-legacy', 'acmeair', 'daytrader', 'roller',
    'jpetstore-6', 'partsunlimitedmrp', '7ep-demo',
]
groupby = ["application","approach"]
# decompositions with a NED above this value are discarded when loading
ned_threshold = 0.85
groupby_data = dict(application=projects, approach=baselines)
metrics = ["chm", "chd", "icp", "ned", "bcp", "cov", "msn"]
save = None  # e.g. "rq2_boxplots"

In [None]:
# Load the results of every approach found under logs_path (one sub-folder per approach,
# then one per application, each holding a results.csv).
dfs = list()
# sorted() makes the row order, and therefore later tie-breaking, reproducible
for a in sorted(os.listdir(logs_path)):
    approach_dir = os.path.join(logs_path, a)
    # skip stray entries such as .DS_Store or .ipynb_checkpoints
    if a.startswith(".") or not os.path.isdir(approach_dir):
        continue
    df_list = list()
    for app in sorted(os.listdir(approach_dir)):
        app_dir = os.path.join(approach_dir, app)
        if app.startswith(".") or not os.path.isdir(app_dir):
            continue
        df = pd.read_csv(os.path.join(app_dir, "results.csv"), index_col=0)
        if a=="rldec":
            # use only the CombSequential configuration; copy() so the column added
            # below is not written to a view of the filtered frame
            df = df[df["exp_id"].str.startswith("CombSequential")].copy()
        df["application"] = app
        df_list.append(df)
    if not df_list:
        continue
    df = pd.concat(df_list, axis=0, ignore_index=True)
    if a=="benchmark":
        # benchmark rows carry their approach in exp_id; the "random_*" runs are pooled
        df["approach"] = df["exp_id"].map(lambda e: "other_random" if e.startswith("random_") else e)
    else:
        df["approach"] = a
        # We exclude decompositions whose NED values exceed the threshold (extreme/outlier decompositions)
        df = df[df.ned<=ned_threshold]
    dfs.append(df)
df = pd.concat(dfs, axis=0, ignore_index=True)

In [None]:
# Standardise each metric per application and combine them into a single "score":
# metrics in descending_metrics are added, all the others are subtracted.
# dfcp keeps the standardised value of every metric; df keeps the raw values plus "score".
dfcp = df.copy()
df["score"] = 0.0  # float from the start: standardised values are added below
# "cov" and "msn" are not part of the score; "score" itself is excluded so that
# re-running this cell never feeds the score back into its own computation
score_inputs = [m for m in metrics if m not in ("cov", "msn", "score")]
for app in df.application.unique():
    cond = (df.application==app)
    for m in score_inputs:
        x = StandardScaler().fit_transform(df.loc[cond, m].values.reshape(-1, 1))[:,0]
        dfcp.loc[cond, m] = x
        if m in descending_metrics:
            df.loc[cond, "score"] += x
        else:
            df.loc[cond, "score"] -= x
if "score" not in metrics:
    metrics.append("score")

In [None]:
# preview the first rows, now including the "score" column
df.head()

## Select decompositions

In [None]:
# Select, for each application/approach pair, the decomposition with the best
# "selection_metric" (the combined score): the maximum when the metric is listed in
# descending_metrics, the minimum otherwise.
keys = list(groupby_data.keys()) # application/approach
selection_metric = "score"
grouped = df.groupby(keys)[selection_metric]
df_m = (grouped.max() if selection_metric in descending_metrics else grouped.min()).reset_index()
# the inner merge keeps exactly the rows that reach the optimum of their own group
dff = df.merge(df_m, on=keys+[selection_metric], how="inner")
# ties are resolved by keeping the first matching row of each group
df_b = dff.groupby(keys).first().reset_index()
assert df_b.shape[0]==df_m.shape[0]
# a group whose selection metric is entirely missing has no usable "best" row
assert df_b[selection_metric].notna().all()
df_b.head()

In [None]:
# Build a table with, for every application/approach pair and every metric, the median
# over all decompositions and the value of the selected ("best") decomposition.
df_bb = df_b.set_index(["application", "approach"])
m1 = pd.MultiIndex.from_product([projects, baselines])
m2 = pd.MultiIndex.from_product([metrics, ["median", "best"]])
df_tab = pd.DataFrame(index=m2, columns = m1)
for project, baseline in itertools.product(projects, baselines):
    column = (project, baseline)
    dft = df[(df["application"]==project)&(df["approach"]==baseline)]
    best_row = df_bb.loc[column]
    for metric in metrics:
        df_tab.loc[(metric, "median"), column] = round(dft[metric].median(), 3)
        df_tab.loc[(metric, "best"), column] = round(best_row[metric], 3)
df_tab

## Prepare RQ2 tables

In [None]:
# Ensure the decompositions "boulder", "random", "grains" are never included in the selection process
df_tab_copy = df_tab.copy()
for m in metrics:
    # an infinite sentinel can never win the arg-max/arg-min below, whereas a value taken
    # from the table (e.g. the worst median) could still beat a real "best" entry
    val = -np.inf if m in descending_metrics else np.inf
    for a in ["boulder", "random", "grains"]:
        for v in ["median", "best"]:
            for app in projects:
                df_tab_copy.loc[(m,v), (app,a)] = val
# find the best baseline for each application/metric
df_tab_is_max = df_tab.T.copy()
for c in df_tab_is_max.columns:
    df_tab_is_max[c] = False
candidates = df_tab_copy.T
for app in projects:
    app_rows = candidates.loc[app]  # rows: approaches, columns: (metric, median/best)
    for m in metrics:
        if m in ["msn", "cov"]:
            continue
        for v in ["median", "best"]:
            values = app_rows[(m,v)].astype(float)
            a = values.idxmax() if m in descending_metrics else values.idxmin()
            df_tab_is_max.loc[(app, a), (m,v)] = True
# Highlight in bold the best value for each application/metric in the table
def custom_format(x):
    """Format one table cell with three decimals, in LaTeX bold when it is the best value.

    x is a one-element Series: its name is the (metric, median/best) row label and its
    only index entry is the (application, approach) column label.
    """
    value = x.iloc[0]  # explicit positional access; x[0] relies on a deprecated fallback
    if df_tab_is_max.loc[x.index[0], x.name]:
        return f"\\textbf{{{value:0.3f}}}"
    return f"{value:0.3f}"
df_cp = df_tab.apply(lambda x: pd.DataFrame(x).apply(custom_format, axis=1))
df_cp

In [None]:
# Column headers used in the tables: metric name followed by a LaTeX arrow
# (MSN carries no arrow).
metric_name_map = {
    "chm": r"CHM $\nearrow$",
    "chd": r"CHD $\nearrow$",
    "icp": r"ICP $\searrow$",
    "bcp": r"BCP $\searrow$",
    "ned": r"NED $\searrow$",
    "cov": r"COV $\nearrow$",
    "msn": r"MSN",
    "score": r"SCORE $\nearrow$",
}
# Display names of the applications.
app_name_map = {
    "plants": "Plants",
    "petclinic-legacy": "PetClinic",
    "acmeair": "ACMEair",
    "daytrader": "DayTrader",
    "roller": "Roller",
    "jpetstore-6": "JPetStore",
    "7ep-demo": "7ep-demo",
    "partsunlimitedmrp": "PartsMRP"
}
# Display names of the approaches; names without an entry are shown unchanged.
baseline_name_map = {
    "rldec": "RLDec",
    "cogcn": "CoGCN",
    "topicdecomp": "TopicDecomp",
    "hydec": "HyDec",
    "mono2micro": "Mono2micro",
    "msextractor": "MSExtractor",
}

In [None]:
# Show the formatted "best" values as one table per group of `step` applications,
# with the applications of a group placed side by side.
dft = df_cp.T.reset_index().rename(columns={"level_0":"application", "level_1":"baseline"})
step = 2
metrics_to_use = ['chm', 'chd', 'icp', 'bcp', 'ned', 'cov', 'score']
for i in range(0, len(projects), step):
    dfts = dict()
    # slicing (rather than indexing i and i+1) also handles a last, incomplete group
    for a in projects[i:i+step]:
        # copy() so the renamed column is not written to a view of dft
        dftt = dft[dft.application==a].copy()
        name = app_name_map.get(a, a)
        dftt["baseline"] = dftt["baseline"].apply(lambda b: baseline_name_map.get(b, b))
        dftt = dftt.drop(columns=["application"]).set_index("baseline")
        dftt = dftt[[(m,"best") for m in metrics_to_use]]
        dftt.columns = [metric_name_map.get(m, m) for m in metrics_to_use]
        dfts[name] = dftt
    dff = pd.concat(dfts, axis=1)
    display(dff)

## Create rankings table

In [None]:
# Rank the compared approaches within each application, separately for every metric
# (rank 1 = best), using the value of the selected ("best") decomposition.
dff = pd.DataFrame(index=df_tab.T.index)
for m in metrics:
    dff[m] = df_tab.T[(m, "best")]
dff = dff.reset_index().rename(columns={"level_0":"application", "level_1":"approach"})
to_show = ['rldec', 'cogcn', 'topicdecomp', 'hydec', 'mono2micro', 'msextractor']
metrics_to_use = ["chm", "chd", "icp", "bcp", "ned", "score"]
df_pos = dff[dff.approach.isin(to_show)].copy().set_index(["application","approach"])[metrics_to_use]
for app in df_pos.reset_index().application.unique():
    for m in df_pos.columns:
        # rankdata ranks in ascending order, so negate metrics where higher is better
        sign = -1 if m in descending_metrics else 1
        dft = dff[(dff.application==app)&dff.approach.isin(to_show)][["approach", m]]
        # method="min" gives tied approaches the same integer rank; the default average
        # rank (e.g. 1.5) would be truncated by the integer cast below
        ranks = rankdata(sign*dft[m].astype(float).values, method="min")
        df_pos.loc[[(app, approach) for approach in dft.approach], m] = ranks
df_pos = df_pos.astype(np.int64)
df_pos

In [None]:
# Calculate the sum of rankings over the applications (a lower total is better).
# Grouping on the index level keeps the "application" labels out of the sum: after
# reset_index() they become a string column, which groupby.sum() concatenates on
# pandas >= 2.0 instead of dropping.
df_spos = df_pos.groupby(level="approach").sum()
df_spos

In [None]:
# Format the table for the LaTeX paper
# Rank the totals per metric (1 = smallest total). method="min" gives tied approaches the
# same integer rank; the default average rank would be truncated by the integer cast.
rankings = df_spos.apply(lambda x: rankdata(x, method="min").astype(int), axis=0)
def custom_format(x):
    """Mark the three best totals of a metric: bold for rank 1, '*' for 2, italics for 3.

    x is a one-element Series: its name is the approach and its only index entry
    is the metric.
    """
    value = x.iloc[0]  # explicit positional access; x[0] relies on a deprecated fallback
    rank = rankings.loc[x.name, x.index[0]]
    if rank==1:
        return f"\\textbf{{{value}}}"
    if rank==2:
        return f"{value}*"
    if rank==3:
        return f"\\textit{{{value}}}"
    return f"{value}"
df_spos_cp = df_spos.apply(lambda x: pd.DataFrame(x).apply(custom_format, axis=1))
df_spos_cp

In [None]:
# The formatted ranking table was already computed by the previous cell; this cell used
# to repeat the same custom_format definition and the same computation verbatim.
df_spos_cp

# RQ3:

## Empirical evaluation

### Load and prepare data

In [None]:
# RQ3 configuration
ned_threshold = 0.85
micro_logs_path = os.path.join(project_dir, "data", "micro")
acc_metrics = ['accuracy', 'precision.1', 'recall.1', 'f1_score', 'roc_auc', 'fbeta_score']
# columns kept when loading the results
to_include = [
    'chm', 'chd', 'smq', 'cmq', 'icp', 'msn', 'ned',
    "precision", "recall", "SRP@5", "SRP@7", "SRP@9",
] + acc_metrics
metrics = [
    'chm', 'chd', 'icp', 'ned', 'msn', "precision", "recall",
    "SRP@5", "SRP@9", 'precision.1', 'recall.1', 'fbeta_score',
]
# metrics whose best value is the minimum / the maximum
ascending_metrics = ['icp', 'ifn', 'ned']
descending_metrics = ['score', 'chm', 'chd', 'smq', 'cmq', 'cov', "msn", "precision", "recall"]
descending_metrics += [f"SRP@{k}" for k in range(11)]
descending_metrics += [f"SRR@{k}" for k in range(11)]
descending_metrics += acc_metrics
order_metric = "fbeta_score"
baselines = ['rldec', 'benchmark']
projects = [
    'petclinic-microservices',
    'es-kanban-board',
    'microservices-event-sourcing',
    'social-edition-modular',
    'social-edition-microservices',
]
# short labels used on the plots, in the same order as projects
rename = ['petclinic', 'kanban', 'event-sourcing', 'social-modular', 'social-microservices']
rename_map = dict(zip(projects, rename))

In [None]:
# load the RLDec data
dfs = list()
a = "rldec"
approach_dir = os.path.join(micro_logs_path, a)
# sorted() gives a reproducible row order
for app in sorted(os.listdir(approach_dir)):
    # skip stray entries such as .DS_Store or .ipynb_checkpoints
    if app.startswith(".") or not os.path.isdir(os.path.join(approach_dir, app)):
        continue
    df = pd.read_csv(os.path.join(approach_dir, app, "results.csv"), index_col=0)
    df["application"] = app
    dfs.append(df)
    # column layout of the RLDec results (including "application"); kept for later cells
    old_columns = df.columns
df = pd.concat(dfs, axis=0, ignore_index=True)
df["approach"] = a
# copy() so the column added below is not written to a slice of the full frame
df = df[["application", "approach"]+to_include].copy()
# an application without a short name raises KeyError
df["application_short"] = df["application"].map(lambda name: rename_map[name])

In [None]:
# load the benchmark data
dfs = list()
a = "benchmark"
approach_dir = os.path.join(micro_logs_path, a)
# sorted() gives a reproducible row order
for app in sorted(os.listdir(approach_dir)):
    # skip stray entries such as .DS_Store or .ipynb_checkpoints
    if app.startswith(".") or not os.path.isdir(os.path.join(approach_dir, app)):
        continue
    df2 = pd.read_csv(os.path.join(approach_dir, app, "results.csv"), index_col=0)
    df2["application"] = app
    # NOTE(review): positional rename to the RLDec column names; this assumes the benchmark
    # CSV has the same columns in the same order -- confirm against the data
    df2.columns = old_columns
    dfs.append(df2)
df2 = pd.concat(dfs, axis=0, ignore_index=True)
# benchmark rows carry their approach in exp_id; the "random_*" runs are pooled
df2["approach"] = df2["exp_id"].map(lambda e: "other_random" if e.startswith("random_") else e)
# copy() so the column added below is not written to a slice of the full frame
df2 = df2[["application", "approach"]+to_include].copy()
# an application without a short name raises KeyError
df2["application_short"] = df2["application"].map(lambda name: rename_map[name])

In [None]:
# Drop the RLDec decompositions whose NED reaches the threshold, then stack the RLDec
# and benchmark results.
within_threshold = df.ned<ned_threshold
not_rldec = df.approach!="rldec"
df = df[within_threshold|not_rldec]
df3 = pd.concat([df, df2])

### Boxplots

#### MSFB boxplot

In [None]:
# Boxplot of the RLDec MSFB values per application, with the "random" and "boulder"
# benchmark values drawn as horizontal reference lines over each box.
save = "fig_rq3_boxplots_fbeta"
baselines = ['rldec']

metric = "fbeta_score"
metric_rename = "MSFB"
# reference approach -> (colour, line style), shared by the legend and the lines
reference_styles = {"random": ("brown", "--"), "boulder": ("darkolivegreen", "-.")}
fig, axes = plt.subplots(1, 1, figsize=(10,5))
dft = df.rename(columns={metric:metric_rename})
x = sns.boxplot(data=dft, x="application_short", y=metric_rename, ax = axes, order=rename)
extra_legend_elements = [
    Line2D([0], [0], color=c, label=v, ls=ls) for v, (c, ls) in reference_styles.items()
]
# each category takes an equal share of the axis width (0.2 for five applications);
# the lines span the middle 80% of their slot
slot = 1/len(rename)
for i, app in enumerate(rename):
    for v, (c, ls) in reference_styles.items():
        # first benchmark row of this approach for the application
        y = df2[(df2.application_short==app)&(df2.approach==v)].iloc[0][metric]
        x.axhline(y = y,
                  xmin = (i+0.1)*slot,
                  xmax = (i+0.9)*slot,
                  color=c, linestyle=ls, linewidth=2)
x.grid(False)
x.get_xaxis().get_label().set_visible(False)
axes.legend(handles=extra_legend_elements, loc="upper right")
plt.title("")
if save is not None:
    plt.savefig(os.path.join(fig_path, "{}.pdf".format(save)), bbox_inches='tight')
plt.show()

#### MSE-precision and MSE-recall boxplots

In [None]:
# Boxplots of the RLDec precision and recall per application, side by side.
save = "fig_rq3_boxplots_mse"
baselines = ['rldec']

metrics = ["precision", "recall"]
new_metric_names = ["mse-precision", "mse-recall"]
# plot under the display names
dft = df.rename(columns=dict(zip(metrics, new_metric_names)))
fig, axes = plt.subplots(1, len(metrics), figsize=(20,5))
for ax, m in zip(axes, new_metric_names):
    x = sns.boxplot(data=dft, x="application_short", y=m, ax=ax, order=rename)
    x.grid(False)
    x.get_xaxis().get_label().set_visible(False)
plt.title("")
if save is not None:
    target = os.path.join(fig_path, f"{save}.pdf")
    plt.savefig(target, bbox_inches='tight')
plt.show()

#### SRP boxplots

In [None]:
# Boxplots of the RLDec SRP@5 and SRP@9 values per application, side by side.
save = "fig_rq3_boxplots_srp"
metrics = ["SRP@5", "SRP@9"]
fig, axes = plt.subplots(1, len(metrics), figsize=(20,5))
for ax, m in zip(axes, metrics):
    x = sns.boxplot(data=df, x="application_short", y=m, ax=ax, order=rename)
    x.grid(False)
    x.get_xaxis().get_label().set_visible(False)
plt.title("")
if save is not None:
    target = os.path.join(fig_path, f"{save}.pdf")
    plt.savefig(target, bbox_inches='tight')
plt.show()

## Qualitative example

In [None]:
# Settings of the qualitative example: the application to inspect, where its results
# are stored and which metrics are tracked.
app = "petclinic-microservices"
micro_logs_path = os.path.join(os.curdir, "data", "micro")
to_include = [
    'chm', 'chd', 'smq', 'cmq', 'icp', 'ned',
    "precision", "recall", "SRP@5", "SRP@7", "SRP@9",
]
ascending_metrics = ['icp', 'ifn', 'ned']
descending_metrics = ['chm', 'chd', 'smq', 'cmq', 'cov', "msn", "precision", "recall"]
descending_metrics += [f"SRP@{k}" for k in range(11)]
descending_metrics += [f"SRR@{k}" for k in range(11)]
# decompositions whose NED reaches this value are discarded
ned_threshold = 0.85

In [None]:
# load the data
a = "rldec"
df = pd.read_csv(os.path.join(micro_logs_path, a, app, "results.csv"), index_col=0) # Load the decomposition metric data
decomps = pd.read_csv(os.path.join(micro_logs_path, a, app, "decompositions.csv"), index_col=0) # Load the decompositions
cond = df.ned<ned_threshold
df = df[cond]
decomps = decomps[cond]
df["application"] = app
df["approach"] = a
# df["f1score"] = df.apply(lambda x: f1score(x["precision"], x["recall"]), axis=1)
order_metric = "fbeta_score"
top_decomp_index = np.argmax(df[order_metric])
top_decomp_metrics = df.iloc[top_decomp_index]
top_decomp = decomps.iloc[top_decomp_index]

In [None]:
# load the data
a = "topicdecomp"
df = pd.read_csv(os.path.join(micro_logs_path, a, app, "results.csv"), index_col=0) # Load the decomposition metric data
decomps = pd.read_csv(os.path.join(micro_logs_path, a, app, "decompositions.csv"), index_col=0) # Load the decompositions
cond = df.ned<ned_threshold
df = df[cond]
decomps = decomps[cond]
df["application"] = app
df["approach"] = a
# df["f1score"] = df.apply(lambda x: f1score(x["precision"], x["recall"]), axis=1)
order_metric = "fbeta_score"
top_decomp_td_index = np.argmax(df[order_metric])
top_decomp_td_metrics = df.iloc[top_decomp_td_index]
top_decomp_td = decomps.iloc[top_decomp_td_index]

In [None]:
# metrics of the selected RLDec decomposition
display(top_decomp_metrics)

In [None]:
# metrics of the selected TopicDecomp decomposition
display(top_decomp_td_metrics)

In [None]:
# the selected RLDec decomposition itself
display(top_decomp)

### Generate graph visualization of the actual microservices

In [None]:
from pyvis.network import Network
from matplotlib.colors import to_hex

In [None]:
# the two values are passed, in this order, as the first two arguments of pyvis Network
shape = (800, 1500)
# folder that receives the generated HTML graphs
output_path = os.path.join(os.curdir, "figures")

In [None]:
# Derive the actual microservice of every class from its name: the service is the part
# between "spring-petclinic-" and the first dot.
class_names = list(top_decomp.index)
service_pattern = re.compile(r"spring-petclinic-([^.]*)\..*")
node_map_s = {name: service_pattern.match(name)[1] for name in class_names}
# label of a class node: the last dot-separated component of its name
short_names = {name: name.rsplit(".", 1)[-1] for name in class_names}
nodes = list(node_map_s) # classes
services_s = list(set(node_map_s.values()))  # microservices

In [None]:
# Give every actual microservice a colour from the Pastel1 palette. The index wraps
# around so that having more services than palette entries reuses colours instead of
# raising IndexError.
palette = plt.cm.Pastel1.colors
colors_s = {
    service: to_hex(palette[i % len(palette)])
    for i, service in enumerate(services_s)
}

In [None]:
# Graph of the actual microservices: one box per service, one node per class, and an
# edge from every class to the service it belongs to.
N = Network(*shape, directed=False, notebook=False)
N.force_atlas_2based()
for cls in nodes:
    N.add_node(cls, label=short_names[cls], title=cls, color=colors_s[node_map_s[cls]])
for svc in services_s:
    N.add_node(svc, label=svc, title=svc, color=colors_s[svc], shape='box')
for cls, svc in node_map_s.items():
    N.add_edge(cls, svc)
N.show(os.path.join(output_path, f"{app}_true_micro.html"))

### Generate graph visualization of the RLDec microservices

In [None]:
from pyvis.network import Network
from matplotlib.colors import to_hex

In [None]:
# the two values are passed, in this order, as the first two arguments of pyvis Network
shape = (800, 1500)
# folder that receives the generated HTML graphs
output_path = os.path.join(os.curdir, "figures")

In [None]:
# Map every class to the microservice assigned by the selected RLDec decomposition.
class_names = list(top_decomp.index)
node_map = {
    cls: f"micro_{label}"
    for cls, label in zip(top_decomp.index, top_decomp.values)
}
# label of a class node: the last dot-separated component of its name
short_names = {cls: cls.rsplit(".", 1)[-1] for cls in class_names}
nodes = list(node_map) # classes
services = list(set(node_map.values()))  # microservices

In [None]:
# Assign each extracted microservice a Pastel1 colour, starting right after the colours
# given to the actual microservices and wrapping around the palette.
palette = plt.cm.Pastel1.colors
offset = len(colors_s)
colors = {
    service: to_hex(palette[(offset+i)%len(palette)])
    for i, service in enumerate(services)
}

In [None]:
# For every extracted microservice, collect the colours of the actual services its
# classes belong to ...
svc_map = {svc:[colors_s[node_map_s[c]] for c in node_map if node_map[c]==svc] for svc in services}
# ... and keep the most frequent one. Series.value_counts() replaces the deprecated
# top-level pd.value_counts().
svc_map = {svc:pd.Series(svc_map[svc]).value_counts().sort_values().index[-1] for svc in svc_map}

In [None]:
# Graph of the RLDec decomposition: classes are coloured by their actual microservice
# and linked to the extracted microservice (box) they were assigned to. The actual
# microservices are added as unconnected ellipses showing their colours.
N = Network(shape[0], shape[1], directed=False, notebook=False)
N.force_atlas_2based()
for n in nodes:
    N.add_node(n, label=short_names[n], title=n, color=colors_s[node_map_s[n]])
for service in services:
    N.add_node(service, label=service, title=service, color=svc_map[service], shape='box')
for service in services_s:
    # 'ellipse' is the vis.js shape name ('ellipsis' is not a valid shape)
    N.add_node(service, label=service, title=service, color=colors_s[service], shape='ellipse')
for c, s in node_map.items():
    N.add_edge(c, s)
N.show(os.path.join(output_path, "{}_rldec_micro.html".format(app)))

### Generate graph visualization of the TopicDecomp microservices

In [None]:
from pyvis.network import Network
from matplotlib.colors import to_hex

In [None]:
# the two values are passed, in this order, as the first two arguments of pyvis Network
shape = (800, 1500)
# folder that receives the generated HTML graphs
output_path = os.path.join(os.curdir, "figures")

In [None]:
# Map every class to the microservice assigned by the selected TopicDecomp decomposition.
class_names = list(top_decomp_td.index)
node_map = {
    cls: f"micro_{label}"
    for cls, label in zip(top_decomp_td.index, top_decomp_td.values)
}
# label of a class node: the last dot-separated component of its name
short_names = {cls: cls.rsplit(".", 1)[-1] for cls in class_names}
nodes = list(node_map) # classes
services = list(set(node_map.values()))  # microservices

In [None]:
# Assign each extracted microservice a Pastel1 colour, starting right after the colours
# given to the actual microservices and wrapping around the palette.
palette = plt.cm.Pastel1.colors
offset = len(colors_s)
colors = {
    service: to_hex(palette[(offset+i)%len(palette)])
    for i, service in enumerate(services)
}

In [None]:
# For every extracted microservice, collect the colours of the actual services its
# classes belong to ...
svc_map = {svc:[colors_s[node_map_s[c]] for c in node_map if node_map[c]==svc] for svc in services}
# ... and keep the most frequent one. Series.value_counts() replaces the deprecated
# top-level pd.value_counts().
svc_map = {svc:pd.Series(svc_map[svc]).value_counts().sort_values().index[-1] for svc in svc_map}

In [None]:
# Graph of the TopicDecomp decomposition: classes are coloured by their actual
# microservice and linked to the extracted microservice (box) they were assigned to.
# The actual microservices are added as unconnected ellipses showing their colours.
# NOTE(review): node_map_s was built from the RLDec decomposition's classes; this assumes
# the TopicDecomp decomposition covers the same classes -- confirm
N = Network(shape[0], shape[1], directed=False, notebook=False)
N.force_atlas_2based()
for n in nodes:
    N.add_node(n, label=short_names[n], title=n, color=colors_s[node_map_s[n]])
for service in services:
    N.add_node(service, label=service, title=service, color=svc_map[service], shape='box')
for service in services_s:
    # 'ellipse' is the vis.js shape name ('ellipsis' is not a valid shape)
    N.add_node(service, label=service, title=service, color=colors_s[service], shape='ellipse')
for c, s in node_map.items():
    N.add_edge(c, s)
N.show(os.path.join(output_path, "{}_topicdecomp_micro.html".format(app)))