# Integrated Analysis
This notebook integrates the BiG-SCAPE network with additional information.

In [None]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import numpy as np
import scipy.spatial as spatial

import yaml
from pathlib import Path

In [None]:
def create_node_trace(G, node_trace_category, color, showtextlabel=False, nodesize=10, nodeopacity=0.8, 
                      nodesymbol="circle", linewidth=1, linecolor="black", textposition="top center", showlegend=False):
    """Build one Plotly scatter trace holding every node of a single category.

    Nodes are selected by comparing their ``node_trace`` attribute with
    ``node_trace_category``; each selected node must also carry ``pos``
    (an x/y pair) and ``text`` (hover/label text) attributes.

    Args:
        G: graph whose nodes carry ``node_trace``, ``pos`` and ``text``.
        node_trace_category: category value to select; also the trace name.
        color: marker fill colour.
        showtextlabel: draw the node text next to the marker when true.
        nodesize, nodeopacity, nodesymbol: marker size, opacity and symbol.
        linewidth, linecolor: marker outline styling.
        textposition: where the label sits relative to the marker.
        showlegend: whether the trace gets a legend entry.

    Returns:
        A ``go.Scatter`` trace (empty when no node matches the category).
    """
    markermode = "markers+text" if showtextlabel else "markers"

    selected = np.array(
        [candidate for candidate in G.nodes() if G.nodes[candidate]["node_trace"] == node_trace_category]
    )
    # reshape(-1, 2) keeps the column slicing valid even when nothing was selected.
    coords = np.array([G.nodes[member]['pos'] for member in selected.flatten()]).reshape(-1, 2)
    labels = np.array([G.nodes[member]['text'] for member in selected])

    marker_style = dict(
        symbol=nodesymbol,
        opacity=nodeopacity,
        showscale=False,
        color=color,
        size=nodesize,
        line=dict(width=linewidth, color=linecolor),
    )
    return go.Scatter(
        x=coords[:, 0].tolist(),
        y=coords[:, 1].tolist(),
        text=labels.tolist(),
        textposition=textposition,
        mode=markermode,
        hoverinfo='text',
        name=node_trace_category,
        showlegend=showlegend,
        marker=marker_style,
    )

In [None]:
def create_edge_trace(Graph, name, showlegend=False, color='#888', width=0.5, opacity=0.8):
    """Build one Plotly line trace for all edges of a given relation type.

    Edges are selected by comparing their ``relation_type`` attribute with
    ``name``. Both end nodes of a selected edge must carry a ``pos``
    attribute (an x/y pair).

    Args:
        Graph: graph whose edges carry ``relation_type`` and whose nodes carry ``pos``.
        name: relation type to draw; also used as the trace name.
        showlegend: whether the trace gets a legend entry.
        color, width: line styling.
        opacity: opacity of the whole trace.

    Returns:
        A ``go.Scatter`` trace in ``lines`` mode (empty when no edge matches).
    """
    edge_trace = go.Scatter(
        x=[],
        y=[],
        name=name,
        opacity=opacity,
        line=dict(width=width,color=color),
        hoverinfo='none',
        mode='lines',
        showlegend=showlegend)

    # Filter on the graph that was passed in. The previous code read the
    # attributes from the notebook-level global ``G``, which breaks (or silently
    # uses the wrong edges) for any other graph, e.g. a copy or a subgraph.
    edges = np.array([edge for edge in Graph.edges() if Graph.edges[edge]["relation_type"] == name])
    # Flattening the (n_edges, 2) array yields source, target, source, target, ...
    pos = np.array([Graph.nodes[e]['pos'] for e in edges.flatten()]).reshape(-1, 2)
    # Insert a gap (None becomes NaN) after every pair of points so that each
    # edge is drawn as its own segment instead of one continuous polyline.
    xs = np.insert(pos[:, 0], np.arange(2, len(pos[:, 0]), 2), None)
    ys = np.insert(pos[:, 1], np.arange(2, len(pos[:, 1]), 2), None)
    edge_trace['x'] = xs
    edge_trace['y'] = ys

    return edge_trace

In [None]:
def group_markers(points, name, color="red", width=0.5, showlegend=False, fill="toself", opacity=0.8, textposition="bottom center", offset='default'):
    """Outline a group of points with a scaled copy of their convex hull.

    The hull vertices are scaled about the mean of the (closed) hull outline
    by the factor ``offset`` so the outline does not pass through the markers.

    Args:
        points: sequence of (x, y) pairs; the callers in this notebook only
            pass groups with more than two points.
        name: trace name.
        color, width: outline styling.
        showlegend: whether the trace gets a legend entry.
        fill: Plotly fill mode for the outline.
        opacity: trace opacity.
        textposition: forwarded to the trace unchanged.
        offset: scale factor, or ``"default"`` to derive it from the group
            size (shrinking linearly for fewer than 50 points, otherwise 1).

    Returns:
        A ``go.Scatter`` trace in ``lines`` mode tracing the scaled hull.
    """
    hull = spatial.ConvexHull(points)

    # Repeat the first hull vertex at the end so the outline is closed.
    ring = list(hull.vertices) + [hull.vertices[0]]
    x = [points[vertex][0] for vertex in ring]
    y = [points[vertex][1] for vertex in ring]

    # Scaling centre: mean of the outline coordinates (closing vertex included).
    center_x, center_y = np.mean(x), np.mean(y)

    if offset == "default":
        group_size = len(points)
        offset = group_size * (-0.5/47) + 1.8 if group_size < 50 else 1

    # Scale every vertex away from (or towards) the centre by `offset`.
    offset_x = [(xi - center_x) * offset + center_x for xi in x]
    offset_y = [(yi - center_y) * offset + center_y for yi in y]

    outline = dict(width=width, color=color)
    return go.Scatter(name=name, x=offset_x, y=offset_y, mode='lines', line=outline,
                      showlegend=showlegend, fill=fill, opacity=opacity, textposition=textposition)

## File configuration

In [None]:
# Load the notebook settings from config.yaml in the working directory.
with open("config.yaml", "r") as f:
    notebook_configuration = yaml.safe_load(f)
# Bare expression: shows the loaded configuration as the cell output.
notebook_configuration

In [None]:
# Locate the processed-data directory of the project inside the BGCFlow
# directory named in the notebook configuration.
bgcflow_dir = Path(notebook_configuration["bgcflow_dir"])
project_name = "mq_saccharopolyspora"
report_dir = bgcflow_dir / f"data/processed/{project_name}"
# FIGURE prefixes the files this notebook writes; the other prefixes select
# the input tables read from assets/tables/ below.
FIGURE = "Figure_6"
FIGURE_ARTS = "Figure_4"
FIGURE_MASH = "Figure_3"
FIGURE_BIGFAM = "Figure_4"

## Setting Up Cutoff and Inputs

In [None]:
# Kept as a string: it is interpolated into file-name patterns and output paths.
cutoff = "0.30"

In [None]:
# Directory holding the BiG-SCAPE tables that are globbed by cutoff below.
path_bigscape = report_dir / "bigscape/for_cytoscape_antismash_6.1.1/"

In [None]:
# BiG-SCAPE tables are picked by file-name pattern for the selected cutoff;
# the first match is used and an IndexError is raised when nothing matches.
df_bigscape = pd.read_csv(list(path_bigscape.glob(f"*df_network*{cutoff}*"))[0])
df_bigscape_cluster = pd.read_csv(list(path_bigscape.glob(f"*df_cluster*{cutoff}*"))[0])

# Remaining inputs: antiSMASH regions, BiG-FAM query hits, ARTS hits and the
# MASH clustering table (indexed by its first column).
df_antismash = pd.read_csv(report_dir / "tables" / "df_regions_antismash_6.1.1.csv")
df_bigfam = pd.read_csv(report_dir / "bigslice" / "query_as_6.1.1" / "query_network.csv")
df_arts = pd.read_csv(f"assets/tables/{FIGURE_ARTS}_df_arts_hits.csv")
df_mash = pd.read_csv(f"assets/tables/{FIGURE_MASH}b_mash_hcluster.csv", index_col=0)

In [None]:
# Per-model BiG-FAM statistics, indexed by the first column of the table.
bigfam_models_stats = pd.read_csv(f"assets/tables/{FIGURE_BIGFAM}_bigfam_models.csv", index_col=0)

## Building Edges for Networkx

In [None]:
# BiG-SCAPE similarity edges: one source/target pair per row of the network table.
edge_bigscape = (
    df_bigscape[['Clustername 1', 'Clustername 2']]
    .rename(columns={'Clustername 1': 'source', 'Clustername 2': 'target'})
    .assign(relation_type='bigscape_similarity')
)

In [None]:
# KnownClusterBlast edges: BGC -> most similar known cluster; rows with a
# missing value in either column are dropped.
edge_antismash = (
    df_antismash[['bgc_id', 'most_similar_known_cluster_id']]
    .dropna()
    .rename(columns={'bgc_id': 'source', 'most_similar_known_cluster_id': 'target'})
    .assign(relation_type='antismash_knownclusterblast')
)

In [None]:
# ARTS edges: ARTS profile -> BGC it hits.
edge_arts = (
    df_arts[['profile', 'bgc_id']]
    .rename(columns={'profile': 'source', 'bgc_id': 'target'})
    .assign(relation_type='arts_hits')
)

In [None]:
# BiG-SCAPE class edges: BGC -> its BiG-SCAPE class.
edge_bigscape_class = (
    df_bigscape_cluster[['bgc_id', 'bigscape_class']]
    .rename(columns={'bgc_id': 'source', 'bigscape_class': 'target'})
    .assign(relation_type='bigscape_class')
)

In [None]:
# BiG-FAM edges: only rows with rank 0 are kept, linking GCF model -> BGC.
edge_bigfam = (
    df_bigfam.loc[df_bigfam['rank'] == 0, ['gcf_id', 'bgc_id']]
    .rename(columns={'gcf_id': 'source', 'bgc_id': 'target'})
    .assign(relation_type='bigfam_hits')
)

In [None]:
# Attach the phylogroup of each BGC's genome to the BiG-SCAPE cluster table.
# A single label-based lookup replaces the former row-by-row .loc loop; it is
# still strict: .loc raises KeyError when a genome_id is absent from df_mash.
df_bigscape_cluster["phylogroup"] = df_mash.loc[
    df_bigscape_cluster["genome_id"].tolist(), "phylogroup"
].to_numpy()

# Phylogroup edges: BGC -> phylogroup of its genome.
edge_mash = df_bigscape_cluster.loc[:, ["bgc_id", "phylogroup"]]
edge_mash = edge_mash.rename(columns={'bgc_id' : 'source',
                                      'phylogroup' : 'target'})
edge_mash['relation_type'] = 'phylogroup'

## Build mapping categories

In [None]:
# Per-BGC annotation dict: {bgc_id: {column: value-as-string}}.
# Similarity is rendered with two decimals before everything is cast to str.
nodemap_antismash = (
    df_antismash[['bgc_id', 'product', 'most_similar_known_cluster_description', 'similarity']]
    .set_index('bgc_id')
    .assign(similarity=lambda table: table['similarity'].map("{:.2f}".format))
    .astype(str)
    .T.to_dict()
)

In [None]:
# MIBiG annotation dict keyed by the table's first column:
# {index value: {"biosyn_class": ..., "compounds": ...}}.
nodemap_mibig = pd.read_csv(bgcflow_dir / "resources/mibig/df_mibig_bgcs.csv", index_col=0)
nodemap_mibig = nodemap_mibig.loc[:, ["biosyn_class", "compounds"]].T.to_dict()

In [None]:
# Unique ids starting with "BGC" that appear on either end of a BiG-SCAPE edge.
nodemap_mibig_bigscape = list(
    {i for i in edge_bigscape.source if i.startswith("BGC")}.union(
        {i for i in edge_bigscape.target if i.startswith("BGC")}
    )
)

In [None]:
# ARTS annotation dict keyed by profile; missing values become empty strings
# so the values can be joined into hover text later.
nodemap_arts = df_arts.set_index("profile").fillna("").loc[:, ["hits_type", "function", "description"]].T.to_dict()

In [None]:
# BiG-FAM annotation dict keyed by the model table's index, with every
# selected statistic cast to a string.
nodemap_bigfam = bigfam_models_stats.loc[:, ["top_chemical_class","top_chemical_class_proportion","top_chemical_subclass","top_chemical_subclass_proportion", "H-index","richness","top_taxa","top_taxa_proportion"]]
nodemap_bigfam = nodemap_bigfam.astype(str).T.to_dict()

In [None]:
# Membership lists used to classify graph nodes during network annotation.
# BiG-FAM ids are cast to str because all graph node names are strings.
# NOTE(review): phylogroup_map is not referenced in the visible cells.
arts_map = list(df_arts.profile.unique())
bigfam_map = list(df_bigfam.astype("str").gcf_id.unique())
antismash_map = list(df_antismash.bgc_id.unique())
phylogroup_map = list(df_mash.phylogroup.unique())

## Build Network

In [None]:
# Stack the edge tables into one edge list and build an undirected graph.
# Only the antiSMASH, BiG-SCAPE, BiG-FAM and ARTS edges are included here;
# edge_bigscape_class and edge_mash are not part of the graph.
df = pd.concat([edge_antismash, edge_bigscape, edge_bigfam, edge_arts])
# Cast everything to str so node names have a single type.
df = df.astype(str)
G = nx.from_pandas_edgelist(df, source='source', target='target', edge_attr=['relation_type'])

## Filtering
### Remove Unreliable BiG-FAM models
This part requires analysis from [FigS12_query_bigfam.ipynb](FigS12_query_bigfam.ipynb)

In [None]:
# Remove BiG-FAM models whose top-taxa proportion is at or below the cutoff.
deleted_bigfam = []
bigfam_taxa_cutoff = 0.3
bigfam_filter = bigfam_models_stats[bigfam_models_stats.top_taxa_proportion <= bigfam_taxa_cutoff]
for n in bigfam_filter.index:
    try:
        # Node names are strings, hence the cast of the model id.
        G.remove_node(str(n))
        deleted_bigfam.append(n)  
    except nx.NetworkXError as e:
        # Raised when the model is not a node of the graph; report and continue.
        print(e)

# From here on deleted_bigfam is a comma-separated string, not a list.
deleted_bigfam = ', '.join([str(i) for i in deleted_bigfam])

### Network Annotation

In [None]:
# Lay out the graph with Graphviz, then give every node its position, hover
# text and the legend category ('node_trace') used when drawing.
pos = nx.nx_agraph.graphviz_layout(G)

# Loop-invariant lookups, built once instead of on every iteration.
arts_nodes = set(arts_map)
bigfam_nodes = set(bigfam_map)
antismash_nodes = set(antismash_map)
fam_id_by_bgc = df_bigscape_cluster.set_index('bgc_id')['fam_id_0.30']
phylogroup_by_bgc = edge_mash.set_index("source")["target"]
mash_sources = set(edge_mash.source)

# NOTE(review): node_trace is not reset per node, so a node matching none of
# the branches below would reuse the previous node's category (or raise
# NameError on the first iteration) - confirm every node falls in a branch.
for n, p in pos.items():
    G.nodes[n]['pos'] = p
    G.nodes[n]['text'] = n
    if n in arts_nodes:
        node_trace = 'ARTS model'
        G.nodes[n]['text'] = n + "<br>" + "<br>".join(nodemap_arts[n].values())
    elif n in bigfam_nodes:
        node_trace = 'BiG-FAM model'
        try:
            # nodemap_bigfam is keyed by the integer model id.
            G.nodes[n]['text'] = n + "<br>" + "<br>".join(nodemap_bigfam[int(n)].values())
        except KeyError:
            print(f"Node {n} not found in bigfam_mapping")
    elif n in antismash_nodes:
        node_trace = "BGC"
        try:
            G.nodes[n]['text'] = n + "<br>" + "<br>".join(nodemap_antismash[n].values()) + '<br>GCF_' + str(fam_id_by_bgc.loc[n])
            # Bin the BGC by its KnownClusterBlast similarity; a missing
            # similarity ("nan") fails every comparison and stays "BGC".
            similarity = float(nodemap_antismash[n]['similarity'])
            if similarity < 0.4:
                node_trace = "BGC < 40% similarity"
            elif similarity < 0.8:
                node_trace = "BGC < 80% similarity"
            elif similarity >= 0.8:
                node_trace = "BGC >= 80% similarity"
        except TypeError as e:
            print(n, e)
        if 'lanthi' in nodemap_antismash[n]['product']:
            node_trace = "lanthipeptide" + " " + node_trace
            print(n, node_trace, nodemap_antismash[n]['product'])

    elif n.startswith("BGC"):
        node_trace = "MIBIG (KnownClusterBlast)"
        if n in nodemap_mibig_bigscape:
            node_trace = "MIBIG (BiG-SCAPE)"
        try:
            # Join the individual values. The previous code joined the
            # characters of str(dict_values(...)), putting "<br>" between
            # every single character of the hover text.
            G.nodes[n]['text'] = n + "<br>" + "<br>".join(str(v) for v in nodemap_mibig[n].values())
        except KeyError:
            # No MIBiG record for this id: keep the plain node name as text.
            pass
    if n in mash_sources:
        # Prefix the hover text with the phylogroup of the BGC's genome.
        phylogroup = phylogroup_by_bgc.loc[n]
        G.nodes[n]['text'] = "<br>".join([phylogroup, G.nodes[n]['text']])
    G.nodes[n]['node_trace'] = node_trace

In [None]:
# BGCs directly connected to the Erythreapeptin MIBiG entry in the graph.
df_erythreapeptin = df_bigscape_cluster[df_bigscape_cluster["bgc_id"].isin(list(G.neighbors('BGC0000513')))]
print(f"Based on Known Cluster Blast connection, we found {df_erythreapeptin.shape[0]} BGCs match to Erythreapeptin (BGC0000513):\n{df_erythreapeptin.bgc_id.unique()}")
print()
# Extend the selection to every BGC whose family shares a known-compound
# annotation with one of the direct matches.
df_erythreapeptin_extended = df_bigscape_cluster[df_bigscape_cluster["fam_known_compounds_0.30"].isin(df_erythreapeptin["fam_known_compounds_0.30"].unique())]
# The listed ids now come from the extended table, matching the reported count
# (the message previously listed the non-extended selection).
print(f"Based on Known Cluster Blast & BiG-SCAPE connection, we found {df_erythreapeptin_extended.shape[0]} BGCs that belongs to gene cluster families with connection to Erythreapeptin (BGC0000513):\n{df_erythreapeptin_extended.bgc_id.unique()}")

# Node names to highlight: the extended BGC selection plus the MIBiG entry itself.
df_erythreapeptin_extended_list = df_erythreapeptin_extended.bgc_id.to_list() + ["BGC0000513"]
df_erythreapeptin_extended_list

In [None]:
# Per-BGC record dict ({bgc_id: {column: value}}) extended with the colour
# code of the BGC's phylogroup.
mash_nodemap = df_bigscape_cluster.set_index("bgc_id").T.to_dict()
# Built once: this lookup does not depend on the loop variable, but was
# previously recomputed from df_mash for every BGC.
phylogroup_attrs = df_mash.set_index("phylogroup").T.to_dict()
for k, v in mash_nodemap.items():
    phylogroup = v["phylogroup"]
    mash_nodemap[k]["color_code"] = phylogroup_attrs[phylogroup]['color_code']

In [None]:
# Re-label highlighted nodes with phylogroup-specific categories and collect
# the marker style of each new category in color_code.
color_code = {}
for n in G.nodes:
    if n in df_erythreapeptin_extended_list:
        if n.startswith("BGC"):
            # The MIBiG reference entry keeps a fixed category.
            G.nodes[n]['node_trace'] = "MIBIG (Erythreapeptin)"
        else:
            phylogroup = mash_nodemap[n]["phylogroup"]
            G.nodes[n]['node_trace'] = f"{phylogroup} | BGC (Erythreapeptin)"
            color_code[G.nodes[n]['node_trace']] = {'color' : mash_nodemap[n]["color_code"], 'node_symbol' : 'circle'}
    elif "lanthipeptide" in G.nodes[n]['node_trace']:
        # Other lanthipeptide BGCs lose their similarity bin and are grouped
        # by phylogroup only.
        phylogroup = mash_nodemap[n]["phylogroup"]
        G.nodes[n]['node_trace'] = f"{phylogroup} | lanthipeptide BGC"
        color_code[G.nodes[n]['node_trace']] = {'color' : mash_nodemap[n]["color_code"], 'node_symbol' : 'circle'}


### Export positions

In [None]:
# One row per node with its attributes; split the layout position into
# separate x/y columns and record the cutoff used, then export to CSV.
df_export_network = pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index')
# Whole-column assignment replaces the former per-row .loc writes.
df_export_network["x_pos"] = [node_pos[0] for node_pos in df_export_network["pos"]]
df_export_network["y_pos"] = [node_pos[1] for node_pos in df_export_network["pos"]]
df_export_network["cutoff"] = cutoff
df_export_network.to_csv(f"assets/tables/{FIGURE}_network_{cutoff}.csv")

## Define visualizations

In [None]:
# Line styling per edge relation type.
edge_annotation_map = {'bigscape_similarity' : {'color':'black',
                                                'width':0.5}, 
                       'antismash_knownclusterblast': {'color':'blue', 
                                                       'width':0.5}, 
                       'arts_hits': {'color':'red', 
                                     'width':0.5}, 
                       'bigfam_hits': {'color':'orange', 
                                       'width':0.5},
                       'phylogroup' : {'color' : 'black',
                                     'width' : 0.5}}

# Marker colour and symbol per node category ('node_trace' value).
node_annotation_map = {'ARTS model' : {'color':'red',
                                       'node_symbol' : 'star'},
                       'BiG-FAM model' : {'color':'orange',
                                          'node_symbol' : 'triangle-up'},
                       'MIBIG (KnownClusterBlast)' : {'color':'green',
                                                      'node_symbol' : 'square-dot'},
                       'MIBIG (BiG-SCAPE)' : {'color':'blue',
                                              'node_symbol' : 'square'},
                       'MIBIG (Erythreapeptin)': {'color':'blue',
                                              'node_symbol' : 'square'},
                       "BGC >= 80% similarity" : {'color':'blue',
                                                  'node_symbol' : 'circle'},
                       "BGC < 80% similarity" : {'color':'#0077b6',
                                                 'node_symbol' : 'circle'},
                       "BGC < 40% similarity" : {'color':'#90e0ef',
                                                 'node_symbol' : 'circle'},
                       'BGC' : {'color':'#90e0ef',
                                          'node_symbol' : 'circle'},
                       'BGC (Erythreapeptin)' : {'color':'#90e0ef',
                                          'node_symbol' : 'circle'},
                       "lanthipeptide BGC >= 80% similarity" : {'color':'blue',
                                                  'node_symbol' : 'circle'},
                       "lanthipeptide BGC < 80% similarity" : {'color':'#0077b6',
                                                 'node_symbol' : 'circle'},
                       "lanthipeptide BGC < 40% similarity" : {'color':'#90e0ef',
                                                 'node_symbol' : 'circle'},
                       'lanthipeptide BGC' : {'color': 'white',#'#90e0ef',
                                          'node_symbol' : 'circle'},
                       'phylogroup' : {'color' : 'black',
                                       'node_symbol' : 'star'}
                      }
# Add the phylogroup-specific categories collected in color_code.
node_annotation_map.update(color_code)
# Bare expression: shows the final mapping as the cell output.
node_annotation_map

## Draw Target Box

In [None]:
# Corners of a 300 x 300 (layout units) box centred on the Erythreapeptin
# MIBiG node; used for the red rectangle and as the zoom range of fig2.
x, y = G.nodes['BGC0000513']['pos']
x1 = x - 150
x2 = x + 150
y1 = y - 150
y2 = y + 150

# Figure 5D

In [None]:
# Start a fresh list of Plotly traces for the full-network figure (fig1).
traces = []

In [None]:
# Edge traces: BiG-SCAPE similarity edges are emphasised, the rest are faded.
for e in ['bigscape_similarity', 'arts_hits', 'bigfam_hits', "antismash_knownclusterblast"]:
    opacity=0.2
    if e in ['bigscape_similarity']:
        opacity=0.8
    edge_trace = create_edge_trace(G, e, color=edge_annotation_map[e]['color'], showlegend=True, opacity=opacity)
    traces.append(edge_trace)

# Node traces: one per category; defaults are faded, small markers.
for trace in node_annotation_map.keys():
    nodeopacity = 0.2
    showtextlabel = False
    linecolor = "black"
    linewidth = 0.5
    textposition="top left"
    node_size = 8
    # Lanthipeptide / Erythreapeptin categories (including the
    # phylogroup-specific ones in color_code) are drawn prominently.
    if trace in ['lanthipeptide BGC', 'lanthipeptide BGC >= 80% similarity', 
                 'lanthipeptide BGC < 80% similarity', "lanthipeptide BGC < 40% similarity", 
                 "BGC (Erythreapeptin)", 'MIBIG (Erythreapeptin)'] + list(color_code.keys()):#trace.startswith("BGC") or trace.startswith("MIBIG (BiG-SCAPE)"):
        nodeopacity = 0.8
        linewidth = 1.5
    # MIBiG entries found through BiG-SCAPE get an intermediate opacity.
    if trace in ['MIBIG (BiG-SCAPE)']:#trace.startswith("BGC") or trace.startswith("MIBIG (BiG-SCAPE)"):
        nodeopacity = 0.5
        linewidth = 0.5
    node_trace = create_node_trace(G, trace, node_annotation_map[trace]['color'], showtextlabel=showtextlabel, 
                                   nodesymbol=node_annotation_map[trace]['node_symbol'], nodeopacity=nodeopacity, 
                                   showlegend=True, linecolor=linecolor, linewidth=linewidth, nodesize=node_size,
                                   textposition=textposition)
    traces.append(node_trace)
    

In [None]:
showlegend=False
# NOTE(review): `color` is assigned but not used in this cell.
color="grey"

# drop all ARTS
# Work on a copy so the ARTS profile nodes stay in G itself.
G_arts = G.copy()
for n in df_arts.profile.unique():
    try:
        G_arts.remove_node(str(n))
    except nx.NetworkXError as e:
        # Raised when the profile is not a node of the graph; report and continue.
        print(e)
    

# Outline every connected component with more than two nodes.
for num, query in enumerate(nx.connected_components(G_arts)):
    if len(query) > 2:
        points = []
        for n in query:
            q = G.nodes[n]['pos']
            points.append(q)
        traces.append(group_markers(points, str(num), color="blue", width=1, showlegend=showlegend, fill=None, opacity=0.2))

In [None]:
# Bare expression: shows whether the last node visited in the previous cell
# (whatever `n` currently holds) is in a lanthipeptide category.
# NOTE(review): looks like leftover debugging output.
'lanthipeptide' in G.nodes[n]['node_trace']

In [None]:
showlegend=False
# NOTE(review): `color` is assigned but not used in this cell.
color="grey"

# Outline every gene cluster family with more than two members; families that
# contain a highlighted (Erythreapeptin or lanthipeptide) BGC are drawn strongly.
for num, gcf in enumerate(df_bigscape_cluster['fam_id_0.30'].unique()):
    subset = df_bigscape_cluster[df_bigscape_cluster['fam_id_0.30'] == gcf]
    query = subset.bgc_id
    if len(query) > 2:
        show = False
        points = []
        for n in query:
            q = G.nodes[n]['pos']
            points.append(q)
            if (n in df_erythreapeptin_extended_list) or ('lanthipeptide' in G.nodes[n]['node_trace']):
                show = True
        if show:
            opacity = 0.8
        else:
            opacity = 0.1
        traces.append(group_markers(points, num, color="blue", width=1, showlegend=showlegend, fill=None, opacity=opacity))
            

In [None]:
# Full-network figure: white plot area, transparent paper, hidden legend and
# axes reduced to a black frame.
fig1 = go.Figure(data=traces,
                layout=go.Layout(
                    paper_bgcolor='rgba(0,0,0,0)',
                    plot_bgcolor='white',
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20,l=5,r=5,t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1),
                    width=600, height=600)
                )

In [None]:
# Save the figure before the target box and text labels are added.
outfile = Path(f"assets/figures/{FIGURE}/{FIGURE}BD_2_blank_{cutoff}.svg")
outfile.parent.mkdir(exist_ok=True, parents=True)
fig1.write_image(outfile)

In [None]:
# Draw the red target box around the Erythreapeptin MIBiG node.
square = [x1, y1, x2, y2]
fig1.add_shape(type="rect",
               x0=square[0], y0=square[1], x1=square[2], y1=square[3],
               line=dict(color="Red", width=2),
)

fig1.update_layout(width=600, height=600,
    font=dict(
        #family="Courier New, monospace",
        size=16,  # Set the font size here
        #color="RebeccaPurple"
    )
)

In [None]:
# Save the full-network figure with the target box drawn.
outfile = Path(f"assets/figures/{FIGURE}/{FIGURE}BD_1_{cutoff}.svg")
outfile.parent.mkdir(exist_ok=True, parents=True)
fig1.write_image(outfile)

In [None]:
# MIBiG entries to label on the figure; commented-out ids are deliberately
# switched off and kept here for quick toggling.
known_MIBIG = [#'BGC0000055', #erythromycin
               'BGC0000148', # A83543A
               #'BGC0000285', #flaviolin 
               #'BGC0000349', #erythrochelin 
               'BGC0000513', #Ery-9 b
               #'BGC0000659', #2-methylisoborneol 
               #'BGC0000902', #flaviolin 
               'BGC0000503', #cinnamycin / kyamicin 
               'BGC0001140', #platensimycin / cebulantin
               #'BGC0000663', #hopene
               #'BGC0000324', #coelibactin
               #'BGC0000661', #geosmin 
               #'BGC0001181', #geosmin
               #'BGC0000853', #ectoine
               #'BGC0001455', #antimycin
               #'BGC0000050', #E-837
              ] #'BGC0002316' #'BGC0002346'

# Replacement labels for the compound strings of the MIBiG table.
correction = {'erythromycin A;erythromycin B;erythromycin C;erythromycin D' : 'erythromycin',
              'A83543A' : 'spinosyn',
              'iso-migrastatin / migrastatin / dorrigocin A / dorrigocin B / 13-epi-dorrigocin A' : "migrastatin / dorrigocin",
              "flaviolin;flaviolin rhamnoside;3,3'-diflaviolin" : 'flaviolin rhamnoside',
              'Ery-9;Ery-6;Ery-8;Ery-7;Ery-5;Ery-4;Ery-3' : 'erythreapeptin',
              'labyrinthopeptin A2 / labyrinthopeptin A1 / labyrinthopeptin A3': 'labyrinthopeptin',
              'linfuranone B / linfuranone C' : 'linfuranone',
              'thiazostatin / watasemycin A / watasemycin B / 2-hydroxyphenylthiazoline enantiopyochelin / isopyochelin' : 'thiazostatin',
              'totopotensamide A / totopotensamide B': 'totopotensamide',
              'methylpendolmycin / pendolmycin' : 'pendolmycin',
              'platensimycin;platencin;thioplatensimycin;thioplatencin' : 'cebulantin',
              'cinnamycin' : 'kyamicin'
             }

for item in known_MIBIG:
    x, y  = G.nodes[item]['pos']
    try:
        text = nodemap_mibig[item]['compounds']
    except KeyError:
        # No MIBiG record (or no compounds field): fall back to the id.
        # Only KeyError is caught so that unrelated errors are not hidden.
        text = item
    print(text, text in correction.keys())
    # Use the short label when one is defined, otherwise keep the text.
    text = correction.get(text, text)
    fig1.add_trace(go.Scatter(
        x=[x],
        y=[y],
        mode="text",
        name="Annotation",
        text=text, 
        textposition="top center"
    ))

fig1.update_layout(width=900, height=900)
# Bare expression: renders the labelled figure as the cell output.
fig1

coelibactin; ... avermipeptin and griseopeptin
https://chemistry-europe.onlinelibrary.wiley.com/doi/10.1002/cbic.201200118

cblaster

# Figure 5B

In [None]:
# Start a fresh list of Plotly traces for the zoomed figure (fig2).
traces = []

In [None]:
showlegend=False
# NOTE(review): `color` is assigned but not used in this cell.
color="grey"

# Outline every gene cluster family with more than two members; only families
# containing a BGC of the extended Erythreapeptin selection are drawn strongly.
for num, gcf in enumerate(df_bigscape_cluster['fam_id_0.30'].unique()):
    subset = df_bigscape_cluster[df_bigscape_cluster['fam_id_0.30'] == gcf]
    query = subset.bgc_id
    if len(query) > 2:
        show = False
        points = []
        for n in query:
            q = G.nodes[n]['pos']
            points.append(q)
            if n in df_erythreapeptin_extended_list:
                show = True
        if show:
            opacity = 0.8
        else:
            opacity = 0.1
        traces.append(group_markers(points, num, color="blue", width=1, showlegend=showlegend, fill=None, opacity=opacity))
            

In [None]:
# Map each BGC of the extended selection to its family id; the MIBiG entry
# maps to its own id so it can be labelled by name.
erythreapeptin_dict = df_erythreapeptin_extended.set_index("bgc_id")["fam_id_0.30"].to_dict()
erythreapeptin_dict['BGC0000513'] = 'BGC0000513'

In [None]:
# Rewrite node texts for the zoomed figure: each family label is shown once,
# BiG-SCAPE MIBiG nodes show their id, everything else gets no text.
container = []  # labels already placed on some node
for n in G.nodes:
    if 'Erythreapeptin' in G.nodes[n]['node_trace']:
        if 'BGC' in n:
            text = f"{erythreapeptin_dict[n]}"
        else:
            text = f"GCF_{erythreapeptin_dict[n]}"
        if text in container:
            # Label already used by another node of the same family.
            G.nodes[n]['text'] = ""
        else:
            G.nodes[n]['text'] = text
            container.append(text)
    elif G.nodes[n]['node_trace'] in ['MIBIG (BiG-SCAPE)']:#, 'ARTS model', ]:
        G.nodes[n]['text'] = n   
    else:
        G.nodes[n]['text'] = ""

In [None]:
# Edge traces: BiG-SCAPE and KnownClusterBlast edges are emphasised, the rest faded.
for e in ['bigscape_similarity', 'arts_hits', 'bigfam_hits', "antismash_knownclusterblast"]:
    opacity=0.1
    if e in ['bigscape_similarity', "antismash_knownclusterblast"]:
        opacity=0.8
    edge_trace = create_edge_trace(G, e, color=edge_annotation_map[e]['color'], showlegend=True, opacity=opacity)
    traces.append(edge_trace)

# Node traces: one per category; defaults are faded, large markers.
for trace in node_annotation_map.keys():
    nodeopacity = 0.1
    showtextlabel = False
    linecolor = "black"
    linewidth = 1.5
    textposition="top center"
    node_size = 20
    # Erythreapeptin categories (including the phylogroup-specific ones in
    # color_code) are drawn prominently.
    if trace in ["BGC (Erythreapeptin)", 'MIBIG (Erythreapeptin)'] + list(color_code.keys()):#trace.startswith("BGC") or trace.startswith("MIBIG (BiG-SCAPE)"):
        nodeopacity = 0.8
        linewidth = 1.5
    if trace in ['MIBIG (BiG-SCAPE)']:#trace.startswith("BGC") or trace.startswith("MIBIG (BiG-SCAPE)"):
        nodeopacity = 0.1
        linewidth = 1.5
    node_trace = create_node_trace(G, trace, node_annotation_map[trace]['color'], showtextlabel=showtextlabel, 
                                   nodesymbol=node_annotation_map[trace]['node_symbol'], nodeopacity=nodeopacity, 
                                   showlegend=True, linecolor=linecolor, linewidth=linewidth, nodesize=node_size,
                                   textposition=textposition)
    traces.append(node_trace)
    

In [None]:
# Zoomed figure: same layout as fig1; the axis ranges are narrowed afterwards.
fig2 = go.Figure(data=traces,
                layout=go.Layout(
                    paper_bgcolor='rgba(0,0,0,0)',
                    plot_bgcolor='white',
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20,l=5,r=5,t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1),
                    width=600, height=600)
                )

In [None]:
# Restrict both axes to the target box so fig2 shows only that region.
square = [x1, y1, x2, y2]

fig2.update_layout(xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1, range=[square[0], square[2]]),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1, range=[square[1], square[3]]))

In [None]:
# Collect the antiSMASH records of every BGC in the connected component
# (ARTS nodes removed) that contains the Erythreapeptin MIBiG entry.
# `erythreapeptin` is only defined if such a component exists.
for subgraph in nx.connected_components(G_arts):
    if 'BGC0000513' in subgraph:
        targets = [i for i in subgraph if i in df_antismash.bgc_id.to_list()]
        erythreapeptin = df_antismash.set_index("bgc_id").loc[targets]

# Download the MIBiG GenBank file (IPython shell escape; -nc skips the
# download when the file already exists).
gbk_path_BGC0000513 = bgcflow_dir / "data/external/bgc_selection/MIBIG/BGC0000513.region001.gbk"
gbk_path_BGC0000513.parent.mkdir(parents=True, exist_ok=True)
! wget https://mibig.secondarymetabolites.org/repository/BGC0000513/generated/BGC0000513.gbk -O {gbk_path_BGC0000513} -nc

# Add the MIBiG entry as an extra row, then drop one hard-coded region.
erythreapeptin.loc['BGC0000513', "genome_id"] = 'GCF_016859185.1'
erythreapeptin.loc['BGC0000513', "source"] = 'bgcflow'
erythreapeptin.loc['BGC0000513', "gbk_path"] = gbk_path_BGC0000513.resolve()
erythreapeptin = erythreapeptin.drop("NZ_CP054839.1.region007")

In [None]:
from pathlib import Path
import json

# For every selected BGC, look up its original antiSMASH GenBank file through
# the per-genome change log and record the path and record identifiers.
mapping_dir = report_dir / "log_changes/6.1.1/"
antismash_dir = report_dir / "antismash/6.1.1/"
for i in erythreapeptin.index:
    genome_id = erythreapeptin.loc[i, "genome_id"]
    print(genome_id)
    # mapping_dict first holds the path of the change log, then its parsed content.
    mapping_dict = mapping_dir / f"{genome_id}-change_log.json"
    with open(mapping_dict, "r") as f:
        mapping_dict = json.load(f)
    for item in mapping_dict[genome_id].values():
        # Match the entry whose symlink file name (without suffix) is this BGC id.
        if Path(item['symlink_path']).stem == i:
            gbk_file = Path(item['target_path']).name
            gbk_file = antismash_dir / genome_id / gbk_file
            assert gbk_file.is_file()
            erythreapeptin.loc[i, 'gbk_path'] = gbk_file.resolve()
            erythreapeptin.loc[i, 'record_id'] = item['record_id']
            erythreapeptin.loc[i, 'original_id'] = item['original_id']
            print(item)
# Every row is marked with the same source value.
erythreapeptin['source'] = "bgcflow"
# Bare expression: shows the resulting sample table as the cell output.
erythreapeptin

In [None]:
# Write the selection as the sample table of a new "Erythreapeptin" config
# directory inside the BGCFlow directory.
outfile = bgcflow_dir / "config/Erythreapeptin/samples.csv"
outfile.parent.mkdir(parents=True, exist_ok=True)
erythreapeptin.to_csv(outfile)

In [None]:
import shutil
outfile_dir = bgcflow_dir / "config/Erythreapeptin"
# Make the cell runnable on its own: the copy and write below need the directory.
outfile_dir.mkdir(parents=True, exist_ok=True)

# Project configuration for the Erythreapeptin selection; only the clinker
# and mmseqs2 rules are switched on.
project_config = {'name': 'Erythreapeptin',
                  'schema': 'BGC',
                  'pep_version': '2.1.0',
                  'description': "A selection of Erythreapeptin BGCs from mq_saccharopolyspora",
                  'sample_table': 'samples.csv',
                  'gtdb-tax': 'gtdbtk.bac120.summary.tsv',
                  'prokka-db' : "prokka-db.csv",
                  'rules' : {'bigslice': False,
                             'bigscape': False,
                             'query-bigslice': False,
                             'clinker': True,
                             'mmseqs2': True,
                             'interproscan': False}
                 }

# Reuse the GTDB-Tk summary of the parent project.
source = bgcflow_dir / "config/mq_saccharopolyspora/gtdbtk.bac120.summary.tsv"
destination = outfile_dir / source.name
shutil.copy(source, destination)

# Write real YAML into the .yaml file with the yaml module imported at the top
# of the notebook. The previous code dumped JSON here and depended on `json`
# having been imported by an earlier cell. sort_keys=False keeps the key order.
with open(outfile_dir / "project_config.yaml", "w") as f:
    yaml.safe_dump(project_config, f, sort_keys=False)

In [None]:
# Save the zoomed figure.
outfile = Path(f"assets/figures/{FIGURE}/{FIGURE}BD_2_{cutoff}.svg")
outfile.parent.mkdir(exist_ok=True, parents=True)
fig2.write_image(outfile)

In [None]:
from svgutils.compose import *
from svgutils.compose import Figure

In [None]:
# Compose the final SVG: the zoomed figure at full size labelled "B", with
# the full-network figure scaled to 30% placed at the origin.
final_figure = Figure("650", "650",
                      Panel(
                          SVG(f"assets/figures/{FIGURE}/{FIGURE}BD_2_{cutoff}.svg").scale(1).move(20, 0),
                          Text("B", 0, 0, size=24, weight='bold'),
                      ),                      
                      Panel(
                          SVG(f"assets/figures/{FIGURE}/{FIGURE}BD_1_{cutoff}.svg").scale(0.3).move(0, 0),
                      ),
                     )
outfile = Path(f"assets/figures/{FIGURE}/{FIGURE}BD_{cutoff}.svg")
outfile.parent.mkdir(parents=True, exist_ok=True)
final_figure.save(outfile)
# Bare expression: shows the composed figure as the cell output.
final_figure

In [None]:
# Preview only: wraps the blank figure in a labelled panel; the result is
# displayed as the cell output and not saved.
Figure("600", "600",
       Panel(
                          SVG(f"assets/figures/{FIGURE}/{FIGURE}BD_2_blank_{cutoff}.svg").scale(1).move(0, 0),
                          Text("B", 0, 0, size=24, weight='bold'),
                      ), 
      )