# figure 1D

In [None]:
import pandas as pd, numpy as np, igraph as ig, random as rd
from sklearn.cluster import DBSCAN
import src.landscape_tools as lstoo
import matplotlib as mpl, matplotlib.pyplot as plt

# plot typography: serif font, all text rendered through LaTeX
plt.rcParams['font.family'] = 'serif'  # serif/main font for text elements
plt.rcParams['text.usetex'] = True     # let LaTeX typeset text, including tick labels

In [None]:
# number of spin states per site
q = 2
# length of sequence (one entry per site listed below)
L = 10
# number/name of all sites, in genotype order
sites = [26, 27, 28, 31, 35, 50, 53, 56, 57, 58]
# names of key mutations (X=V/L/I), in the same order as `sites`
muts = ['G26E', 'F27X', 'T28I', 'S31R', 'S35T', 'V50L', 'S53P', 'S56T', 'T57A', 'Y58F']

## compute landscape embedding

In [None]:
def _parse_genotype(text):
    """Convert the string form of a tuple, e.g. '(0, 1, 0)', into a tuple of ints."""
    return tuple(int(state) for state in text[1:-1].split(','))


def _model_fitness(seq):
    """Fitness of `seq` in the model landscape `ls_model`."""
    return ls_model.fitness(seq)


# load fitness dataframe for the specific epistasis model; the genotype column
# is stored as text and is converted back to tuples of ints
data = pd.read_csv('output/1c_fitness_specific.csv')
data['seq'] = [_parse_genotype(s) for s in data.seq]

# create model landscape object from the genotypes and their F1_model values
# (default=np.nan is presumably the value for genotypes missing from `seqs` -- confirm in landscape_tools)
ls_model = lstoo.EmpLS(L=L, q=q, seqs=data.seq, fs=data.F1_model, default=np.nan)

# compute fitness peaks of the model landscape
ls_max = lstoo.locmax(q, L, _model_fitness, avseqs=[], nrmax=np.inf)

In [None]:
# maximum Hamming distance up to which network nodes should be connected by edges
ds = [1, 2]

# collector dictionaries for networks, weights and embeddings
gs, weights, layouts = {}, {}, {}

# genotypes and fitness values in dataframe order (row position = node index),
# plus the sum of each genotype, used below to pick one orientation per pair
seq_list, f_list = data.seq.tolist(), data.F1_model.tolist()
seq_sum = [sum(s) for s in seq_list]

for d in ds:

    # create network object
    gs[d] = ig.Graph(q**L)

    # add genotype sequences and fitness as node properties
    gs[d].vs['id'], gs[d].vs['F'] = seq_list, f_list

    # add edges to all node pairs which are within the maximum Hamming distance.
    # Each unordered pair is kept once: the node with the larger genotype sum comes
    # first, and ties are broken by node index. The tie-break matters for d >= 2,
    # where two genotypes can differ at two sites and still have equal sums
    # (one site 0->1, another 1->0); comparing sums alone dropped those pairs.
    # For d = 1 sums always differ, so the edge list there is unchanged.
    edges = [(x, y) for x, s1 in enumerate(seq_list) for y, s2 in enumerate(seq_list)
             if (seq_sum[x] > seq_sum[y] or (seq_sum[x] == seq_sum[y] and x > y))
             and lstoo.dH(s1, s2) in range(1, d+1)]
    gs[d].add_edges(edges)
    print(f'dmax={d}', gs[d].summary())

    # list of edge weights in the network, derived from the edge list itself so the
    # two stay aligned: inverse absolute fitness difference, with .001 added to the
    # denominator to keep it finite for equal fitness values
    weights[d] = [1./(.001+abs(f_list[x]-f_list[y])) for x, y in edges]

In [None]:
# one 2D embedding per maximum Hamming distance
for d in ds:

    # reset Python's and NumPy's global random generators on every pass, then draw
    # the initial node positions, so that the embeddings are reproducible
    rd.seed(1)
    np.random.seed(1)
    seed = np.random.uniform(size=(q**L, 2))

    # force-directed (DrL) network layout, stored as an array of node coordinates
    layouts[d] = np.array(gs[d].layout_drl(weights=weights[d], seed=seed))

In [None]:
# write the dictionary of landscape embeddings (one entry per value in `ds`) to an external file
np.save(file='output/1d_1.npy', arr=layouts)

In [None]:
# load landscape embeddings from external file; the dictionary was saved as a
# pickled object, hence allow_pickle=True and .item() to unwrap it
# NOTE(review): unpickling can execute code -- only load files written by this notebook
stored = np.load('output/1d_1.npy', allow_pickle=True)
layouts = stored.item()

## plot landscape embedding

In [None]:
# fitness-based colors: F1_model rescaled linearly to [0, 1]
cs = (data.F1_model-data.F1_model.min())/(data.F1_model.max()-data.F1_model.min())

# genotypes in dataframe order; the row position is the node index in the embeddings
seqs = data.seq.tolist()

# pairs of single-mutation neighbors; these do not depend on d, so they are
# searched once here instead of once per figure
nbrs = [(x, y) for x in range(len(seqs)) for y in range(x) if lstoo.dH(seqs[x], seqs[y]) == 1]

# node indices of the highlighted genotypes
smax = list(ls_max.keys())
ind_zero = seqs.index(tuple([0]*L))            # all-zero genotype
ind_local = [seqs.index(s) for s in smax[1:]]  # remaining fitness peaks
# assumes the first key of ls_max is the global maximum -- TODO confirm against lstoo.locmax
ind_global = seqs.index(smax[0])

for d in ds:

    # create figure
    fig, ax = plt.subplots(figsize=(4., 4.), constrained_layout=True)
    pos = np.array(layouts[d])

    # plot edges between mutational neighbors
    for x, y in nbrs:
        ax.plot([pos[x,0], pos[y,0]], [pos[x,1], pos[y,1]], c='gray', alpha=.025, zorder=-1)

    # plot nodes
    im = ax.scatter(pos[:,0], pos[:,1], c=cs, cmap='viridis', s=5)

    # layout
    ax.axis('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    # markers and arrows
    # all-zero genotype: magenta marker, arrow from below pointing up at it
    ax.arrow(pos[ind_zero,0], pos[ind_zero,1]-40, 0, 20, width=.25, head_width=10., color='magenta')
    ax.scatter(pos[ind_zero,0], pos[ind_zero,1], c='magenta', s=5)

    # local fitness maxima: black markers
    for sind in ind_local:
        ax.scatter(pos[sind,0], pos[sind,1], c='k', s=5)

    # global fitness maximum: magenta marker, arrow from the right pointing left at it
    ax.arrow(pos[ind_global,0]+40, pos[ind_global,1], -20, 0, width=.25, head_width=10., color='magenta')
    ax.scatter(pos[ind_global,0], pos[ind_global,1], c='magenta', s=5)

    # despine plot
    ax.axis('off')

    # save plot
    plt.savefig(f'output/1d_1_dH{d}.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
    plt.savefig(f'output/1d_1_dH{d}.pdf', bbox_inches='tight', pad_inches=0.02)
    plt.show()

In [None]:
# hotspot state-based colors: genotype position 6 (entry 'S53P' in `muts`),
# 'C0' where the state is 0 and 'C1' otherwise
hotspot = 6
cs = ['C0' if seq[hotspot]==0 else 'C1' for seq in data.seq]

# genotypes in dataframe order; the row position is the node index in the embeddings
seqs = data.seq.tolist()

# pairs of single-mutation neighbors; these do not depend on d, so they are
# searched once here instead of once per figure
nbrs = [(x, y) for x in range(len(seqs)) for y in range(x) if lstoo.dH(seqs[x], seqs[y]) == 1]

# node indices of the highlighted genotypes
smax = list(ls_max.keys())
ind_zero = seqs.index(tuple([0]*L))            # all-zero genotype
ind_local = [seqs.index(s) for s in smax[1:]]  # remaining fitness peaks
# assumes the first key of ls_max is the global maximum -- TODO confirm against lstoo.locmax
ind_global = seqs.index(smax[0])

for d in ds:

    # create figure
    fig, ax = plt.subplots(figsize=(4., 4.), constrained_layout=True)
    pos = layouts[d]

    # plot edges between mutational neighbors
    for x, y in nbrs:
        ax.plot([pos[x,0], pos[y,0]], [pos[x,1], pos[y,1]], c='gray', alpha=.025, zorder=-1)

    # plot nodes
    im = ax.scatter(pos[:,0], pos[:,1], c=cs, s=5)

    # layout
    ax.axis('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    # markers and arrows
    # all-zero genotype: magenta marker, arrow from below pointing up at it
    ax.arrow(pos[ind_zero,0], pos[ind_zero,1]-40, 0, 20, width=.25, head_width=10., color='magenta')
    ax.scatter(pos[ind_zero,0], pos[ind_zero,1], c='magenta', s=5)

    # local fitness maxima: black markers
    for sind in ind_local:
        ax.scatter(pos[sind,0], pos[sind,1], c='k', s=5)

    # global fitness maximum: magenta marker, arrow from the right pointing left at it
    ax.arrow(pos[ind_global,0]+40, pos[ind_global,1], -20, 0, width=.25, head_width=10., color='magenta')
    ax.scatter(pos[ind_global,0], pos[ind_global,1], c='magenta', s=5)

    # despine plot
    ax.axis('off')

    # save plot
    plt.savefig(f'output/s4a_1_dH{d}.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
    plt.savefig(f'output/s4a_1_dH{d}.pdf', bbox_inches='tight', pad_inches=0.02)
    plt.show()

In [None]:
# create a narrow stand-alone figure whose only axes holds the colorbar
fig = plt.figure(figsize=(.5,2.))
ax = fig.add_axes([.05, .8, 0.25, 0.9])

# colorbar over the range of model fitness values, using the 'viridis' colormap;
# built through Figure.colorbar with a ScalarMappable instead of calling the
# legacy ColorbarBase constructor directly
norm = mpl.colors.Normalize(vmin = data.F1_model.min(), vmax = data.F1_model.max())
mappable = mpl.cm.ScalarMappable(norm=norm, cmap=plt.get_cmap('viridis'))
cb = fig.colorbar(mappable, cax=ax, orientation='vertical')
cb.ax.tick_params(labelsize=15)
cb.ax.set_title(r'$F(\mathbf{s})$', fontsize=15)

# save plot
plt.savefig('output/1d_2.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
plt.savefig('output/1d_2.pdf', bbox_inches='tight', pad_inches=0.02)
plt.show()

In [None]:
# subset of the sequence sites, given as 0-based positions into each genotype and into `muts`
# NOTE: this rebinds `sites`, which the configuration cell near the top defines as residue numbers
sites = [0,1,5,6,9]

# maximum Hamming distance for edges (selects which embedding is shown)
d = 1

# create figure with one panel per site of interest
fig, ax = plt.subplots(figsize=(2.8*5, 2.8), ncols=5, nrows=1, constrained_layout=True)
pos = layouts[d]

# pairs of single-mutation neighbors; identical for every panel, so they are
# searched once here instead of once per site
seqs = data.seq.tolist()
nbrs = [(x, y) for x in range(len(seqs)) for y in range(x) if lstoo.dH(seqs[x], seqs[y]) == 1]

# iteration over sites of interest
for i in range(len(sites)):

    # plot edges between mutational neighbors
    for x, y in nbrs:
        ax[i].plot([pos[x,0], pos[y,0]], [pos[x,1], pos[y,1]], c='gray', alpha=.025, zorder=-1)

    # plot nodes, colored by the state at this site ('C0' for state 0, 'C1' otherwise)
    cs = ['C0' if seq[sites[i]]==0 else 'C1' for seq in data.seq]
    im = ax[i].scatter(pos[:,0], pos[:,1], c=cs, s=5)

    # layout; the panel title is the mutation name of the site
    ax[i].axis('equal')
    ax[i].set_xticks([])
    ax[i].set_yticks([])
    ax[i].set_title(r'\texttt{%s}'%(muts[sites[i]]), fontsize=15)
    ax[i].axis('off')

# save plot
plt.savefig('output/s4b_1.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
plt.savefig('output/s4b_1.pdf', bbox_inches='tight', pad_inches=0.02)
plt.show()