# figure 1E

In [None]:
import pandas as pd, numpy as np, igraph as ig, random as rd
import src.landscape_tools as lstoo
import matplotlib.pyplot as plt

# Figure typography: serif font family, and all text rendered through LaTeX
# (text.usetex=True needs a working LaTeX installation on the machine).
plt.rcParams['font.family'] = 'serif'
plt.rcParams['text.usetex'] = True

In [None]:
# sequence length (number of sites in a genotype)
L = 10
# number of (spin) states each site can take
q = 2

# number/name of every site, in sequence order
sites = [26, 27, 28, 31, 35, 50, 53, 56, 57, 58]

# names of the key mutations, one per site (X stands for V/L/I)
muts = [
    'G26E', 'F27X', 'T28I', 'S31R', 'S35T',
    'V50L', 'S53P', 'S56T', 'T57A', 'Y58F',
]

## compute landscape embedding

In [None]:
def rigid_transform_3D(A, B):
    """Find the rigid motion that best superimposes point cloud A onto B.

    Both clouds are given as 3xN arrays (one column per point, points paired
    by column). The rotation is obtained from the singular-value decomposition
    of the cross-correlation matrix of the centered clouds; if that yields a
    reflection (negative determinant), the last right-singular vector is
    flipped so that a proper rotation is returned.

    Parameters
    ----------
    A, B : array of shape (3, N)
        Source and target point clouds.

    Returns
    -------
    R : array of shape (3, 3)
        Rotation matrix.
    t : array of shape (3, 1)
        Translation vector, such that ``R @ A + t`` approximates ``B``.

    Raises
    ------
    AssertionError
        If A and B do not have the same shape.
    Exception
        If the clouds do not have exactly 3 rows.
    """
    # both clouds must contain the same number of points
    assert A.shape == B.shape

    # both clouds must live in a 3D latent space
    rows_a, cols_a = A.shape
    if rows_a != 3:
        raise Exception(f'matrix A is not 3xN, it is {rows_a}x{cols_a}')
    rows_b, cols_b = B.shape
    if rows_b != 3:
        raise Exception(f'matrix B is not 3xN, it is {rows_b}x{cols_b}')

    # centers of mass, kept as 3x1 columns so they broadcast over all points
    com_a = A.mean(axis=1).reshape(-1, 1)
    com_b = B.mean(axis=1).reshape(-1, 1)

    # cross-correlation matrix in center-of-mass coordinates
    cross_corr = (A - com_a) @ (B - com_b).T

    # rotation from the singular-value decomposition
    left, _, right_t = np.linalg.svd(cross_corr)
    rotation = right_t.T @ left.T

    # a negative determinant means a reflection was found: flip the last
    # right-singular vector to turn it into a proper rotation
    if np.linalg.det(rotation) < 0:
        right_t[2, :] *= -1
        rotation = right_t.T @ left.T

    # translation that carries the rotated center of A onto the center of B
    shift = -rotation @ com_a + com_b

    return rotation, shift

In [None]:
# Load the fitness table of the specific epistasis model. Genotypes are stored
# in the CSV as strings of the form "(0, 1, ...)"; they are parsed into tuples
# of ints and used as the row index.
data = (
    pd.read_csv('output/1c_fitness_specific.csv')
    .assign(seq=lambda df: [tuple(int(x) for x in s[1:-1].split(',')) for s in df.seq])
    .set_index('seq')
)

# model landscape object built from the genotype -> F1_model mapping
# (genotypes that are not in the table get the default value NaN)
ls_model = lstoo.EmpLS(L=L, q=q, seqs=data.index, fs=data.F1_model, default=np.nan)

# disabled: fitness peaks of the full landscape
#ls_max = lstoo.locmax(q, L, lambda seq: ls_model.fitness(seq), avseqs=[], nrmax=np.inf)

# disabled: list of fitness peak genotypes
#smax = list(ls_max.keys())

In [None]:
# maximum Hamming distance up to which network nodes should be connected by edges
d = 1

# sequence sites to be pinned (0-based positions in the genotype tuple) and the
# random seed used for each of them
# NOTE: this rebinds `sites`, which the configuration cell uses for the list of
# site numbers; the later cells of this notebook rely on the pinned-site meaning.
sites, seeds = [4,6], [1,2]

# get list of sequences for a 9-site sublandscape
seqs_sub = lstoo.seqlist(q=q, L=L-1)

# collector lists for embeddings (one entry per pinned site)
pos0, pos1 = [], []


def _build_sublandscape_graph(seqs):
    """Build the neighbour network of one sublandscape.

    Nodes are the genotypes in `seqs` (attributes 'id' and 'F'). Two genotypes
    are connected if lstoo.dH of the pair lies in 1..d; the `sum(s1) > sum(s2)`
    condition picks one orientation per pair so that each edge is added once.

    Returns the graph and the list of edge weights, in the same order as the
    edges. The weight is the inverse fitness difference of the two end points
    (regularized by 1e-4), so similar-fitness neighbours are pulled together.
    """
    graph = ig.Graph(len(seqs))

    # add genotype sequences and fitness as node properties
    fs = data.F1_model[seqs].tolist()
    graph.vs['id'] = seqs
    graph.vs['F'] = fs

    # scan all node pairs once; weights are derived from the same edge list so
    # that edges and weights are guaranteed to stay aligned
    edges = [(a, b) for a, s1 in enumerate(seqs) for b, s2 in enumerate(seqs)
             if lstoo.dH(s1, s2) in range(1, d+1) and sum(s1)>sum(s2)]
    graph.add_edges(edges)
    weights = [1./(.0001+abs(fs[a]-fs[b])) for a, b in edges]

    return graph, weights


def _drl_layout(graph, weights, seed_rd):
    """Compute a reproducible 2D DrL force-directed layout as an (n, 2) array.

    Python's `random` module is seeded with `seed_rd` (presumably the RNG igraph
    draws from - confirm) and the initial node positions are drawn from a
    freshly seeded NumPy generator, so repeated calls give the same embedding.
    """
    rd.seed(seed_rd)
    np.random.seed(1)
    seed = np.random.uniform(size=(q**(L-1), 2))
    return np.array(graph.layout_drl(weights=weights, seed=seed))


# iterate over quenched sites
for i, seed_rd in zip(sites, seeds):

    # get genotype sequences of each of the 9-site sublandscapes
    seqs0 = [s for s in data.index if s[i]==0]
    seqs1 = [s for s in data.index if s[i]==1]

    # create networks
    g0, weights0 = _build_sublandscape_graph(seqs0)
    g1, weights1 = _build_sublandscape_graph(seqs1)

    # layout of 9-site sublandscapes
    # (fix: the second layout used to be computed on g0 instead of g1)
    layout0 = _drl_layout(g0, weights0, seed_rd)
    layout1 = _drl_layout(g1, weights1, seed_rd)

    # row of each full genotype in its layout
    row0 = {s: k for k, s in enumerate(seqs0)}
    row1 = {s: k for k, s in enumerate(seqs1)}

    # reorder the layouts to the order of seqs_sub and add a dummy 3rd dimension
    pos_3d0 = np.zeros((q**(L-1), 3))
    pos_3d1 = np.zeros((q**(L-1), 3))
    for scnt, s in enumerate(seqs_sub):
        # re-insert the pinned state at position i to get the full genotype
        pos_3d0[scnt,:2] = layout0[row0[s[:i]+(0,)+s[i:]],:]
        pos_3d1[scnt,:2] = layout1[row1[s[:i]+(1,)+s[i:]],:]

    # find rotation matrix & translation vector to best match points
    R, t = rigid_transform_3D(pos_3d0.T, pos_3d1.T)

    # transform the first embedding using the rotation matrix and translation vector
    pos_3d2 = (R@pos_3d0.T + t).T

    # collect embeddings
    pos0.append(pos_3d2)
    pos1.append(pos_3d1)

## plot landscape embedding

In [None]:
# local fitness peaks of every 9-site sublandscape,
# keyed by (pinned site, state of the pinned site)
smax_sub = {}

# iterate over quenched sites ...
for i in sites:
    # ... and over the WT & mutated state of the quenched site
    for si in range(q):

        def fitness(seq):
            # map a sublandscape sequence to fitness by re-inserting the
            # pinned state at position i of the full genotype
            return ls_model.fitness(seq[:i] + (si,) + seq[i:])

        # find fitness peaks in the 9-site sublandscape
        peaks = lstoo.locmax(q, L-1, fitness, avseqs=lstoo.seqlist(q, L-1), nrmax=np.inf)
        sm_sub = list(peaks)

        # collect sublandscape fitness peaks for each quenched site
        smax_sub[(i, si)] = sm_sub

In [None]:
# one figure per quenched site: both aligned sublandscape embeddings overlaid,
# with the local fitness peaks of each sublandscape marked by crosses
for x, i in enumerate(sites):

    # create figure
    fig, ax = plt.subplots(figsize=(3., 3.), constrained_layout=True)

    # plot embedding (C0: pinned site in state 0, C1: pinned site in state 1)
    ax.scatter(pos0[x][:,0], pos0[x][:,1], s=5, c='C0')
    ax.scatter(pos1[x][:,0], pos1[x][:,1], s=5, c='C1')

    # layout: the embedding coordinates carry no meaning, so hide the axes
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')

    # markers for fitness peaks; embedding rows are ordered like seqs_sub
    for state, pos in ((0, pos0[x]), (1, pos1[x])):
        for sm in smax_sub[(i, state)]:
            k = seqs_sub.index(sm)
            ax.scatter([pos[k,0]], [pos[k,1]], s=100, c='k', marker='x')

    # save plot
    fig.savefig(f'output/1e_{x+1}.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
    fig.savefig(f'output/1e_{x+1}.pdf', bbox_inches='tight', pad_inches=0.02)
    plt.show()

    # release the figure so figures do not pile up across iterations
    plt.close(fig)