# figure 1C

In [None]:
import pandas as pd, numpy as np
from scipy.optimize import minimize
import src.landscape_tools as lstoo, src.plot_tools as plottoo
import matplotlib as mpl, matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

# render all figure text through LaTeX
plt.rcParams['font.family'] = 'serif'  # use serif/main font for text elements
plt.rcParams['text.usetex'] = True     # use inline math for ticks

In [None]:
# names of key mutations (X=V/L/I); the digits inside each name are the site number
muts = [
    'G26E', 'F27X', 'T28I', 'S31R', 'S35T',
    'V50L', 'S53P', 'S56T', 'T57A', 'Y58F',
]
# number/name of all sites, in the same order as `muts`
sites = [
    26, 27, 28, 31, 35,
    50, 53, 56, 57, 58,
]
# number of spin states per site
q = 2
# length of sequence (number of sites)
L = 10

In [None]:
# mapping site number to vector index
pos2i = {pos: i for i, pos in enumerate(sites)}

# load sequence count data
data = pd.read_csv('data/COV107_mutlib_fit_filtered_exp.tsv', sep='\t')

# convert column 'mut' in the data file to spin chains of 0/1s:
# every entry is a '-'-separated list of mutation names (or 'WT'); the characters
# between the first and the last one of a name are the site number
seqs = []
for entry in data['mut']:
    seq = [0] * L
    for mut in entry.split('-'):
        if mut != 'WT':
            seq[pos2i[int(mut[1:-1])]] = 1
    seqs.append(tuple(seq))
data['mut'] = seqs

# drop unused columns, rename the rest, then group by sequence and sum the counts
# (new frames are assigned instead of mutating in place, so the cell can be re-read easily)
data = data.drop(columns=['mutclass','exp1_enrich','exp2_enrich'])
data = data.rename(columns={'mut':'seq', 'input_Count':'ni', 'exp1_count':'no1', 'exp2_count':'no2'})
data = data.groupby('seq').sum().reset_index()

# compute empirical enrichments as log-enrichments (pseudo-count of 1 on both counts)
data['F1_emp'] = np.log((1.+data.no1) / (1.+data.ni))
data['F2_emp'] = np.log((1.+data.no2) / (1.+data.ni))

# enumerate all possible sequences and sort dataframe into that order.
# sort_values returns a NEW frame, so the result has to be assigned back;
# without the assignment the rows silently keep the groupby order.
# NOTE(review): presumably the rows of the model matrix follow the same
# lstoo.seqlist order — confirm against src.landscape_tools.
seqs = lstoo.seqlist(q=q, L=L)
data['seq'] = pd.Categorical(data['seq'], categories=seqs, ordered=True)
data = data.sort_values('seq').reset_index(drop=True)

# subtract offset to have germline at zero fitness
# (assumes row 0 after sorting is the unmutated sequence — TODO confirm that
#  lstoo.seqlist starts with the all-0 chain and that it is present in the data)
data['F1_emp'] -= data.F1_emp.iloc[0]
data['F2_emp'] -= data.F2_emp.iloc[0]

## fit specific epistasis model

In [None]:
# highest order of site interactions kept in the model
order = 3

# design matrix of the linear model F = M.X
# (F: vector of fitness values, X: vector of model parameters)
M = lstoo.mkM(L=L, q=q, order=order)

# objective for the minimizer: negative log-likelihood
def loglike(X, ni, no):
    """Return the negative log-likelihood of the counts, divided by q**L.

    X  : vector of model parameters; fitness values are F = M.dot(X)
    ni : input counts, multiplied element-wise with exp(F)
    no : output counts, multiplied element-wise with F

    ni and no must line up position by position with the rows of the
    module-level matrix M. The sign is flipped so that minimizing this
    function maximizes the likelihood.
    """
    fit = M.dot(X)
    # log of the normalisation sum_s ni(s) * exp(F(s))
    log_norm = np.log((ni*np.exp(fit)).sum())
    # sum_s no(s) * F(s)
    weighted_fit = (no*fit).sum()
    return (no.sum()*log_norm - weighted_fit) / q**L

# fit the model separately to each experimental replicate
for r in (1, 2):
    print(f'replicate {r}')

    # output counts of the current replicate
    counts_out = data['no'+str(r)]

    def report(y):
        # progress output: objective value at the current iterate
        print(loglike(y, data.ni, counts_out))

    # model inference using maximum likelihood, starting from all-zero parameters
    res = minimize(
        loglike,
        np.zeros(M.shape[1]),
        args=(data.ni, counts_out),
        method='BFGS',
        tol=1e-3,
        callback=report,
    )

    # save result object to external file (stored as a pickled object array)
    np.save(f'output/1c_repl{r}.npy', res)

    # compute fitness values of the model
    data[f'F{r}_model'] = M.dot(res.x)

In [None]:
# landscape object built from the fitted fitness values of replicate 1
ls_model = lstoo.EmpLS(L=L, q=q, seqs=data.seq, fs=data.F1_model, default=np.nan)


def model_fitness(seq):
    # fitness of one sequence according to the fitted model landscape
    return ls_model.fitness(seq)


# compute fitness peaks and print each one together with its fitness
ls_max = lstoo.locmax(q, L, model_fitness, avseqs=[], nrmax=np.inf)
for peak in ls_max:
    print(peak, model_fitness(peak))

In [None]:
# save fitness dataframe to external file
# (to_csv also writes the row index as an unnamed first column by default;
#  the 'seq' entries end up in the file as text)
data.to_csv('output/1c_fitness_specific.csv')

In [None]:
# load fitness dataframe from external file
data = pd.read_csv('output/1c_fitness_specific.csv')

# 'seq' comes back as text; drop the first and last character (the enclosing
# brackets), split on ', ' and rebuild a tuple of ints
data['seq'] = [tuple(map(int, txt[1:-1].split(', '))) for txt in data.seq]

## plot specific epistasis model

In [None]:
# replicate of the experiment
r = 1

# load fitted coefficients of the model
# (the file holds a pickled result object wrapped in a 0-d array, hence
#  allow_pickle=True and .item(); only load files produced by this notebook)
saved = np.load(f'output/1c_repl{r}.npy', allow_pickle=True)
res = saved.item()

# extract 1st-order term coefficients h_i: the first L entries of the parameter vector
h = res.x[0:L]

# extract 2nd-order term coefficients J_ij: the L*(L-1)/2 entries following h
n_pairs = L*(L-1)//2
Jvec = res.x[L:L+n_pairs]

# fill the strict lower triangle (i>j) row by row, i.e. in the order
# (1,0), (2,0), (2,1), (3,0), ...; all other cells stay NaN
# NOTE(review): assumes the parameter vector lists pairs in this order — confirm against lstoo.mkM
J = np.full((L, L), np.nan)
rows, cols = np.tril_indices(L, k=-1)
J[rows, cols] = Jvec

# extract 3rd-order term coefficients K_ijk: the block following h and J
n_pairs = L*(L-1)//2
n_triples = L*(L-1)*(L-2)//6
Kvec = res.x[L+n_pairs:L+n_pairs+n_triples]

# index triples with i>j>k, enumerated with i as outermost and k as innermost index
# NOTE(review): assumes the parameter vector lists triples in this order — confirm against lstoo.mkM
triples = np.array(
    [(i, j, k) for i in range(L) for j in range(i) for k in range(j)],
    dtype=int,
).reshape(-1, 3)

# only cells with i>j>k are filled; all other cells stay NaN
K3 = np.full((L, L, L), np.nan)
K3[triples[:, 0], triples[:, 1], triples[:, 2]] = Kvec

# aggregate K_ijk to 2-dimensional matrix K_ij:
# every pair of sites belongs to L-2 triples, so each lower-triangle cell ends up
# holding the mean of |K_ijk| over all triples that contain that pair
K2 = np.full((L, L), np.nan)
K2[np.tril_indices(L, k=-1)] = 0.   # start the accumulation at zero for i>j

for i, j, k in ((a, b, c) for a in range(L) for b in range(a) for c in range(b)):  # i>j>k
    share = 1./(L-2)*abs(K3[i,j,k])
    # the triple contributes equally to each of its three pairs
    for row, col in ((i, j), (i, k), (j, k)):
        K2[row, col] += share

In [None]:
# plot h_i
# tick labels: the site number cut out of each mutation name, in typewriter font
labels = [r'\texttt{%s}'%name[1:-1] for name in muts]

# create figure
fig, ax = plt.subplots(figsize=(1., 3.2))

# color map; `midpoint` is the fractional position of the value 0 between hmin and hmax
# (presumably shiftedColorMap puts the central colour there — confirm against src.plot_tools)
hmin, hmax = np.nanmin(h), np.nanmax(h)
zero_frac = abs(hmin)/(hmax+abs(hmin))
cmap = plottoo.shiftedColorMap(mpl.cm.bwr_r, midpoint=zero_frac)

# plot data as a single column, one row per site
im = ax.imshow(h.reshape(-1, 1), cmap=cmap)

# layout: dashed line halfway down the column, no x ticks, one y tick per site
mid = (L-1)/2
ax.plot([-.5, .5], [mid, mid], color='k', linestyle='--')
ax.set_xticks([])
ax.set_yticks(range(L))
ax.set_yticklabels(labels)
ax.tick_params(labelsize=15)

# colorbar in its own axes to the right of the plot
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.1, 0.7])
cb = fig.colorbar(im, cax=cbar_ax)
cb.ax.tick_params(labelsize=15)

# save plot (size set through the project helper)
plottoo.set_size(.625, 2)
plt.savefig('output/1c_1.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
plt.savefig('output/1c_1.pdf', bbox_inches='tight', pad_inches=0.02)
plt.show()

In [None]:
# plot J_ij
# tick labels: the site number cut out of each mutation name, in typewriter font
labels = [r'\texttt{%s}'%name[1:-1] for name in muts]

# create figure
fig, ax = plt.subplots(figsize=(3.2, 2.8))

# color map; `midpoint` is the fractional position of the value 0 between Jmin and Jmax,
# and NaN cells (everything outside the filled lower triangle) are drawn white
Jmin, Jmax = np.nanmin(J), np.nanmax(J)
zero_frac = abs(Jmin)/(Jmax+abs(Jmin))
cmap = plottoo.shiftedColorMap(mpl.cm.bwr_r, midpoint=zero_frac)
cmap.set_bad(color='white')

# plot data
im = ax.imshow(J, cmap=cmap)

# layout: dashed cross through the centre of the matrix
mid = (L-1)/2
dashed = dict(c='k', linestyle='--')
ax.plot([-1., L+1], [mid, mid], **dashed)
ax.plot([mid, mid], [-1., L+1], **dashed)

# one tick per site on both axes
tick_pos = range(L)
ax.set_xticks(tick_pos)
ax.set_yticks(tick_pos)
ax.set_xticklabels(labels, rotation='vertical')
ax.set_yticklabels(labels)
ax.tick_params(labelsize=15)

# clip the view to the matrix (the dashed lines overshoot it); row 0 stays at the top
ax.set_xlim([-.5, L-.5])
ax.set_ylim([L-.5, -.5])

# colorbar in its own axes to the right of the plot
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.025, 0.7])
cb = fig.colorbar(im, cax=cbar_ax)
cb.ax.tick_params(labelsize=15)
cb.set_ticks([-.5, 0, .5])

# save plot (size set through the project helper)
plottoo.set_size(2., 2.286)
plt.savefig('output/1c_2.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
plt.savefig('output/1c_2.pdf', bbox_inches='tight', pad_inches=0.02)
plt.show()

In [None]:
# plot K_ijk (aggregated to the pair matrix K2)
# create figure
fig, ax = plt.subplots(figsize=(3.2, 2.8))

# colormap
# The midpoint is derived from the range of J. Jmin/Jmax are recomputed here so
# that this cell does not depend on the J-plot cell having been executed first.
# NOTE(review): a J-based midpoint on the K panel looks like a carry-over from the
# J plot — confirm it is intended; it is kept so the figure is unchanged.
Jmin, Jmax = np.nanmin(J), np.nanmax(J)
cmap = plottoo.shiftedColorMap(mpl.cm.Blues, midpoint=abs(Jmin)/(Jmax+abs(Jmin)))
cmap.set_bad(color='white')  # NaN cells (outside the lower triangle) are drawn white

# plot data; K2 holds averaged absolute values, so the color scale starts at 0
im = ax.imshow(K2, cmap=cmap, vmin=0.)

# layout: dashed cross through the centre of the matrix, one tick per site
ax.plot([-1., L+1], [(L-1)/2, (L-1)/2], c='k', linestyle='--')
ax.plot([(L-1)/2, (L-1)/2], [-1., L+1], c='k', linestyle='--')
labels = [r'\texttt{%s}'%st[1:-1] for st in muts]
ax.set_xticks(range(L))
ax.set_yticks(range(L))
ax.set_xticklabels(labels, rotation='vertical')
ax.set_yticklabels(labels)
ax.tick_params(labelsize=15)
# clip the view to the matrix (the dashed lines overshoot it); row 0 stays at the top
ax.set_xlim([-.5, L-.5])
ax.set_ylim([L-.5, -.5])

# colorbar in its own axes to the right of the plot
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.025, 0.7])
cb = fig.colorbar(im, cax=cbar_ax)
cb.ax.tick_params(labelsize=15)
cb.set_ticks([0.,.1])

# save plot (size set through the project helper)
plottoo.set_size(2.,2.286)
plt.savefig('output/1c_3.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
plt.savefig('output/1c_3.pdf', bbox_inches='tight', pad_inches=0.02)
plt.show()

In [None]:
# create landscape object using specific fitness model data (replicate 1)
lsmodel = lstoo.EmpLS(L=L, q=q, seqs=data.seq, fs=data.F1_model, default=np.nan)

# enumerate all possible sequences here, so the cell also works when the notebook
# is resumed from the saved fitness file without running the data-preparation cell
seqs = lstoo.seqlist(q=q, L=L)


def fitness(seq):
    # fitness of one sequence according to the model landscape
    return lsmodel.fitness(seq)


# compute epistatic effect matrix (gamma_ij)
gammaijs = lstoo.gammaij(L, seqs, fitness)

In [None]:
# plot gamma_ij
fig, ax = plt.subplots(figsize=(2.8, 2.8), ncols=1, nrows=1, constrained_layout=True)

# plot data on a fixed symmetric color scale from -1 to 1
im = ax.imshow(gammaijs, cmap=mpl.cm.bwr_r, vmin=-1., vmax=1.)

# layout
# tick labels are built before their first use, so the cell does not rely on a
# `labels` variable left over from an earlier plotting cell
labels = [r'\texttt{%s}'%st[1:-1] for st in muts]
ax.set_xticks(range(L))
ax.set_xticklabels(labels, rotation='vertical')
ax.set_yticks(range(L))
ax.set_yticklabels(labels)
ax.tick_params(labelsize=15)
ax.set_xlabel(r'$j$', fontsize=15)
ax.set_ylabel(r'$i$', fontsize=15)
# dashed cross through the centre of the matrix
ax.plot([-1., L+1], [(L-1)/2, (L-1)/2], c='k', linestyle='--')
ax.plot([(L-1)/2, (L-1)/2], [-1., L+1], c='k', linestyle='--')
# clip the view to the matrix (the dashed lines overshoot it); row 0 stays at the top
ax.set_xlim([-.5, L-.5])
ax.set_ylim([L-.5, -.5])

# colorbar attached to the right edge of the plot
divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=.05)
cb = plt.colorbar(im, cax=cax)
cb.ax.tick_params(labelsize=15)
cb.set_ticks([-1,0,1])

# save plot (size set through the project helper)
plottoo.set_size(2,2)
plt.savefig('output/1c_4.jpg', bbox_inches='tight', pad_inches=0.02, dpi=300)
plt.savefig('output/1c_4.pdf', bbox_inches='tight', pad_inches=0.02)
plt.show()