# metrics: 
$r/s$ Roughness/slope ratio: the standard deviation of the residuals of the fitness values around the best non-epistatic (that is, linear) fit, divided by the mean of the absolute values of the linear coefficients

$N_{max}$ The number of fitness maxima

$F_{rse}$ The fraction of mutation pairs with reciprocal sign epistasis

$F_{bp}$ The fraction of blocked pathways

# fitness landscape:
* NK model (5, 10, 15 sites)
* RMF model (5, 10, 15 sites)
* Polynomial model (5, 10, 15 sites)

In [39]:
import numpy as np
import numpy.random as nrand
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import copy
import multiprocessing
from sklearn.linear_model import LinearRegression

In [56]:
# Change parameters to select fitness landscape and metric to analyze.
# `model` and `N` choose which landscape pickle is loaded, `N` also chooses
# the pre-computed index files, and `metric` chooses the function that is
# bound to `get_ruggedness` further down.

model = 'NK' # Type of theoretical landscape model: 'NK', 'RMF', 'Polynomial'
N = 15 # Number of variable sites: 5, 10, 15 (also the column index of the fitness value)
metric = 'open_ratio' # Metric to compute: 'N_max', 'epi', 'r_s', 'open_ratio'

In [57]:
# Pre-computed index files for each supported number of sites:
# (epistasis index list, pathway index list).
_INDEX_FILES = {
    5: ('../index_file/epi_list_5s_all.pkl',
        '../index_file/pathway_list_5s_all.pkl'),
    10: ('../index_file/epi_list_10s_all.pkl',
         '../index_file/pathway_list_10s_120000.pkl'),
    15: ('../index_file/epi_list_15s_200000.pkl',
         '../index_file/pathway_list_15s_240000.pkl'),
}


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    NOTE: unpickling can execute arbitrary code, so only load files that were
    produced by this project or another trusted source.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Fail early on an unsupported N. Otherwise epi_list / pathway_list would stay
# undefined (or, in a notebook, keep stale values from a previous run).
if N not in _INDEX_FILES:
    raise ValueError(
        f'Unsupported N={N!r}; expected one of {sorted(_INDEX_FILES)}')

# The variable keeps its historical name although it holds the landscapes of
# whichever `model` was selected, not only NK landscapes.
NK_landscape_list = _load_pickle(
    f'../FL_data_100X10/{model}_{N}_landscape_list_100X10.pkl')

_epi_path, _pathway_path = _INDEX_FILES[N]
epi_list = _load_pickle(_epi_path)
pathway_list = _load_pickle(_pathway_path)

if N == 15:
    # Neighbour index table, only needed by get_N_max_15.
    neighbor_list = _load_pickle('../index_file/N_max_list_15s_all.pkl')

In [58]:
def get_N_max(landscape):
    """Count the fitness maxima of a landscape.

    A row counts as a maximum when its fitness (column ``N``) is not lower
    than the fitness of any genotype reached by flipping one of its first
    ``N`` entries. Ties therefore count as maxima.

    Each neighbour's row is located by reading its bits as a binary number
    (first site = most significant bit), so the rows of ``landscape`` must be
    ordered by that index.

    Args:
        landscape: 2-D array; columns ``0..N-1`` hold the 0/1 genotype and
            column ``N`` holds the fitness.

    Returns:
        The number of maxima as an ``int``.
    """
    peak_count = 0
    for row in landscape:
        genotype = row[:N]
        fitness = row[N]
        beaten = False
        for site in range(len(genotype)):
            # Build the single-site mutant without touching the landscape row.
            mutant = genotype.copy()
            mutant[site] = 1 - mutant[site]
            bit_string = ''.join(mutant.astype(int).astype(str))
            neighbour_fitness = landscape[int(bit_string, 2), N]
            if fitness < neighbour_fitness:
                beaten = True
                break
        if not beaten:
            peak_count += 1
    return peak_count

def get_N_max_15(landscape):
    """Count fitness maxima using the pre-computed ``neighbor_list`` table.

    ``neighbor_list`` is used as a 2-D integer index into the landscape rows.
    A row of the table counts as a maximum when the largest fitness among all
    of its entries equals the fitness of its first entry.
    (Presumably column 0 is the genotype itself and the remaining columns are
    its single-site neighbours - confirm against the index-file generator.)

    Args:
        landscape: 2-D array whose last column holds the fitness.

    Returns:
        The number of maxima (NumPy integer).
    """
    fitness = landscape[:, -1]
    # Fitness of every indexed genotype, one table row per output row.
    neighbourhood_fitness = fitness[neighbor_list]
    best_in_neighbourhood = np.max(neighbourhood_fitness, axis=1)
    own_fitness = neighbourhood_fitness[:, 0]
    return np.sum(best_in_neighbourhood == own_fitness)

def cal_epi(landscape):
    """Return the fraction of index quadruples showing reciprocal sign epistasis.

    For every quadruple in ``epi_list`` the fitness values at positions 0 and
    3 are compared with those at positions 1 and 2; the quadruple is counted
    when both of the former strictly exceed both of the latter.
    (Presumably positions 0/3 are the wild type and double mutant and 1/2 the
    two single mutants - confirm against the index-file generator.)

    Args:
        landscape: 2-D array whose last column holds the fitness.

    Returns:
        Counted quadruples divided by the total number of quadruples.
    """
    quad_fitness = landscape[epi_list][:, :, -1]
    outer = quad_fitness[:, [0, 0, 3, 3]]
    inner = quad_fitness[:, [1, 2, 1, 2]]
    # All four pairwise comparisons must hold for a quadruple to count.
    is_reciprocal = np.all(outer > inner, axis=1)
    return np.sum(is_reciprocal) / len(quad_fitness)

def cal_r_s(landscape):
    """Return the roughness/slope ratio of a landscape.

    An ordinary least-squares linear model is fitted from the first ``N``
    columns (genotype) to the last column (fitness). Roughness is the
    root-mean-square residual of that fit and slope is the mean absolute
    value of the fitted coefficients.

    Args:
        landscape: 2-D array; columns ``0..N-1`` are the regressors and the
            last column is the fitness.

    Returns:
        ``roughness / slope``.
    """
    genotypes = landscape[:, :N]
    fitness = landscape[:, -1]
    additive_fit = LinearRegression().fit(genotypes, fitness)
    residuals = fitness - additive_fit.predict(genotypes)
    roughness = np.sqrt(np.mean(np.square(residuals)))
    slope = np.mean(np.abs(additive_fit.coef_))
    return roughness / slope

def cal_open_ratio(landscape):
    """Return the fraction of open pathways among those in ``pathway_list``.

    Each row of ``pathway_list`` indexes the successive genotypes of one
    pathway. A pathway counts as open when fitness never decreases from one
    step to the next, or when it strictly decreases at every step (i.e. the
    pathway is open when walked in the opposite direction).

    The number of steps is taken from the pathway length rather than being
    hard-coded, so the step comparison and the "all steps" threshold always
    agree. For 5-node pathways the result is identical to the previous
    fixed-slice implementation.

    Args:
        landscape: 2-D array whose last column holds the fitness.

    Returns:
        Number of open pathways divided by ``len(pathway_list)``.
    """
    pathway_fit_list = landscape[pathway_list][:, :, -1]
    n_steps = pathway_fit_list.shape[1] - 1
    # Per pathway: how many steps do not decrease fitness.
    non_decreasing_steps = np.sum(
        pathway_fit_list[:, :-1] <= pathway_fit_list[:, 1:], axis=1)
    open_forward = np.sum(non_decreasing_steps == n_steps)
    open_backward = np.sum(non_decreasing_steps == 0)
    total_open = open_forward + open_backward
    return total_open / len(pathway_list)

# Bind `get_ruggedness` to the function that computes the selected metric.
# For N == 15 the maxima count uses the pre-computed neighbour table instead
# of the per-genotype loop.
_METRIC_FUNCTIONS = {
    'N_max': get_N_max_15 if N == 15 else get_N_max,
    'r_s': cal_r_s,
    'epi': cal_epi,
    'open_ratio': cal_open_ratio,
}

# An unknown metric used to fall through silently, leaving `get_ruggedness`
# undefined (or stale from an earlier notebook run); fail loudly instead.
if metric not in _METRIC_FUNCTIONS:
    raise ValueError(
        f'Unknown metric {metric!r}; expected one of {sorted(_METRIC_FUNCTIONS)}')

get_ruggedness = _METRIC_FUNCTIONS[metric]
    
    

In [59]:
def normalize(array):
    """Rescale *array* linearly so its smallest value maps to 0 and its largest to 1.

    The minimum and maximum are taken over the whole array (all axes).
    A constant array has zero range and therefore yields NaN values.
    """
    lowest = np.min(array)
    highest = np.max(array)
    shifted = array - lowest
    return shifted / (highest - lowest)

In [60]:
def Add_Error(landscape,std):
    """Return a copy of *landscape* with Gaussian noise added to the fitness.

    Zero-mean normal noise with standard deviation *std* is added to column
    ``N`` (the fitness), and that column is then rescaled to the range [0, 1].
    The input array is left untouched.

    Only the fitness column is normalised. Normalising the whole array would
    also rescale the 0/1 genotype columns whenever the noisy fitness leaves
    [0, 1], which breaks the genotype-to-index lookup in ``get_N_max`` and
    distorts the regressors used by ``cal_r_s``.

    Args:
        landscape: 2-D array; column ``N`` holds the fitness.
        std: standard deviation of the added noise.

    Returns:
        A new array of the same shape with noisy, re-normalised fitness.
    """
    landscape_error = copy.deepcopy(landscape)
    landscape_error[:,N] += np.random.normal(0,std,landscape_error.shape[0])
    landscape_error[:,N] = normalize(landscape_error[:,N])
    return landscape_error

In [61]:
def worker(start):
    """Compute the selected metric on clean and noisy landscapes for one batch.

    Processes the landscape groups with indices ``start+1`` .. ``start+10``
    of ``NK_landscape_list`` and, within each group, entries ``0`` .. ``9``.
    For every landscape the metric is computed once on the original data and
    once per noise level on a noisy copy produced by ``Add_Error``.

    NOTE(review): with the starts used by the pool (0, 10, ..., 90) the group
    indices run from 1 to 100; confirm that ``NK_landscape_list`` is indexed
    that way and that index 0 is meant to be skipped.

    Args:
        start: offset of the batch; ten consecutive groups after it are used.

    Returns:
        A DataFrame with one row per (landscape, noise level) and the columns
        ``f'{metric}_T'`` (metric on the original landscape),
        ``f'{metric}_E'`` (metric on the noisy landscape) and ``'std'``.
    """
    # Pool workers created by fork start with an identical copy of the
    # parent's global NumPy RNG state, so without reseeding every worker would
    # draw exactly the same noise sequence. seed() without an argument
    # reseeds from fresh OS entropy.
    np.random.seed()

    true_key = f'{metric}_T'
    error_key = f'{metric}_E'
    plot_dict = {true_key: [], error_key: [], 'std': []}
    error_list = [0.02, 0.04, 0.06, 0.08]
    for i in range(start + 1, start + 11):
        print(i)  # progress indicator
        for j in range(10):
            clean_landscape = NK_landscape_list[i][j]
            # The noise-free value is the same for all noise levels.
            ruggedness_T = get_ruggedness(clean_landscape)
            for k in error_list:
                NK_landscape_error = Add_Error(clean_landscape, k)
                ruggedness_E = get_ruggedness(NK_landscape_error)
                plot_dict[true_key].append(ruggedness_T)
                plot_dict[error_key].append(ruggedness_E)
                plot_dict['std'].append(k)
    plot_df = pd.DataFrame(plot_dict)
    return plot_df

In [None]:
jobs = []
# Run the ten batches (start offsets 0, 10, ..., 90) in a pool of ten worker
# processes; res_list holds one DataFrame per batch, in offset order.
with multiprocessing.Pool(10) as p:
    res_list = p.map(worker, range(0, 100, 10))

In [None]:
# Stack the per-batch DataFrames into one table. The original row labels of
# each batch are kept, so index values repeat across batches.
plot_df = pd.concat(res_list)

In [None]:
# Uncomment only if you want to overwrite plot data files in plot_data folder.

# with open(f'./plot_data/{model}_{N}_{metric}_plot_df.pkl','wb') as f:
#     pickle.dump(plot_df,f)