# Simulations with selection for SI

## Preamble

Here we perform simulations with a selection coefficient $s=0.1$ instead of using the 
estimated $s$-values from the fit.

Similar to selection.ipynb, this notebook is used to
1. create the input files for the simulations (the parallel simulations are run on a computer cluster), see [Simulation](#methods), and
2. analyze the simulation data (read the simulation output files and create plots), see [Results](#results).

## <a id='init'>Import libraries, global settings, and load experimental data</a>

In [None]:
import numpy as np
import module,plot,data
import pickle
import matplotlib
import matplotlib.pyplot as plt
import itertools
import re
import pandas as pd
from functools import partial
from matplotlib.lines import Line2D
import string
import json

In [None]:
# Global display settings: 8 pt Arial for every matplotlib figure,
# three significant decimals when numpy arrays are printed.
font = dict(family='Arial', size=8)
matplotlib.rc('font', **font)
np.set_printoptions(precision=3)

Load experimental data from file

In [None]:
# Experimental measurements of the plasmid system, read through the project helper
# `data.loaddata`. Later cells index the result like a pandas DataFrame with the
# columns 'replicate', 'replicate.n', 't', 'host_freq', 'hetero_freq' and 'homo_freq'.
dataset = data.loaddata(datafile="expdata/plasmid-exp.xlsx")
dataset.head()

In [None]:
# Experimental measurements of the chromosome system. `header=1` is forwarded to
# `data.loaddata` (presumably the spreadsheet row holding the column names -- TODO confirm).
# Later cells read the columns 'replicate', 'replicate.n', 't' and 'host_freq'.
dataset_chromosome = data.loaddata(datafile="expdata/chrom-exp.xlsx",header=1)
dataset_chromosome.head()

Load carrying capacities from file

In [None]:
# Carrying capacities for both systems. The first CSV column becomes the index;
# `squeeze()` turns a single-column frame into a Series, which the simulation cell
# slices with `[1:]` (presumably one value per day -- TODO confirm against the CSV files).
Nc=pd.read_csv('expdata/Nc_plasmid.csv', index_col=0).squeeze()
Nc_chromosome=pd.read_csv('expdata/Nc_chromosome.csv', index_col=0).squeeze()
print('Carrying capacities loaded')

## <a id='methods'>Simulation</a>

Initialise simulation parameters and write to input files

In [None]:
# Parameters shared by every plasmid simulation. The key order is kept because
# these dicts are merged with the per-run parameters and dumped to JSON.
# ('n' matches the "n_rc=15" / "n_rc=1" figure labels; presumably the replicon copy number.)
fixedparameters = dict(
    n=15,
    rep='reg',
    Nc=Nc[1:].values.tolist(),  # use day 1-30 for simulations
    D=30,  # simulate for 30 days, starting at day 1
)

# Same layout for the chromosome simulations.
fixedparameters_chromosome = dict(
    n=1,
    rep='reg',
    Nc=Nc_chromosome[1:].values.tolist(),  # use day 1-30 for simulations
    D=30,  # simulate for 30 days, starting at day 1
)

# Selection coefficient, fixed to 0.1 for both systems in this notebook.
s = 0.1
s_chromosome = .1

# Replicate indices 1..12; the per-replicate lists are addressed with repindex-1.
repindex_list = list(range(1, 13))

# Initial frequency per replicate index, chosen by comparison with the
# initial frequencies at day 1: four replicates each at 1e-4, 1e-6 and 1e-7.
f_list = [10**(-4)] * 4 + [10**(-6)] * 4 + [10**(-7)] * 4
def rwt_list(s):
    """Return the 'rwt' parameter for each of the 12 replicate indices.

    The values alternate between 1 (odd replicate indices) and 1-s (even
    replicate indices); entry k belongs to replicate index k+1.
    `rwt` is presumably the relative fitness of the wild type -- TODO confirm.
    """
    return [1, 1 - s] * 6
# Parameter 'b' per replicate index: the pattern (0.01, 0.01, 0.001, 0.001)
# repeats for each of the three initial-frequency groups.
b_list = [0.01, 0.01, 0.001, 0.001] * 3

# One simulation per seed and replicate index.
seeds = range(0, 100)

def parset(system,s,par):
    """Build the per-run parameter dict for one (replicate index, seed) pair.

    Parameters
    ----------
    system : label used in the output file name (the notebook passes
        'plasmid' or 'chromosome').
    s : selection coefficient handed to `rwt_list`.
    par : pair `(repindex, seed)`; `repindex` is 1-based.

    Returns
    -------
    dict with the keys 'repindex', 'f', 'rwt', 'b', 'seed' and 'file', where
    'file' is the output path below 'selection-SI_output/'.
    """
    repindex, seed = par
    position = repindex - 1  # the per-replicate lists are 0-based
    raw_name = f"system_{system}_repindex_{repindex}_seed_{seed}"
    # Replace everything except letters, digits and '_' to get a safe file name.
    safe_name = re.sub('[^a-zA-Z0-9_]', '_', raw_name)
    return {
        "repindex": repindex,
        "f": f_list[position],
        "rwt": rwt_list(s)[position],
        "b": b_list[position],
        "seed": seed,
        "file": "selection-SI_output/" + safe_name,
    }

# Every (replicate index, seed) combination is one simulation run. The order
# (all seeds of replicate 1, then replicate 2, ...) fixes the numbering of the
# input files and the index arithmetic used by the plotting cells.
l=list(itertools.product(repindex_list,seeds))
parsets=[parset('plasmid',s,par) for par in l]

# The chromosome runs are built from their own combination list so that the two
# systems stay independent if one of the lists is changed later.
l_chromosome=list(itertools.product(repindex_list,seeds))
parsets_chromosome=[parset('chromosome',s_chromosome,par) for par in l_chromosome]

print('initialized ', len(parsets), ' parameter combinations for plasmid simulation')
print('initialized ', len(parsets_chromosome), ' parameter combinations for chromosome simulation')

print('repindex_list:', repindex_list)
print('seeds:', seeds)

print('Example parameter input', fixedparameters | parsets[0])

# Write plasmid and chromosome parameter sets to consecutively numbered input
# files: first all plasmid runs, then all chromosome runs. Each file holds the
# fixed parameters of its system merged with the per-run parameters.
inputs=([fixedparameters | p for p in parsets]
        + [fixedparameters_chromosome | p for p in parsets_chromosome])
for j,parameters in enumerate(inputs):
    with open( "selection-SI_input/inputfile_"+str(j), 'w') as file:
        file.write(json.dumps(parameters))
print("Input files saved. ")


Testing simulation

In [None]:
# Testing: run one simulation locally from a written input file.
# With 12 x 100 plasmid files written first, file 1400 is a chromosome run
# (replicate index 3, seed 0).

print('simulation test')
# Read the parameters inside a context manager so the file handle is closed.
with open( "selection-SI_input/inputfile_1400", 'r') as inputfile:
    p=json.load(inputfile)
print(p['rwt'],p['f'],p['n'])
# 'repindex' is removed before the remaining entries are passed as keyword arguments.
del p["repindex"]

sim=module.stochbottleSim(**p)
plot.plot_interdays(sim)

Load and plot all simulations to check that output files exist for all parameter sets

In [None]:
# Overlay every simulation in one axes to verify that an output file exists for
# each plasmid and chromosome parameter set.
fig,ax=plt.subplots()
for i,p in enumerate(parsets+parsets_chromosome):
    # Only failures to read the file count as "missing"; errors raised while
    # plotting are real bugs and are allowed to surface.
    # NOTE: pickle executes code on load -- only open output files you produced yourself.
    try:
        with open(p['file'],'rb') as outputfile:
            sim=pickle.load(outputfile)
    except (OSError, EOFError, pickle.UnpicklingError):
        # Report only every 100th parameter set to keep the output short.
        if not i%100 : print('Could not find '+p['file'], 'parset no: ', i)
        continue
    plot.plot_interdays(sim,figax=(fig,ax),
        lim=(1e-7*0.9,1e-0/0.9),alpha=1)
    
# print parset


## <a id='results'>Results</a>

Figure panels A (cell frequencies in simulation and experiment) and B (survival of chromosome and plasmid allele)

In [None]:
# Style specifications
marker={'plasmid':'o','chromosome':'o'}
color={'plasmid':'black','chromosome':'black'}
linestyle={'plasmid':'solid','chromosome':'dotted'}
dashes={'plasmid':(None,None),'chromosome':(None,None)} #(1.,1.)
markersize=2
linewidth=1.
linewidthmarker=.5
markeredgewidth=.5

In [None]:
# Figure panel A: simulated cell frequencies (mean line, quantile bands) together
# with the experimental measurements, one axes per replicate/system combination.
fig,ax=plt.subplots(nrows=3,ncols=4,figsize=(16/2.54,10/2.54),sharex=True,sharey=True)
fig.subplots_adjust(left=1.6/16,bottom=1.1/10,right=(1.6+3.2*4+0.4*3)/16,top=(1.1+2.5*3+0.4*2)/10,
        wspace=0.15, hspace=0.15)
fig.text(0.53, 0.02, 'Number of transfer', ha='center', va='center')
fig.text(0.03, 0.52, 'Cell frequencies', ha='center', va='center', rotation='vertical')

# Line colour, width, style and legend label per genotype class.
# (`lw` is used for the widths so the parameter list `l` of the simulation cell is not overwritten.)
c={ 'host' : 'black',
    'het': '#008b8bff',
    'hom': 'violet',
}
lw={ 'host' : 1.,
    'het': 1,
    'hom': 1,
}
ls={ 'host' :'solid',
    'het': 'dotted',
    'hom': 'dashed',
}
la={'host' :'Mutant',
    'het': 'Heterozygote',
    'hom': None,
}

# Two ways of assigning (replicate index, system) pairs to the 12 axes.
zip_repsysax={
            "groupselection" : zip(
                [2,2,4,4,6,6,8,8,10,10,12,12,],
                ['chromosome' if i%2 else 'plasmid'  for i in range(12)],
                ax.flat),
            "groupsystem": zip(
                [2,4,6,8,10,12,2,4,6,8,10,12],
                ['plasmid' if i<=6 else 'chromosome' for i in range(1,12+1)],
                ax.flat),
            }
group="groupselection"


def _load_sims(parsets_slice):
    """Unpickle the output file of each parameter set; report and skip unreadable files."""
    loaded=[]
    for p in parsets_slice:
        # NOTE: pickle executes code on load -- only open output files you produced yourself.
        try:
            with open(p['file'],'rb') as outputfile:
                loaded.append(pickle.load(outputfile))
        except (OSError, EOFError, pickle.UnpicklingError):
            print('no file:', p['file'])
    return loaded


def _genotype_freq(ts_eod,genotype):
    """Per-row frequency of one genotype class in the 2-D count array `ts_eod`.

    'host' sums all columns but the first, 'het' all columns but the first and
    last, 'hom' takes the last column; each is divided by the row total.
    (Presumably column k counts cells with k mutant copies -- TODO confirm.)
    """
    total=np.sum(ts_eod,axis=-1)
    if genotype=='het':
        return np.sum(ts_eod[:,1:-1],axis=-1)/total
    if genotype=='hom':
        return ts_eod[:,-1]/total
    if genotype=='host':
        return np.sum(ts_eod[:,1:],axis=-1)/total
    raise ValueError('unknown genotype: '+str(genotype))


def _plot_simulated(ax1,sims,genotype,ignorezeros=True):
    """Draw the mean trajectory of `genotype` and, for 'host', its 95 % / 68 % quantile bands.

    With `ignorezeros` zero frequencies are excluded (set to NaN) from mean and
    quantiles; otherwise their log-frequency is replaced by -1000.
    """
    # log10(0) is expected for extinct trajectories, so the divide warning is silenced.
    with np.errstate(divide='ignore'):
        freqs=np.array([_genotype_freq(sim[1],genotype) for sim in sims])
        logfreqs=np.log10(freqs)
    if ignorezeros:
        logfreqs[np.isinf(logfreqs)]=np.nan
        freqs[freqs==0]=np.nan
    else:
        logfreqs[np.isinf(logfreqs)]=-1000

    # Quantiles are taken of the log-frequencies, the mean of the frequencies.
    q=np.nanquantile(logfreqs,q=[(1-.95)/2,(1-.68)/2,1-(1-.68)/2,1-(1-.95)/2],axis=0)
    m=np.nanmean(freqs,axis=0)
    transfers=range(1,len(m)+1)

    if genotype in ['host']:
        ax1.fill_between(transfers,y1=q[0],y2=q[-1],color=c[genotype],alpha=.1,edgecolor="none")
        ax1.fill_between(transfers,y1=q[1],y2=q[-2],color=c[genotype],alpha=.35,edgecolor="none")
    ax1.plot(transfers,np.log10(m),color=c[genotype],linewidth=lw[genotype],linestyle=ls[genotype],label=la[genotype])


n_seeds=len(seeds)  # parameter sets are ordered replicate by replicate, n_seeds runs each
parsetssystem={'plasmid':parsets,'chromosome':parsets_chromosome}
expdatasets={'plasmid':dataset,'chromosome':dataset_chromosome}

for rep,system,ax1 in zip_repsysax[group]:

    # --- simulations ---
    i0=(rep-1)*n_seeds
    sims=_load_sims(parsetssystem[system][i0:i0+n_seeds])
    genotypes=['host','het'] if system=='plasmid' else ['host']
    if sims:
        for genotype in genotypes:
            _plot_simulated(ax1,sims,genotype)
    else:
        # Without any output file only the experimental points can be drawn.
        print('no simulation output for replicate',rep,'of system',system)

    # --- experiment ---
    # `expdata` (not `data`) so the imported `data` module stays usable when cells are re-run.
    source=expdatasets[system]
    freqcols=['host_freq']+(['hetero_freq'] if 'het' in genotypes else [])
    expdata=source.loc[(source['replicate']==rep) & (source['t']>=1),
                       ['replicate.n','t']+freqcols].set_index('t')
    with np.errstate(divide='ignore'):
        expdata=expdata.assign(**{col: np.log10(expdata[col]) for col in freqcols})

    expstyle=dict(linestyle='none',linewidth=0.5,
                  marker=marker[system],markersize=.5,
                  alpha=.25,label='_nolegend_')
    expdata.groupby('replicate.n')['host_freq'].plot(ax=ax1,color='black',**expstyle)
    if 'het' in genotypes:
        expdata.groupby('replicate.n')['hetero_freq'].plot(ax=ax1,color=c['het'],**expstyle)

    # --- axes formatting (y axis is log10 of the frequency) ---
    ax1.set_ylim((-8,.2))
    ax1.set_yticks(list(range(-7,1)))
    ax1.set_xticks([1,5,10,15,20,25,30])
    ax1.set_xticklabels([1,5,10,15,20,25,30], fontsize=6)
    # Only the ticks at -6, -3 and 0 are labelled.
    ax1.set_yticklabels(['','$10^{-6}$','','','$10^{-3}$','','','1'], fontsize=5)
    ax1.yaxis.set_ticks_position('both')
    ax1.tick_params(length=0,axis='both',which='minor')
    ax1.tick_params(direction='in',axis='both',which='both')
    ax1.set_xlabel('')

if group=="groupselection":
    # Column titles above the first row; raw strings keep the LaTeX backslash intact.
    column_titles=[r'Polyploid $n_\mathrm{rc}=15$',r'Monoploid $n_\mathrm{rc}=1$']*2
    for title, ax1 in zip(column_titles,ax[0]):
        ax1.text(.55, 1.03, title, transform=ax1.transAxes,
                size=8, weight='normal',ha='center')

# Legend handles: mutant line, heterozygote line, experimental marker.
custom_lines = \
        [
        Line2D([0], [0],
            color=c['host'], linestyle=ls['host'], marker='None',
            linewidth=lw['host']),
        Line2D([0], [0],
            color=c['het'], linestyle=ls['het'], marker='None',
            linewidth=lw['het']),
        Line2D([0], [0],
            color=c['host'],marker=marker['plasmid'], markersize=.5,linestyle='None',alpha=.25),
        ]

# Number the sub-panels 1..12 and put the panel letter 'A' on the first axes.
for number, ax1 in enumerate(ax.flat, start=1):
    ax1.text(0.00, 1.025, str(number), transform=ax1.transAxes,
            size=9, weight='normal')
ax[0,0].text(-0.18, 1.03, string.ascii_uppercase[0], transform=ax[0,0].transAxes,
        size=10,weight='bold')

ax[0,0].legend(custom_lines, ['Novel phenotype','Heterozygotes','Experiment'],fontsize=6,loc='lower right',handler_map = {tuple: matplotlib.legend_handler.HandlerTuple(None)})

fig.savefig('../figures-plots/plot_SI-selection_A.pdf')


In [None]:
# Figure panel B: 2 x 4 grid of survival curves with shared axes
# (figure size given in cm and converted to inches).
fig, ax = plt.subplots(
    nrows=2, ncols=4,
    figsize=(16/2.54, 7/2.54),
    sharex=True, sharey=True,
)
fig.subplots_adjust(
    left=1.6/16,
    bottom=1.1/7,
    right=(1.6+3.2*4+0.4*3)/16,
    top=(1.1+2.5*2+0.4*1)/7,
    wspace=0.15,
    hspace=0.15,
)
# Common axis labels for the whole grid.
fig.text(0.53, 0.05, 'Number of transfer', ha='center', va='center')
fig.text(0.03, 0.52, 'Survival of the novel allele', ha='center', va='center', rotation='vertical')

def plot_survdyn(parsets,dataset,ax_list,system='plasmid',
                 reps=(6,8,10,12),n_seeds=100,n_exp_replicates=6):
    """Plot the surviving fraction over time for simulation (line) and experiment (markers).

    Parameters
    ----------
    parsets : list of parameter dicts with a 'file' entry, ordered replicate
        index by replicate index with `n_seeds` consecutive runs each.
    dataset : experimental DataFrame with the columns 'replicate',
        'replicate.n', 't' and 'host_freq'.
    ax_list : axes to draw into, one per entry of `reps`.
    system : key into the module-level style dicts (`color`, `linestyle`,
        `dashes`, `marker`).
    reps : replicate indices to plot.
    n_seeds : number of simulation runs per replicate index.
    n_exp_replicates : number of experimental lines ('replicate.n' = 1..n) per
        replicate index; used for zero-filling and as denominator.

    A simulation run counts as surviving at a time point while the summed counts
    of all columns but the first are non-zero; an experimental line while its
    'host_freq' is non-zero. Time points missing from `dataset` are treated as
    frequency 0.
    """
    data=dataset.loc[(dataset['t']>=1),
                                ['replicate','replicate.n','t','host_freq']]

    # Zero-fill: add a row with host_freq 0 for every (t, replicate, replicate.n)
    # combination that has no entry in the experimental data.
    zeroarray_keys=['t','replicate','replicate.n','host_freq']
    observed=set(zip(dataset['t'],dataset['replicate'],dataset['replicate.n']))
    zeroarray_values=[[t,replicate,replicate_n,0.] for replicate in range(1,12+1)
                                                    for replicate_n in range(1,n_exp_replicates+1)
                                                    for t in range(1,30+1)
                    if (t,replicate,replicate_n) not in observed]
    if zeroarray_values:
        # Built from the list of rows; an empty result is skipped so that a complete
        # data set does not fail or disturb the column dtypes.
        zerodataframe=pd.DataFrame(zeroarray_values, columns=zeroarray_keys)
        data=pd.concat([data,zerodataframe],ignore_index=True)
    data=data.sort_values(by=['replicate', 'replicate.n', 't'])

    for rep,ax1 in zip(reps,ax_list):
        # --- simulation ---
        i0=(rep-1)*n_seeds
        sims=[]
        for p in parsets[i0:i0+n_seeds]:
            # NOTE: pickle executes code on load -- only open output files you produced yourself.
            with open(p['file'],'rb') as outputfile:
                sims.append(pickle.load(outputfile))
        if not sims:
            raise ValueError('no parameter sets for replicate index '+str(rep))
        hostcell_counts=np.array([np.sum(sim[1][:,1:],axis=1) for sim in sims])
        extinct_sim=(len(sims)-np.count_nonzero(hostcell_counts,axis=0))/len(sims)

        ax1.plot(range(1,len(extinct_sim)+1), 1-extinct_sim,
            color=color[system],linewidth=linewidth,linestyle=linestyle[system],alpha=1.,
            dashes=dashes[system]
            )

        # --- experiment ---
        surviving_exp=(data.loc[data['replicate']==rep]
                           .groupby('t')['host_freq']
                           .apply(lambda freqs: np.count_nonzero(freqs)))
        extinct_exp=(n_exp_replicates-surviving_exp)/n_exp_replicates
        ax1.errorbar(x=extinct_exp.index,y=1-extinct_exp,
                capsize=0.75, elinewidth=.25, markeredgewidth=markeredgewidth,
                linestyle='None',linewidth=linewidthmarker, color=color[system],
                marker=marker[system], markersize=markersize,alpha=1.,fillstyle='full',
                )

        # --- axes formatting (only the first 21 time points are shown) ---
        ax1.set_ylim(-0.1,1.1)
        ax1.set_xlim(-.5,21+.5)
        ax1.set_yticks(np.linspace(0,1.0,6))
        ax1.set_xticks([1,5,10,15,20])
        ax1.set_yticklabels(['%i %%' % y for y in np.linspace(0,100,6)],fontsize=6)
        ax1.set_xticklabels([1,5,10,15,20],fontsize=6)
        ax1.tick_params(length=0,axis='both',which='both')

# Plasmid results go into columns 1 and 3, chromosome results into columns 2 and 4.
plot_survdyn(parsets,dataset,[ax[0,0],ax[0,2],ax[1,0],ax[1,2]])
plot_survdyn(parsets_chromosome,dataset_chromosome,[ax[0,1],ax[0,3],ax[1,1],ax[1,3]],system='chromosome')

# Legend handles: simulation line and experiment marker.
custom_lines = \
        [
        Line2D([0], [0],
            color=color['plasmid'], linestyle=linestyle['plasmid'], marker='None',
            linewidth=linewidth,dashes=dashes['plasmid']),
        Line2D([0], [0],
            color=color['plasmid'], linestyle='None', marker=marker['plasmid'],
            fillstyle='full',markersize=markersize,markeredgewidth=markeredgewidth),
        ]

ax[0,0].legend(custom_lines, ['Simulation','Experiment'],fontsize=6,handler_map = {tuple: matplotlib.legend_handler.HandlerTuple(None)})

# Column titles above the first row; raw strings keep the LaTeX backslash intact.
column_titles=[r'Polyploid $n_\mathrm{rc}=15$',r'Monoploid $n_\mathrm{rc}=1$']*2
for title, ax1 in zip(column_titles,ax[0]):
    ax1.text(.55, 1.03, title, transform=ax1.transAxes,
            size=8, weight='normal',ha='center')

# Number the sub-panels 1..8 in reading order.
for number, ax1 in enumerate(ax.flat, start=1):
    ax1.text(0.00, 1.025, str(number), transform=ax1.transAxes,
            size=9, weight='normal')

# Panel letter 'B' on the first axes.
ax[0,0].text(-0.18 , 1.02, string.ascii_uppercase[1], transform=ax[0,0].transAxes,
        size=10, weight='bold')

fig.savefig('../figures-plots/plot_SI-selection_B.pdf')