# Model fitting

This notebook fits the mathematical model to the experimental data in order to determine the selection coefficient $s$ for the chromosome experiment.

## Import libraries, load data, and get host frequencies

In [None]:
import module,plot
import lmfit
from lmfit import Minimizer, Parameters
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy

Load experimental data from file

In [None]:
# Read the experiment workbook; header=1 takes the column names from the
# second spreadsheet row (row index 1).
dataset = pd.read_excel(
    "expdata/chrom-exp.xlsx",
    header=1,
)
dataset.head()

Compute the mean and standard deviation of the log10 host frequencies from the dataset (ignoring zeros)

In [None]:
# Collect the (host_pop, total_pop) columns for replicate.n = 1..6 of
# replicate 2, skipping the first row of each and keeping the next 30 rows.
data=np.array([
    dataset.loc[(dataset['replicate']==2) & (dataset['replicate.n']==i),
                ['host_pop','total_pop']].to_numpy()[1:30+1]
    for i in range(1,6+1)
], dtype=float)

# log10 of the host fraction. A zero count yields -inf (or NaN for 0/0); those
# entries are replaced by NaN so that the NaN-aware statistics below really do
# ignore them instead of dragging the mean to -inf or the std to NaN.
with np.errstate(divide='ignore', invalid='ignore'):
    hostfreq=np.log10(data[:,:,0]/data[:,:,1])
hostfreq[~np.isfinite(hostfreq)]=np.nan

# Mean and spread across the six series (axis 0), one value per row/day.
# Both use the NaN-skipping variants so they are computed on the same samples.
hostfreq_mean=np.nanmean(hostfreq,axis=0)
hostfreq_std=np.nanstd(hostfreq,axis=0)
print('Geometric mean and standard deviation of host cell frequencies for all days:')
print(hostfreq_mean)
print(hostfreq_std)


In [None]:
# hostfreq is (series, rows); transposing draws one line per series against
# the row index.
plt.plot(range(hostfreq.shape[1]), hostfreq.T)

In [None]:
# Carrying capacities; the first CSV column is used as the index and
# squeeze() reduces the frame (presumably single-column — TODO confirm) so
# that Nc[1:].values can be used later.
Nc = (
    pd.read_csv('expdata/Nc_chromosome.csv', index_col=0)
    .squeeze()
)
print('Carrying capacities loaded:')

Nc

## Fitting

In [None]:
# Log of every objective evaluation as [s, chi-squared]; filled by fcn2min.
s_residual_array = []

def fcn2min(params,x,hostfreq_mean,hostfreq_std):
    """Weighted residuals between simulated and measured log10 host frequencies.

    Runs ``module.stochbottleSim`` with ``rwt = 1 - s`` and the module-level
    carrying capacities ``Nc``, takes the second element of its return value
    as the per-day time series, and compares the log10 fraction of all columns
    but the first against the experimental mean.

    Parameters
    ----------
    params : lmfit.Parameters
        Must contain the parameter ``'s'``.
    x : array-like
        Not used in the body; present because it is passed via ``fcn_args``.
    hostfreq_mean : array-like
        Experimental mean of log10 host frequency per day.
    hostfreq_std : array-like
        Experimental standard deviation per day, used as the residual weight.

    Returns
    -------
    numpy.ndarray
        ``(simulated - hostfreq_mean) / hostfreq_std``.

    Side effects
    ------------
    Appends ``[s, chi-squared]`` to ``s_residual_array``, draws a comparison
    figure, and prints diagnostics on every call.
    """
    s=params['s']
    print(s)

    sim=module.stochbottleSim(rwt=1-s,n=1,Nc=Nc[1:].values.tolist(),f=10**((-4)),
                              D=30,b=0.01)
    ts_eod=sim[1]
    # Fraction of the population in every column except the first, in log10.
    host_freq_eod=np.log10(np.sum(ts_eod[:,1:],axis=-1)/np.sum(ts_eod,axis=-1))
    dev=host_freq_eod-hostfreq_mean
    eps=hostfreq_std
    res=dev/eps

    # Compute the fit statistics once; degrees of freedom follow the actual
    # number of residuals (one fitted parameter) instead of a fixed 30-1.
    chi2=np.sum(np.square(res))
    dof=np.size(res)-1
    s_residual_array.append([s.value,chi2])

    fig,ax=plt.subplots()
    ax.errorbar(x=range(1,len(hostfreq_mean)+1),y=hostfreq_mean,yerr=eps, label='experiment',
                color='black', alpha=1.)
    # Labelled so that the legend lists both curves.
    ax.errorbar(x=range(1,len(host_freq_eod)+1),y=host_freq_eod, label='simulation')
    ax.legend()
    plt.show()
    # The minimizer calls this function many times; release each figure.
    plt.close(fig)

    print('dev',dev)
    print('eps',eps)
    print('res',res)
    print('chisquared',chi2,'\n\n')
    print('reduced chisquared',chi2/dof,'\n\n')
    print('regression standard error',np.sqrt(chi2/dof),'\n\n')
    return res

# Index array handed to fcn2min through fcn_args (the objective does not read it).
x=np.arange(len(hostfreq_mean))

# Grid scan: evaluate the objective at 17 evenly spaced values of s in
# [0.12, 0.16]. Each call logs [s, chi-squared] into s_residual_array.
ls=np.linspace(0.12,0.16,17)
for s in ls:
    print(s)
    params=Parameters()
    params.add('s',value=s)
    fcn2min(params,x,hostfreq_mean,hostfreq_std)

# Least-squares fit of s, starting from 0.099 and bounded to [0, 1].
# Alternative bounds left in the notebook: min=0.05, max=0.20.
params=Parameters()
params.add('s',value=0.099,min=0.00,max=1.00)

minner=Minimizer(
    fcn2min,
    params,
    fcn_args=(x,hostfreq_mean,hostfreq_std),
)
result=minner.minimize(method='least_squares')
print(minner)

# result s = 0.1359 ~= 0.14
result

In [None]:
# Display the MinimizerResult from the fit on its own, without the per-call
# diagnostics printed by the fitting cell.
result

In [None]:
# Panel B: regression standard error for every evaluated s, with the fitted s
# and a reference value drawn as vertical lines with 1.96-sigma bands.
plt.figure(figsize=(2.95,2.95*2/3))
plt.rcParams.update({'font.size': 8})

# Each logged entry is [s, chi-squared]; convert chi-squared to the regression
# standard error sqrt(chi2 / dof), with dof = number of data points - 1.
dof=len(hostfreq_mean)-1
s_scan=np.array(s_residual_array,dtype=float).reshape(-1,2)
plt.plot(s_scan[:,0], # x-values
         np.sqrt(s_scan[:,1]/dof), # y-values
         linestyle='none', marker='x', color='black',markersize=2.5
        )

# plt.yscale('log')
ylim=plt.gca().get_ylim(); print('auto-set ylim:',ylim)
plt.xlim(0.045,0.175)
ylim=(-4/20,4+4/20)
plt.ylim(*ylim)

# Reference value 0.0984 with spread 0.030453531 over 6 samples — presumably
# an independent estimate of s; TODO confirm the source of these numbers.
plt.plot(np.ones(2)*0.0984,ylim)
sem=0.030453531/np.sqrt(6)
plt.fill_betweenx(ylim, np.ones(2)*0.0984-sem*1.96, np.ones(2)*0.0984+sem*1.96, alpha=0.25,linewidth=0)

# Fitted value of s from the least-squares result.
s_fit=result.params['s'].value
s_fit_err=result.params['s'].stderr
plt.plot(np.ones(2)*s_fit, ylim, color='black')

# lmfit leaves stderr as None when it cannot estimate uncertainties; skip the
# band in that case rather than failing on None arithmetic.
if s_fit_err is not None:
    plt.fill_betweenx(ylim,
                      np.ones(2)*s_fit-s_fit_err*1.96,
                      np.ones(2)*s_fit+s_fit_err*1.96,
                      alpha=0.25,color='black',linewidth=0)

plt.xlabel('Strength of selection $s$')
plt.ylabel('Regression standard error')
plt.text(-0.25, 1.06, s='B', transform=plt.gca().transAxes,
            size=11,weight='bold')
plt.title('Monoploid replicon')
plt.tight_layout()
plt.savefig('../figures-plots/plot_SI-fit_B.pdf')