In [1]:
import numpy as np
import numpy.random as nrand
import pandas as pd
import pickle
import copy
import matplotlib.pyplot as plt
from itertools import combinations
from matplotlib.ticker import FormatStrFormatter
from scipy.optimize import curve_fit
import itertools
from sklearn.linear_model import LinearRegression
import uncertainties as unc
import uncertainties.unumpy as unp
from multiprocessing import Pool

In /home/siliang/usr/miniconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The savefig.frameon rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.
In /home/siliang/usr/miniconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The verbose.level rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.
In /home/siliang/usr/miniconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The verbose.fileo rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.


In [2]:
# Experiment grid: each (N, model, metric) combination names one raw-result
# pickle file that is read when the data are loaded.
N_list = [
    5,
    10,
    15,
]
model_list = [
    'NK',
    'RMF',
    'Polynomial',
]
metric_list = [
    'N_max',
    'epi',
    'r_s',
    'open_ratio',
]

In [3]:
# Load every (N, model, metric) raw-result pickle and stack them into all_df.
# The per-file frames are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies the accumulated frame on every
# iteration, and seeding it with an empty DataFrame adds a no-column operand.
# NOTE(security): pickle.load can execute arbitrary code -- only point this at
# trusted files in ./raw_data.
frames = []
for N in N_list:
    for model in model_list:
        for metric in metric_list:
            with open(f'./raw_data/{model}{N}_{metric}_raw.pkl', 'rb') as f:
                res_dict = pickle.load(f)
            frames.append(pd.DataFrame(res_dict))
# ignore_index=True gives the combined frame a fresh 0..n-1 integer index.
all_df = pd.concat(frames, ignore_index=True)


In [4]:
# Turn open_ratio to blocked_ratio
# Rows whose metric is 'open_ratio' are relabelled 'blocked_ratio'; their
# ground truth and every raw value v are replaced by 1 - v. Rows of any other
# metric keep their existing raw dict unchanged.
raw_dict_list = []
for row_label, record in all_df.iterrows():
    if record['metric'] != 'open_ratio':
        raw_dict_list.append(record['raw'])
        continue
    all_df.loc[row_label, 'metric'] = 'blocked_ratio'
    all_df.loc[row_label, 'ground_truth'] = 1 - record['ground_truth']
    # The raw dict is keyed 1..replication; complement each list element-wise.
    raw_dict_list.append({
        level: [1 - value for value in record['raw'][level]]
        for level in range(1, record['replication'] + 1)
    })
all_df.loc[:, 'raw'] = raw_dict_list

In [None]:
# Add averaged_ruggedness
# For each row, take the first entry of the raw list stored under the row's
# own replication count (raw[replication][0]).
averaged_ruggedness = [
    record['raw'][int(record['replication'])][0]
    for _, record in all_df.iterrows()
]
all_df['averaged_ruggedness'] = averaged_ruggedness

In [None]:
# Pre-calculate convex_concave information for replication of 3.
# Rows with any other replication count get None.
# NOTE(review): the row is labelled 'concave' when mean(raw[1]) + mean(raw[3])
# >= 2*mean(raw[2]); a non-negative second difference is often called convex,
# so confirm this labelling convention is the intended one.
convex_concave = []
for row_label, record in all_df.iterrows():
    if record['replication'] != 3:
        convex_concave.append(None)
        continue
    level_means = {level: np.mean(record['raw'][level]) for level in (1, 2, 3)}
    outer_sum = level_means[1] + level_means[3]
    convex_concave.append('concave' if outer_sum >= 2*level_means[2] else 'convex')
all_df['convex_concave'] = convex_concave

In [None]:
# Holling type III response curve
def func(x, a, b, c):
    """Evaluate a*x**2 / (a*b*x**2 + 1) + c.

    At x = 0 the fraction vanishes, so the result is c.
    """
    x_squared = x**2
    numerator = a*x_squared
    denominator = a*b*x_squared+1
    return numerator/denominator+c

# SSE of model from mean
def SSE(popt,ruggedness_dict,rep):
    """Sum of squared differences between the fitted curve and the per-level means.

    popt            -- parameters unpacked into func after x (a, b, c)
    ruggedness_dict -- mapping indexed by the integers 1..rep; each value is
                       averaged with np.mean
    rep             -- number of levels; level i is placed at x = 1/sqrt(i)
    """
    levels = range(1,rep+1)
    x_points = np.array([1/np.sqrt(level) for level in levels])
    observed_means = np.array([np.mean(ruggedness_dict[level]) for level in levels])
    residuals = func(x_points, *popt)-observed_means
    return sum(residuals**2)

In [None]:
# Fitting Holling type III response model to the averaged data points.
def fit_raw(ruggedness_dict,rep):
    """Fit func to the per-level means of ruggedness_dict.

    Level i (for i in 1..rep) contributes the point
    (1/sqrt(i), mean(ruggedness_dict[i])). All three parameters are bounded
    to [0, inf) and curve_fit may use up to 100000 function evaluations.

    Returns (popt, pcov, sse): the two curve_fit outputs plus the SSE of the
    fitted curve against the same per-level means.
    """
    levels = range(1,rep+1)
    xdata = [1/np.sqrt(level) for level in levels]
    ydata = [np.mean(ruggedness_dict[level]) for level in levels]

    lower_bounds = [0,0,0]
    upper_bounds = [np.inf,np.inf,np.inf]
    popt,pcov = curve_fit(func, xdata, ydata,maxfev=100000,bounds=(lower_bounds,upper_bounds))

    return popt,pcov,SSE(popt,ruggedness_dict,rep)

In [None]:
def worker(idx, chunk_size=650):
    """Fit and extrapolate one contiguous chunk of rows of the global all_df.

    Chunk idx covers row labels idx*chunk_size up to (but excluding)
    (idx+1)*chunk_size, clamped to len(all_df). The end of the data is taken
    from the frame itself rather than a hard-coded row count, so the function
    keeps working when the number of loaded rows changes. This relies on
    all_df having a 0..n-1 integer index.

    Parameters
    ----------
    idx : int
        Zero-based chunk number.
    chunk_size : int, optional
        Number of rows per chunk (default 650, the value previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        One row per processed all_df row, in order, with columns
        'prediction', 'prediction_std', 'fitting_sse', 'popt', 'pcov',
        'A', 'B' and 'C'. Empty if the chunk starts past the end of all_df.
    """

    # A: alpha, B: beta, C: delta
    res_dict = {'prediction':[],'prediction_std':[],'fitting_sse':[],'popt':[],'pcov':[],'A':[],'B':[],'C':[]}

    start = idx*chunk_size
    # Clamp to the real number of rows so the last chunk stops at the end of the data.
    stop = min(start + chunk_size, len(all_df))

    for i in range(start, stop):

        print(i)
        raw_data = all_df.loc[i,'raw']
        rep = all_df.loc[i,'replication']
        groundt = all_df.loc[i,'ground_truth']
        popt,pcov,sse = fit_raw(raw_data,rep)

        # Don't calculate prediction std, SSE, and covariance matrix if there are only 3 replicates.
        # Those three fields are stored as the sentinel -1.0 instead.
        if rep == 3:
            y0_nom = func(0, *popt)
            y0_std = -1.0
            sse = -1.0
            pcov = -1.0
        else:
            # Propagate the fit covariance through func evaluated at x = 0 to
            # get the prediction together with its standard deviation.
            a,b,c = unc.correlated_values(popt,pcov)
            y0 = func(0, a, b, c)
            y0_nom = float(unp.nominal_values(y0))
            y0_std = float(unp.std_devs(y0))

        res_dict['prediction'].append(y0_nom)
        res_dict['prediction_std'].append(y0_std)
        res_dict['fitting_sse'].append(sse)
        res_dict['popt'].append(popt)
        res_dict['pcov'].append(pcov)
        # Each of A, B, C is an estimate minus the ground truth:
        # A uses the mean of raw[1], B the first entry of raw[rep],
        # C the extrapolated prediction at x = 0.
        res_dict['A'].append(np.mean(raw_data[1]) - groundt)
        res_dict['B'].append(raw_data[rep][0] - groundt)
        res_dict['C'].append(y0_nom - groundt)

    return pd.DataFrame(res_dict)
    
    

In [None]:
# Run the 24 chunk indices through worker on a pool of 24 processes, stack the
# per-chunk frames in chunk order, then attach the new columns to all_df
# side by side (axis=1 aligns on the row index).
with Pool(24) as pool:
    res_df = pd.concat(pool.map(worker, range(24)), ignore_index=True)
all_df = pd.concat([all_df, res_df], axis=1)

In [None]:
# Uncomment only if you want to overwrite extrapolation_result.pkl.
#with open('extrapolation_result.pkl','wb') as f:
#    pickle.dump(all_df,f)