In [1]:
import numpy as np 
# Compatibility shim: NumPy 2.0 removed the `np.bool8` alias (presumably still
# referenced by one of the libraries imported below -- TODO confirm which).
# Point it at `np.bool_`, which exists on every NumPy release; plain `np.bool`
# is missing on NumPy 1.24-1.26 and is the builtin `bool` on older releases.
np.bool8 = np.bool_
from plotly import express as px 
from plotly.subplots import make_subplots
import pandas
import dghf
import hill_test
import timeit
from scipy import optimize

In [2]:
def plot_error_and_time(kw_fit_arr,x_y_kw,title,bounds,x="coarse_n",
                        convert_v=None):
    """
    Fit every data set with every keyword set and show box plots of the
    median error (top) and the fit time (bottom) against one swept keyword.

    :param kw_fit_arr: list of dicts; each dict is one set of keyword
        arguments forwarded to the fit by error_values
    :param x_y_kw: data sets forwarded unchanged to error_values
    :param title: title of the figure
    :param bounds: fit bounds forwarded unchanged to error_values
    :param x: name of the column (one of the swept keywords) used as the x
        axis of both plots
    :param convert_v: optional dict mapping each swept keyword name to a
        callable that converts its values before plotting (e.g. function
        object -> readable label); must contain an entry for every swept key
    :return: None; the figure is displayed with the "iframe" renderer
    """
    df_error_N, df_time_N = error_values(kw_fit_arr=kw_fit_arr,x_y_kw=x_y_kw,
                                         bounds=bounds,time_repeats=10)
    # every keyword name that appears in any of the swept keyword sets
    x_vals = sorted({key for kw in kw_fit_arr for key in kw})
    if convert_v is not None:
        for frame in (df_error_N, df_time_N):
            # the loop variable is deliberately NOT called `x`: reusing that
            # name would overwrite the axis-column argument used further down
            for col in x_vals:
                frame[col] = [convert_v[col](e) for e in frame[col]]
            frame.sort_values(by=x_vals,inplace=True)
    # collapse the per-parameter errors to one median per (data set, keyword set)
    group_cols = ["Parameter set"] + x_vals
    df_error_median_N = df_error_N[["Error (%)"] + group_cols].groupby(group_cols).median().reset_index()
    df_time_and_error = df_error_median_N.merge(df_time_N,on=group_cols)
    # Create subplots
    category_orders = {x:sorted(set(df_time_and_error[x]))}
    fig = make_subplots(rows=2, cols=1,horizontal_spacing=0.15)
    # px.box builds a whole figure; only its first trace is copied into the
    # subplot grid, so axis titles and log scaling are applied again below
    fig.add_trace(px.box(x=x,y="Error (%)",points="all",data_frame=df_time_and_error,
                         log_y=True,category_orders=category_orders).data[0],row=1,col=1)
    fig.add_trace(px.box(df_time_N,x=x,y="Time (s)",points="all",
                        category_orders=category_orders).data[0],row=2,col=1)
    fig.update_xaxes(title_text=x,row=1,col=1)
    fig.update_yaxes(title_text="Error (%)",type='log',row=1,col=1)
    fig.update_xaxes(title_text=x,row=2,col=1)
    fig.update_yaxes(title_text="Time (s)",type='log',row=2,col=1)
    fig.update_layout(autosize=False,height=600,width=400,title=title)
    fig.show(renderer="iframe")

def error_values(kw_fit_arr,x_y_kw,bounds=None,time_repeats=0):
    """
    Run dghf.fit on every data set with every keyword set and tabulate the
    relative error of each fitted parameter and the time taken per fit.

    :param kw_fit_arr: list of dicts; each dict is passed to dghf.fit as
        extra keyword arguments and its items are copied into every output row
    :param x_y_kw: iterable of (x, y, kw) tuples, where kw maps each fitted
        parameter name to its expected value
    :param bounds: passed to dghf.fit as its `bounds` keyword
    :param time_repeats: number of timed repetitions of each fit; if not
        positive, no timing is done and "Time (s)" is NaN
    :return: tuple (errors, times) of pandas DataFrames. `errors` has one row
        per fitted parameter with columns "Parameter set", "Parameter" and
        "Error (%)"; `times` has one row per fit with columns "Parameter set"
        and "Time (s)" (mean seconds per fit). Both also carry one column per
        swept keyword.
    """
    errors = []
    times = []
    for kw_params in kw_fit_arr:
        for i,(x,y,kw) in enumerate(x_y_kw):
            # the closure is invoked only inside this iteration, so the loop
            # variables it captures are always the current ones
            _f_callable = lambda : dghf.fit(x,y,bounds=bounds,**kw_params)
            kw_fit = _f_callable()
            if time_repeats > 0:
                # timeit.timeit returns the TOTAL time of `number` calls;
                # divide to get the average time of a single fit
                time_avg = timeit.timeit(_f_callable,number=time_repeats) / time_repeats
            else:
                time_avg = np.nan
            times.append({"Parameter set":i,"Time (s)":time_avg,**kw_params})
            for k,v_calculate in kw_fit.items():
                v_expected = kw[k]
                # NOTE(review): the relative error is undefined when the
                # expected value is 0 -- confirm the data sets never contain one
                relative_error = abs((v_expected-v_calculate)/(v_expected))
                errors.append({"Parameter set":i,"Parameter":k,
                               "Error (%)": 100 * relative_error,**kw_params})
    return pandas.DataFrame(errors), pandas.DataFrame(times)

In [3]:
# Simulated data sets come from the hill_test test case.
simulated_data = hill_test.MyTestCase().simulated_data
# Keep the first entry of each simulated record, skipping the record whose
# expected parameters are all NaN.
# NOTE(review): the set comparison matches NaN by object identity (NaN != NaN),
# so it only drops records whose values are the `np.nan` object itself --
# confirm that is what hill_test stores.
x_y_kw = [
    record[0]
    for record in simulated_data
    if set(record[0][-1].values()) != {np.nan}
]
# All of the simulated data has a positive Hill coefficient, so only the last
# of the four bounds is restricted (to [0, inf)).
bounds = [
    [None, None],
    [None, None],
    [None, None],
    [0, np.inf],
]

In [4]:
# Sweep the `coarse_n` fit keyword from 1 to 10 and plot error and time for each.
plot_error_and_time(
    kw_fit_arr=[{"coarse_n": n} for n in range(1, 11)],
    x_y_kw=x_y_kw,
    title="Error versus brute grid points",
    bounds=bounds,
    x="coarse_n",
)

{'coarse_n': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}


In [5]:
# Compare the scipy optimizers passed to the fit as its `finish` keyword.
# The title now names the quantity actually swept here (it was copied from the
# `coarse_n` cell), and the x-axis labels come from each function's __name__
# rather than from parsing its repr.
plot_error_and_time(kw_fit_arr=[dict(finish=fin) for fin in [optimize.fmin,optimize.fmin_powell,
                                                             optimize.fmin_cg,optimize.fmin_bfgs]],
                    x_y_kw=x_y_kw,title = "Error versus finish optimizer",
                    bounds=bounds,x="finish",convert_v={"finish":lambda f: f.__name__})

{'finish': ['fmin', 'fmin_bfgs', 'fmin_cg', 'fmin_powell']}


In [6]:
# Error and time for the fmin_powell `finish` optimizer on its own.
# The title now names what this cell shows (it was copied from the `coarse_n`
# cell), and the x-axis label comes from the function's __name__ rather than
# from parsing its repr.
plot_error_and_time(kw_fit_arr=[dict(finish=fin) for fin in [optimize.fmin_powell]],
                    x_y_kw=x_y_kw,title = "Error versus finish optimizer (fmin_powell only)",
                    bounds=bounds,x="finish",convert_v={"finish":lambda f: f.__name__})

{'finish': ['fmin_powell']}
