# Function Fitting

In [1]:
import numpy as np
from scipy.optimize import curve_fit
from scipy.optimize import least_squares
import DC_Pickle as dcp
import Curve_Functions as cv
import matplotlib.pyplot as plt
%matplotlib inline

def disp_Data(x, y_true, y_pred, file_path, clt_num, cost, rows=1, columns=1, size=(6, 4)):
    """Plot observed values against a fitted curve, save the figure and show it.

    Parameters
    ----------
    x : sequence
        x positions shared by both series.
    y_true : sequence
        Observed values, drawn as red crosses.
    y_pred : sequence
        Fitted values, drawn as a blue line.
    file_path : str
        Destination handed to ``Figure.savefig`` (saved at 100 dpi).
    clt_num : int
        Cluster number shown in the title.
    cost : float
        Fit cost shown in the title, rounded to two decimals.
    rows, columns : int
        Subplot grid shape. The data is always drawn on the first axes.
    size : tuple
        Figure size in inches.
    """
    # squeeze=False always yields a 2-D array of axes, so the function also
    # works when rows/columns are larger than 1 (a bare `ax` would then be an
    # ndarray without a .plot method).
    fig, axes = plt.subplots(rows, columns, figsize=size, squeeze=False)
    ax = axes[0, 0]

    ax.plot(x, y_true, 'rx', label='average score')
    ax.plot(x, y_pred, 'b-', label='curve fitting')

    # nanmax ignores missing values; the builtin max() can return NaN when the
    # data contains NaN, which matplotlib rejects as an axis limit.
    ax.set_xlim([0, np.nanmax(x) + 1])
    ax.set_ylim([0, np.nanmax(y_true) + 0.2])
    ax.legend(fontsize=14)
    ax.set_title("cluster {0}: cost {1}".format(clt_num, round(cost, 2)))

    fig.savefig(file_path, dpi=100)
    plt.show()
    # Release the figure: this function is called once per cluster in loops.
    plt.close(fig)

## load data

In [2]:
# Cluster labels: element 9 of the pickled index collection is the one used here
# (the original note "#2, 8" presumably lists alternatives that were tried).
train_idx = dcp.open_Pickle("../../data/pickles/clusters_origin/indices/index13.pickle")[9]

# Scores: keep the first 300 rows (attempts) and rescale by 1e4.
train_scores = dcp.open_Pickle('../../data/pickles/seperate_origin/eventValue.pickle')
train_scores = train_scores[:300, :] / 1e+4

# 1-based attempt numbers used as x values for fitting / validation.
attempts15 = np.arange(1, 16)
attempts300 = np.arange(1, 301)

## keep only the columns whose first 15 attempts contain no missing (NaN) value
head_complete = ~np.isnan(train_scores[:15, :]).any(axis=0)

# idx_pure: positions of the kept columns.
# idx_all : one entry per original column, NaN where the column was dropped.
idx_pure = [col for col, keep in enumerate(head_complete) if keep]
idx_all = [col if keep else np.nan for col, keep in enumerate(head_complete)]

train_scores = train_scores[:, idx_pure]
print("Training data set: {0}".format(np.shape(train_scores)))

Training data set: (300, 22832)


In [3]:
## get cluster data and cluster average data (average is centroid)
nClt = 13
for i in range(nClt):
    # Columns of train_scores whose cluster label equals i.
    cluster_scores = train_scores[:, train_idx == i]

    # Per-attempt mean over the cluster, ignoring missing (NaN) scores.
    # Covers however many rows train_scores has instead of assuming 300.
    # A row where every score is NaN yields NaN (with a RuntimeWarning).
    cluster_avg = np.nanmean(cluster_scores, axis=1)

    # Later cells look these up by name (cluster1..cluster13, avg1..avg13),
    # so they are still published as module-level variables.
    globals()["cluster{0}".format(i+1)] = cluster_scores
    globals()["avg{0}".format(i+1)] = cluster_avg

## make folder for each function

In [None]:
# Create the root output directory for all curve-fitting figures.
# NOTE(review): presumably a no-op when the directory already exists -- confirm in DC_Pickle.make_folders.
dcp.make_folders("Figs/curve_fitting/")

## exponential fit

### Two parameters

#### train on 15 attempts

In [None]:
# Output directory for the two-parameter exponential figures.
dcp.make_folders("Figs/curve_fitting/exponential2")

seed = [1, 1]  # initial guess, one entry per curve parameter
for i in range(nClt):
    cluster_no = i + 1  # clusters are numbered from 1 in names, titles and files
    cluster_avg = globals()["avg{0}".format(cluster_no)]

    print("cluster {0}:".format(cluster_no))
    ## train on the first 15 attempts only
    # NOTE(review): curve_Fitting presumably also writes a figure to the given path -- confirm in Curve_Functions.
    exp2_opt, exp2_cost = cv.curve_Fitting(
        cv.exponential_least2, cv.exponential_curve2,
        attempts15, cluster_avg[:15], seed,
        "Figs/curve_fitting/exponential2/{0}".format(cluster_no), clt_num=cluster_no)

    ## validation: evaluate the fitted curve over all 300 attempts
    y_fit = cv.exponential_curve2(attempts300, exp2_opt[0], exp2_opt[1])
    exp2_cost300 = cv.cost_Function(cluster_avg, y_fit)  # cost against the full average curve

    # The validation file now uses the same 1-based cluster number as the
    # training figure and the plot title (it was 0-based before).
    disp_Data(attempts300, cluster_avg, y_fit,
              file_path="Figs/curve_fitting/exponential2/valid{0}".format(cluster_no),
              clt_num=cluster_no, cost=exp2_cost300)

### Three parameters

#### train on 15 attempts

In [None]:
# Output directory for the three-parameter exponential figures.
dcp.make_folders("Figs/curve_fitting/exponential3")

seed = [1, 1, 1]  # initial guess, one entry per curve parameter
for i in range(nClt):
    cluster_no = i + 1  # clusters are numbered from 1 in names, titles and files
    cluster_avg = globals()["avg{0}".format(cluster_no)]

    print("cluster {0}:".format(cluster_no))
    ## train on the first 15 attempts only
    exp3_opt, exp3_cost = cv.curve_Fitting(
        cv.exponential_least3, cv.exponential_curve3,
        attempts15, cluster_avg[:15], seed,
        "Figs/curve_fitting/exponential3/{0}".format(cluster_no), clt_num=cluster_no)

    ## validation: evaluate the fitted curve over all 300 attempts
    y_fit = cv.exponential_curve3(attempts300, exp3_opt[0], exp3_opt[1], exp3_opt[2])
    exp3_cost300 = cv.cost_Function(cluster_avg, y_fit)  # cost against the full average curve

    # (An unused `cost300` that compared the attempt numbers themselves with
    # the fitted values was removed here.)
    # The validation file uses the same 1-based cluster number as the training
    # figure and the plot title (it was 0-based before).
    disp_Data(attempts300, cluster_avg, y_fit,
              file_path="Figs/curve_fitting/exponential3/valid{0}".format(cluster_no),
              clt_num=cluster_no, cost=exp3_cost300)

### polynomial fit

In [None]:
# Output directory for the polynomial figures.
dcp.make_folders("Figs/curve_fitting/polynomial2")

seed = [1, 1]  # initial guess, one entry per curve parameter
for i in range(nClt):
    cluster_no = i + 1  # clusters are numbered from 1 in names, titles and files
    cluster_avg = globals()["avg{0}".format(cluster_no)]

    print("cluster {0}:".format(cluster_no))
    ## train on the first 15 attempts only
    poly_opt, poly_cost = cv.curve_Fitting(
        cv.polynomial_least, cv.polynomial_curve,
        attempts15, cluster_avg[:15], seed,
        "Figs/curve_fitting/polynomial2/{0}".format(cluster_no), clt_num=cluster_no)

    ## validation: evaluate the fitted curve over all 300 attempts
    y_fit = cv.polynomial_curve(attempts300, poly_opt[0], poly_opt[1])
    poly_cost300 = cv.cost_Function(cluster_avg, y_fit)  # cost against the full average curve

    # The validation file uses the same 1-based cluster number as the training
    # figure and the plot title (it was 0-based before).
    disp_Data(attempts300, cluster_avg, y_fit,
              file_path="Figs/curve_fitting/polynomial2/valid{0}".format(cluster_no),
              clt_num=cluster_no, cost=poly_cost300)

### power law fit

#### Two parameters

In [None]:
# Output directory for the two-parameter power-law figures.
dcp.make_folders("Figs/curve_fitting/powerlaw2")

seed = [1, 1]  # initial guess, one entry per curve parameter
for i in range(nClt):
    cluster_no = i + 1  # clusters are numbered from 1 in names, titles and files
    cluster_avg = globals()["avg{0}".format(cluster_no)]

    print("cluster {0}:".format(cluster_no))
    ## train on the first 15 attempts only
    pl2_opt, pl2_cost = cv.curve_Fitting(
        cv.powerlaw_least2, cv.powerlaw_curve2,
        attempts15, cluster_avg[:15], seed,
        "Figs/curve_fitting/powerlaw2/{0}".format(cluster_no), clt_num=cluster_no)

    ## validation: evaluate the fitted curve over all 300 attempts
    y_fit = cv.powerlaw_curve2(attempts300, pl2_opt[0], pl2_opt[1])
    pl2_cost300 = cv.cost_Function(cluster_avg, y_fit)  # cost against the full average curve

    # The validation file uses the same 1-based cluster number as the training
    # figure and the plot title (it was 0-based before).
    disp_Data(attempts300, cluster_avg, y_fit,
              file_path="Figs/curve_fitting/powerlaw2/valid{0}".format(cluster_no),
              clt_num=cluster_no, cost=pl2_cost300)

#### Three parameters

In [None]:
# Output directory for the three-parameter power-law figures.
dcp.make_folders("Figs/curve_fitting/powerlaw3")

seed = [1, 1, 1]  # initial guess, one entry per curve parameter
for i in range(nClt):
    cluster_no = i + 1  # clusters are numbered from 1 in names, titles and files
    cluster_avg = globals()["avg{0}".format(cluster_no)]

    print("cluster {0}:".format(cluster_no))
    ## train on the first 15 attempts only
    pl3_opt, pl3_cost = cv.curve_Fitting(
        cv.powerlaw_least3, cv.powerlaw_curve3,
        attempts15, cluster_avg[:15], seed,
        "Figs/curve_fitting/powerlaw3/{0}".format(cluster_no), clt_num=cluster_no)

    ## validation: evaluate the fitted curve over all 300 attempts
    y_fit = cv.powerlaw_curve3(attempts300, pl3_opt[0], pl3_opt[1], pl3_opt[2])
    pl3_cost300 = cv.cost_Function(cluster_avg, y_fit)  # cost against the full average curve

    # The validation file uses the same 1-based cluster number as the training
    # figure and the plot title (it was 0-based before).
    disp_Data(attempts300, cluster_avg, y_fit,
              file_path="Figs/curve_fitting/powerlaw3/valid{0}".format(cluster_no),
              clt_num=cluster_no, cost=pl3_cost300)

#### Four parameters

In [None]:
# Output directory for the four-parameter power-law figures.
dcp.make_folders("Figs/curve_fitting/powerlaw4")

seed = [1, 1, 1, 1]  # initial guess, one entry per curve parameter
for i in range(nClt):
    cluster_no = i + 1  # clusters are numbered from 1 in names, titles and files
    cluster_avg = globals()["avg{0}".format(cluster_no)]

    print("cluster {0}:".format(cluster_no))
    ## train on the first 15 attempts only
    pl4_opt, pl4_cost = cv.curve_Fitting(
        cv.powerlaw_least4, cv.powerlaw_curve4,
        attempts15, cluster_avg[:15], seed,
        "Figs/curve_fitting/powerlaw4/{0}".format(cluster_no), clt_num=cluster_no)

    ## validation: evaluate the fitted curve over all 300 attempts
    y_fit = cv.powerlaw_curve4(attempts300, pl4_opt[0], pl4_opt[1], pl4_opt[2], pl4_opt[3])
    pl4_cost300 = cv.cost_Function(cluster_avg, y_fit)  # cost against the full average curve

    # The validation file uses the same 1-based cluster number as the training
    # figure and the plot title (it was 0-based before).
    disp_Data(attempts300, cluster_avg, y_fit,
              file_path="Figs/curve_fitting/powerlaw4/valid{0}".format(cluster_no),
              clt_num=cluster_no, cost=pl4_cost300)

## Multiple curves

### exponential with three parameters

In [None]:
seed = [1, 1, 1]  # initial guess for the three exponential parameters

for clt_num in range(nClt):
    # Average curve of the cluster being fitted in this iteration.
    cluster_avg = globals()["avg{0}".format(clt_num+1)]

    # NOTE(review): from the names, multi_Fitting2 returns the split index, the
    # total cost and the parameters of the two curves -- confirm in Curve_Functions.
    idx, cost, p1, p2 = cv.multi_Fitting2(cv.exponential_least3, clt_num,
                                          cluster_avg, seed, n_param=3)

    y_mean1 = cv.exponential_curve3(attempts300, p1[0], p1[1], p1[2])
    y_mean2 = cv.exponential_curve3(attempts300, p2[0], p2[1], p2[2])

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    # Plot the data of the cluster that was actually fitted; this used to be
    # hard-coded to avg11 for every cluster.
    ax.plot(attempts300, cluster_avg, 'rx', label='average score')
    ax.plot(attempts300[:idx], y_mean1[:idx], 'b-', label='curve 1', linewidth=3)
    # NOTE(review): curve 2 is drawn from 50 attempts before the split; when
    # idx < 50 the negative start index counts from the end of the array --
    # confirm that this overlap is intended.
    ax.plot(attempts300[idx-50:], y_mean2[idx-50:], 'g-', label='curve 2', linewidth=3)

    ax.set_ylim([0, np.nanmax(cluster_avg)+0.2])
    ax.legend(fontsize=14)
    ax.set_title("cluster {0}: cost {1}".format(clt_num+1, round(cost, 2)))

    plt.show()


In [None]:
# Fit three consecutive three-parameter exponential curves to the cluster-11 average.
# NOTE(review): judging by the names, the result is (first split, second split,
# total cost, parameters of curves 1-3) -- confirm in Curve_Functions.
seed = [1, 1, 1]
idx1, idx2, cost, p1, p2, p3 = cv.multi_curveFitting_3(cv.exponential_least3, avg11, seed, n_param=3)

In [None]:
# Evaluate each fitted exponential over the whole attempt range.
y_mean1 = cv.exponential_curve3(attempts300, p1[0], p1[1], p1[2])
y_mean2 = cv.exponential_curve3(attempts300, p2[0], p2[1], p2[2])
y_mean3 = cv.exponential_curve3(attempts300, p3[0], p3[1], p3[2])

# Slice boundaries of the three plotted pieces.
first_break = idx1
second_break = idx1 + idx2
pieces = (
    (slice(None, first_break), y_mean1, 'b-', 'curve 1'),
    (slice(first_break, second_break), y_mean2, 'g-', 'curve 2'),
    (slice(second_break, None), y_mean3, 'c-', 'curve 3'),
)

fig, ax = plt.subplots(1, 1, figsize=(8, 8))

ax.plot(attempts300, avg11, 'rx', label='average score')
for span, fitted, line_fmt, line_label in pieces:
    ax.plot(attempts300[span], fitted[span], line_fmt, label=line_label, linewidth=3)

ax.set_ylim([0, max(avg11)+0.2])
ax.legend(fontsize=14)
#ax.set_title("cluster {0}: cost {1}".format(clt_num+1, round(cost, 2)))

plt.show()

print(idx1, idx2)

In [None]:
def multi_curveFitting_3(least_func, avg, seed, n_param=2, min_points=1, verbose=True):
    """Exhaustively search the two split points that best fit three consecutive curves.

    The series ``avg`` is paired with x values 1..len(avg). For a candidate
    ``(n1, n2)`` the three segments are, in 0-based positions of ``avg``::

        curve 1: avg[:n1+1]
        curve 2: avg[n1+1 : n1+n2+2]
        curve 3: avg[n1+n2+2:]

    Each segment is fitted independently with ``scipy.optimize.least_squares``
    starting from ``seed``; the candidate with the smallest summed cost wins.

    Parameters
    ----------
    least_func : callable
        Residual function, called by ``least_squares`` as
        ``least_func(params, x, y)``.
    avg : array-like
        Values to fit. Any length >= 3 * min_points (no longer fixed to 300).
    seed : sequence
        Initial parameter guess used for every fit.
    n_param : int
        Number of curve parameters; must equal ``len(seed)``.
    min_points : int
        Minimum number of points in every segment. The default of 1 only
        excludes empty segments, which carry no information.
    verbose : bool
        Print one progress line per first-split candidate.

    Returns
    -------
    tuple
        ``(idx1, idx2, cost, param1, param2, param3)`` where ``idx1``/``idx2``
        are the winning ``n1``/``n2`` and ``paramK`` the fitted parameters of
        curve K.

    Raises
    ------
    ValueError
        If ``len(seed) != n_param``, ``min_points < 1`` or ``avg`` is too short.
    """
    avg = np.asarray(avg, dtype=float)
    n_points = avg.shape[0]

    if len(seed) != n_param:
        raise ValueError("seed has {0} entries but n_param is {1}".format(len(seed), n_param))
    if min_points < 1:
        raise ValueError("min_points must be at least 1")
    if n_points < 3 * min_points:
        raise ValueError("need at least {0} points, got {1}".format(3 * min_points, n_points))

    x_range = np.linspace(1, n_points, n_points)

    def _fit(start, stop):
        # Fit one curve to avg[start:stop]; returns (cost, parameters).
        result = least_squares(least_func, seed, args=(x_range[start:stop], avg[start:stop]))
        return result.cost, result.x

    # The third curve depends only on where it starts, not on how the points
    # before it are divided, so each tail is fitted once and reused.
    tail_fits = {}

    best = None  # (idx1, idx2, cost, param1, param2, param3)
    last_n1 = n_points - 2 * min_points - 1
    for n1 in range(min_points - 1, last_n1 + 1):  # end of curve 1
        if verbose:
            print("iter ", n1)
        cost1, params1 = _fit(0, n1 + 1)

        # Best continuation (curves 2 and 3) for this first split.
        best_remain = None  # (cost2 + cost3, n2, param2, param3)
        last_n2 = n_points - n1 - 2 - min_points
        for n2 in range(min_points - 1, last_n2 + 1):
            tail_start = n1 + n2 + 2
            cost2, params2 = _fit(n1 + 1, tail_start)
            if tail_start not in tail_fits:
                tail_fits[tail_start] = _fit(tail_start, n_points)
            cost3, params3 = tail_fits[tail_start]

            remain = cost2 + cost3
            # Strict '<' keeps the earliest candidate on ties.
            if best_remain is None or remain < best_remain[0]:
                best_remain = (remain, n2, params2, params3)

        total = cost1 + best_remain[0]
        if best is None or total < best[2]:
            best = (n1, best_remain[1], total, params1, best_remain[2], best_remain[3])

    return best

# Run the notebook-local three-curve search on the cluster-11 average with the
# three-parameter exponential residual; seed gives one initial value per parameter.
seed = [1, 1, 1]
idx1, idx2, cost, p1, p2, p3 = multi_curveFitting_3(cv.exponential_least3, avg11, seed, n_param=3)

In [None]:
def multi_curveFitting_3(least_func, avg, seed, n_curve=2):
    """Exploratory draft: print candidate x-ranges for splitting 300 attempts into n_curve pieces.

    No fitting is performed. ``least_func``, ``avg`` and ``seed`` are accepted
    but never used, ``cost`` and ``idx_mid2`` are never filled, and the function
    returns None. The only effect is the printed output.

    NOTE(review): the segments are stored by writing into the dict returned by
    ``locals()`` and read back with ``eval``. The Python documentation says that
    dict should not be modified, so this depends on interpreter behaviour.

    NOTE(review): the last branch prints ``x3`` by name, so it only matches the
    last segment when ``n_curve == 4``.
    """
    cost = []
    idx_mid2 = [] # save idx2(second change)
    
    x_range = np.linspace(1, 300, 300)
    min_range = 5  # shortest segment length considered
    
    # Running slice boundaries shared by all segments within one outer iteration.
    end1 = 0
    end2 = 0
    
    #x = np.ones(n_curve)*np.nan

    '''
    for i in range(n_curve):
        locals()["x{0}".format(i)] = np.nan
    '''
    for n in range(300): # iteration for all data
        print("iter ", n)
        for clt in range(n_curve):
            if clt == 0:
                #print('first')
                # First segment: the first min_range+n x values.
                locals()["x{0}".format(clt)] = x_range[:min_range+n]
                end1 = min_range+n
                print('x0:', eval('x0'))
            
            for j in range(300-n):
                if 0 < clt < n_curve-1:
                    #print('middle')
                    # Middle segment: starts where the previous one ended and is
                    # min_range+j long.
                    end2 = min_range + end1 + j
                    name = "x{0}".format(clt)
                    locals()[name] = x_range[end1:end2]
                    print('x{0}: {1}'.format(clt, eval(name)))
                    end1 = end2
                    # Advances to the next segment inside the j loop; the outer
                    # `for clt` resets this value on its next iteration.
                    clt = clt+1
                elif clt == n_curve-1:
                    #print('last')
                    # Last segment: everything from end2 on. This branch repeats
                    # for every remaining j.
                    locals()["x{0}".format(clt)] = x_range[end2:]
                    print('x3:', eval('x3'))

                    #print("x0:{0}, x1:{1}, x2:{2}".format(eval('x0'), eval('x1'), eval('x2')))

            #print(eval('x0'), eval('x1'))
# Dry run of the segmentation draft with four segments. The draft only prints
# x-ranges, so the residual function, avg2 and seed do not influence the output.
seed = [1, 1, 1, 1]
multi_curveFitting_3(cv.powerlaw_least4, avg2, seed, n_curve=4)

iter  0
x0: [ 1.  2.  3.  4.  5.]
x1: [  6.   7.   8.   9.  10.]
x2: [ 11.  12.  13.  14.  15.  16.]
x3: [  17.   18.   19.   20.   21.   22.   23.   24.   25.   26.   27.   28.
   29.   30.   31.   32.   33.   34.   35.   36.   37.   38.   39.   40.
   41.   42.   43.   44.   45.   46.   47.   48.   49.   50.   51.   52.
   53.   54.   55.   56.   57.   58.   59.   60.   61.   62.   63.   64.
   65.   66.   67.   68.   69.   70.   71.   72.   73.   74.   75.   76.
   77.   78.   79.   80.   81.   82.   83.   84.   85.   86.   87.   88.
   89.   90.   91.   92.   93.   94.   95.   96.   97.   98.   99.  100.
  101.  102.  103.  104.  105.  106.  107.  108.  109.  110.  111.  112.
  113.  114.  115.  116.  117.  118.  119.  120.  121.  122.  123.  124.
  125.  126.  127.  128.  129.  130.  131.  132.  133.  134.  135.  136.
  137.  138.  139.  140.  141.  142.  143.  144.  145.  146.  147.  148.
  149.  150.  151.  152.  153.  154.  155.  156.  157.  158.  159.  160.
  161.  162.  163. 

In [31]:
def multi_curveFitting_3(least_func, avg, seed, n_curve=2):
    seperate_x = len(avg)/n_curve
    end = 0
    for clt in range(n_curve):
        if clt == 0:
            locals()["x{0}".format(clt)] = avg[:seperate_x]
            end = seperate_x
        elif clt == n_curve-1:
            locals()["x{0}".format(clt)] = avg[end:]
        else:
            locals()["x{0}".format(clt)] = avg[end:end+seperate_x]
            end = end+seperate_x
    
    print(eval('x0'), eval('x1'), eval('x2'))
    
# Try the equal-length split on the cluster-11 average with three segments.
seed = [1, 1, 1, 1]
multi_curveFitting_3(cv.powerlaw_least4, avg11, seed, n_curve=3)



NameError: name 'x1' is not defined

In [26]:
# Scratch check: 300 divided by 7, rounded to the nearest integer.
# NOTE(review): presumably the segment length for a 7-way split of 300 attempts -- confirm or delete.
round(300/7)

43