In [6]:
import numpy as np
from scipy.optimize import minimize
import time
import pandas as pd 
import json
import pickle

In [7]:
def tr_fun(x,params):
    '''
    Evaluate the polynomial model at one x-coordinate.

    x: scalar, x-coordinate
    params: ndarray, weights, lowest power first
    model form: a0 + a1*x + a2*x^2 + ...
    returns: the model value at x
    '''
    n_terms = len(params)
    # basis[k] == x**k, so the dot product with the weights is the polynomial.
    basis = np.array([x**power for power in range(n_terms)])
    return basis.dot(params)

def loss_fun(x,y,params):
    '''
    Squared error of the polynomial model at a single point.

    x: scalar, x-coordinate
    y: scalar, observed value at x
    params: ndarray, polynomial weights passed on to tr_fun
    returns: (tr_fun(x, params) - y) squared
    '''
    residual = tr_fun(x,params)-y
    return residual**2

def cost_fun(params,X,Y,penalty_weight=None):
    '''
    L2-regularised mean squared error of a polynomial fit.

    params: ndarray, polynomial weights a0, a1, a2, ... (lowest power first)
    X: ndarray, xs in the interval
    Y: ndarray, ys in the interval
    penalty_weight: weight of the L2 penalty on params; when None (the
        default) the module-level constant C is used
    returns: mean((a0 + a1*x + a2*x^2 + ... - y)^2) + penalty_weight * sum(params^2)
    raises: ValueError if X is empty
    '''
    params = np.asarray(params, dtype=float)
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if X.size == 0:
        raise ValueError('cost_fun needs at least one (x, y) point')
    if penalty_weight is None:
        penalty_weight = C

    # Row k is [X[k]**0, X[k]**1, ...], so one matrix-vector product
    # evaluates the polynomial at every point.
    powers = np.vander(X, len(params), increasing=True)
    residuals = np.dot(powers, params) - Y
    data_cost = np.mean(residuals**2)

    # Sum over the weights themselves: the number of weights is independent of
    # the number of points, so the penalty must not be tied to len(X).
    penalty = penalty_weight * np.sum(params**2)

    return data_cost + penalty

def cost_minimizer(X,Y,params):
    '''
    Minimise cost_fun over the polynomial weights with Nelder-Mead.

    X: ndarray, xs in the interval
    Y: ndarray, ys in the interval
    params: ndarray, initial guess for the weights
    returns: the weights reported by the optimiser (its `x` field); the
        optimiser's success flag is not inspected
    '''
    outcome = minimize(
        cost_fun,
        params,
        args=(X,Y),
        method='Nelder-Mead',
        tol=1e-5,
    )
    return outcome.x


In [8]:
# sampling
# in :df
def sample(data, sampling_rate=None):
    '''
    Down-sample a trajectory table by keeping every n-th row.

    data: DataFrame with 'person_id', 'x' and 'y' columns
    sampling_rate: n; rows at positions 0, n, 2n, ... are kept. When None
        (the default) the module-level SAMPLING_RATE is used
    returns: float ndarray with one row per kept point and the columns
        person_id, x, y
    raises: ValueError if the sampling rate is smaller than 1
    '''
    rate = SAMPLING_RATE if sampling_rate is None else sampling_rate
    if rate < 1:
        raise ValueError('sampling rate must be a positive integer, got %r' % (rate,))

    # Positional slicing keeps ceil(len(data) / rate) rows, so lengths that are
    # not a multiple of the rate work, and it does not depend on the index labels.
    kept = data[['person_id', 'x', 'y']].iloc[::rate]
    return np.asarray(kept, dtype=float)

In [9]:
'''
0.0:{'param': array([[  1.10961481e-01,   1.87371589e-01,   1.91575569e-01,
         -9.30694710e-03,   1.04808754e-04],
       [ -5.08475707e-06,  -1.53824128e-04,  -2.16731290e-03,
         -4.58150687e-04,   5.95677849e-06],
       [ -5.16950820e-06,  -1.56426817e-04,  -2.21302459e-03,
         -4.60758964e-04,   5.93261977e-06],
       [ -5.24407372e-06,  -1.58666290e-04,  -2.20234728e-03,
         -4.58758432e-04,   6.02276921e-06],
       [  9.55750244e-05,  -1.95430749e-03,  -2.80372777e-02,
         -1.22083993e-04,   1.21024965e-05],
       [  1.33961777e-04,   5.55523625e-04,  -1.50674670e-02,
         -2.61815004e-04,   7.93079416e-06],
       [  1.11720758e-04,   6.35419082e-04,  -1.62648254e-02,
         -2.73435454e-04,   9.17428821e-06],
       [  9.87005807e-05,   7.08446104e-04,  -2.30387088e-02,
         -1.91802814e-04,   1.19948478e-05],
       [  1.33718846e-04,   5.43129646e-04,  -2.96899154e-02,
          7.10191702e-06,   1.18399754e-05],
       [  2.44045184e-04,  -4.13514213e-04,  -3.20198044e-02,
          2.93375487e-06,   1.45600534e-05]]), 'x_minmax': array([[ 44.473,  46.619],
       [ 42.949,  44.102],
       [ 43.023,  43.134],
       [ 43.023,  43.45 ],
       [ 42.237,  43.323],
       [ 39.662,  41.84 ],
       [ 36.923,  39.25 ],
       [ 33.805,  36.358],
       [ 31.147,  33.382],
       [ 29.256,  30.804]])}
'''

def fitting(sampling_data):
    '''
    Fit one polynomial per fixed-size window of sampled points.

    sampling_data: 2-D ndarray whose columns are trajectory id, x, y
    returns: dict keyed by trajectory id; each value is
        {'param': [[a0, a1, ...], ...], 'x_minmax': [[xmin, xmax], ...]}
        with one entry per window, stored as plain Python lists so the
        result can be serialised with json

    Windows are consecutive runs of SEGMENT_LENGTH rows; the last one is
    shorter when the row count is not a multiple of SEGMENT_LENGTH. Each
    polynomial has SEGMENT_LENGTH weights.
    '''
    param_dic = {}
    # Initial guess for the first window. Every later window is warm-started
    # from the previous window's solution.
    params = np.zeros(SEGMENT_LENGTH) + 0.1
    for start in range(0, len(sampling_data), SEGMENT_LENGTH):
        window = sampling_data[start:start+SEGMENT_LENGTH]
        X = window[:,1]
        Y = window[:,2]
        # NOTE(review): a window is attributed to the trajectory of its first
        # row; windows are not cut at trajectory boundaries, so this assumes
        # every trajectory has a multiple of SEGMENT_LENGTH rows - confirm.
        tr_id = window[0,0]
        params = cost_minimizer(X,Y,params)

        # Accumulate plain lists: this also covers a trajectory with a single
        # window and avoids re-copying an array on every append.
        entry = param_dic.setdefault(tr_id, {'param': [], 'x_minmax': []})
        entry['param'].append(np.asarray(params).tolist())
        entry['x_minmax'].append([float(np.min(X)), float(np.max(X))])

    return param_dic

### ex1 — subsampling the trajectory points (轨迹点数抽样)

In [14]:
def genEx1Para():
    '''
    Experiment 1: fit every node file at several sampling rates.

    For each node CSV under data/ and each sampling rate, sets the global
    SAMPLING_RATE, samples and fits the data, and writes the resulting
    parameter dict to paraC1/<node>para<rate>.txt.
    '''
    global SAMPLING_RATE
    node_names = ['node1','node2','node3']
    rates = [2,5,10,20,50]
    for node in node_names:
        node_df = pd.read_csv('data/'+node+'.csv')
        for rate in rates:
            # sample() reads the rate from the module-level global.
            SAMPLING_RATE = rate
            fitted = fitting(sample(node_df))
            out_path = 'paraC1/'+node+'para'+str(rate)+'.txt'
            # dict -> txt: the dict is first encoded to a JSON string and that
            # string is then dumped, so the file holds a JSON-encoded string.
            with open(out_path,'w') as out_file:
                json.dump(json.dumps(fitted),out_file)

In [15]:
# Settings for experiment 1.
# sample() keeps every SAMPLING_RATE-th row; genEx1Para() overwrites this
# global with each rate it sweeps, so the value here is only a placeholder.
SAMPLING_RATE = 1
# genEx2Para() divides the row count of a file by this value;
# presumably the number of rows per trajectory - TODO confirm.
POINTS_NUM_PER_TR = 500
# Weight of the L2 penalty in cost_fun().
C = 1
# Rows per fitting window; fitting() also uses it as the number of polynomial weights.
SEGMENT_LENGTH = 5

In [16]:
# Run experiment 1; writes one parameter file per (node, sampling rate) under paraC1/.
genEx1Para()

### ex2

In [21]:
# Settings for experiment 2.
# sample() keeps every SAMPLING_RATE-th row.
SAMPLING_RATE = 5
# genEx2Para() divides the row count of a file by this value;
# presumably the number of rows per trajectory - TODO confirm.
POINTS_NUM_PER_TR = 500
# Weight of the L2 penalty in cost_fun().
C = 1
# Rows per fitting window; fitting() also uses it as the number of polynomial weights.
SEGMENT_LENGTH = 5

In [22]:
# Base names (no extension) of the experiment 2 inputs: genEx2Para() reads
# data/<name>.csv and writes paraC1/<name>.txt for each entry.
file_name = ['node1K5a01','node1K5a03','node1K5a05','node1K5a07','node1K5a09','node2K5a01','node2K5a03','node2K5a05','node2K5a07','node2K5a09','node3K5a01','node3K5a03','node3K5a05','node3K5a07','node3K5a09']

In [25]:
def genEx2Para():
    '''
    Experiment 2: fit every file listed in file_name.

    For each name, reads data/<name>.csv, prints the row count divided by
    POINTS_NUM_PER_TR, samples and fits the data with the current global
    settings, and writes the parameter dict to paraC1/<name>.txt.
    '''
    for name in file_name:
        frame = pd.read_csv('data/'+name+'.csv')
        print(len(frame)/POINTS_NUM_PER_TR)
        fitted = fitting(sample(frame))
        # The dict is first encoded to a JSON string and that string is then
        # dumped, so the file holds a JSON-encoded string.
        with open('paraC1/'+name+'.txt','w') as out_file:
            json.dump(json.dumps(fitted),out_file)

In [26]:
# Run experiment 2; prints one number per input file (row count / POINTS_NUM_PER_TR).
genEx2Para()

16.0
53.0
90.0
125.0
162.0
15.0
52.0
90.0
126.0
162.0
16.0
52.0
91.0
127.0
161.0
