In [4]:
from data import*
from regression import*

In [2]:
# dataset_synth = generate_synth_motion_data( ['arduino_00', 'arduino_01', 'arduino_02'], save_dataset_name='synth_00', epochs=500, hiddendim=300, latentdim=300)

In [5]:
# dataset_meas_00 = load_dataset('arduino_00', resample_dt=None, as_dict=False)
# dataset_meas_01 = load_dataset('arduino_01', resample_dt=None, as_dict=True)
# dataset_meas_02 = load_dataset('arduino_02', resample_dt=None, as_dict=False)
# dataset_synth_00 = load_dataset('synth_00', resample_dt=None, as_dict=False)
# dataset_synth_01 = load_dataset('synth_01', resample_dt=None, as_dict=False)

In [None]:

####################################################################################################################################################
class DATA(object):
    """Container for a set of recordings: features ``X`` and targets ``Y``.

    ``X`` is either a regular array of shape (N, Nt, Nf) or, when the
    recordings have different lengths, a 1-D object array whose items are
    (Nt_i, Nf) arrays.  ``Y`` holds the matching targets.

    Apart from ``load`` (which fills ``self``), every method returns a new
    DATA object built from a deep copy and leaves ``self`` untouched.

    The class relies on ``np``, ``pd``, ``copy``, ``random``, ``signal``,
    ``eps`` and ``load_dataset`` being available in the notebook namespace
    (presumably supplied by the star imports at the top of the notebook).
    """
    ######################################################################################################
    def __init__(self, X=None, Y=None, dataset_name=None, **params):
        """Build the container from arrays or load it from a named dataset.

        Args:
            X: features (sequence of recordings); ignored if ``dataset_name`` is given.
            Y: targets; ignored if ``dataset_name`` is given.
            dataset_name: when not None, the data is read with ``load``.
            **params: forwarded to ``load`` (``features``, ``target``).
        """
        if dataset_name is not None:
            self.load(dataset_name, **params)
        else:
            # None instead of a shared mutable [] default.
            self.X = self._as_array([] if X is None else X)
            self.Y = self._as_array([] if Y is None else Y)
        return
    ######################################################################################################
    @staticmethod
    def _as_array(items):
        """Convert ``items`` to an ndarray, using an object array for ragged input.

        Recent NumPy raises ValueError when asked to build an array from
        sequences of different lengths; in that case each item is stored as
        one element of a 1-D object array, which is the layout ``mtx`` expects.
        """
        try:
            return np.array(items)
        except ValueError:
            ragged = np.empty(len(items), dtype=object)
            for i, item in enumerate(items):
                ragged[i] = item
            return ragged
    ######################################################################################################
    def load(self, dataset_name, features=('synth_vind_1', 'synth_vind_2'), target='center_1'):
        """Fill ``self.X`` / ``self.Y`` from the dataset called ``dataset_name``.

        For every DataFrame returned by ``load_dataset(dataset_name, as_dict=False)``:
        X gets one column per entry of ``features``; Y is the row-wise Euclidean
        norm of the ``target`` column (each cell is converted to a row of a 2-D array).
        """
        dataset_df_list = load_dataset(dataset_name, as_dict=False)
        X, Y = list(), list()
        for data in dataset_df_list:
            x = np.zeros((len(data), len(features)))
            for i, feature in enumerate(features):
                x[:, i] = data[feature].to_list()
            y = np.linalg.norm(np.array(data[target].to_list()), axis=1)

            X.append(x)
            Y.append(y)

        # Recordings may differ in length, so the stack can be ragged.
        self.X = self._as_array(X)
        self.Y = self._as_array(Y)
        return
    ######################################################################################################
    def segment(self, win_size, step=None, as_df=False):
        """Cut the recordings into sliding windows for one-step-ahead regression.

        Requires ``self.X`` to be a regular (N, Nt, Nf) array and ``self.Y`` to be
        (N, Nt).  For each start ``t`` in ``range(0, Nt - win_size, step)`` the
        window ``X[:, t:t+win_size, :]`` is flattened to ``win_size*Nf`` values and
        paired with the target sample right after the window, ``Y[:, t+win_size]``.

        Args:
            win_size: number of time samples per window.
            step: hop between window starts; defaults to ``win_size`` (no overlap).
            as_df: if True return a DataFrame with columns ``feature_<i>`` and
                ``target`` instead of a DATA object.
        """
        if step is None:
            step = win_size

        X, Y = list(), list()
        N, Nt, Nf = np.shape(self.X)
        for t in range(0, Nt - win_size, step):
            # extend() appends one row per recording without rebuilding the list.
            X.extend(self.X[:, t:t + win_size, :].reshape(N, -1))
            Y.extend(self.Y[:, t + win_size])
        data_segmented = DATA(X, Y)

        if as_df:
            data_df = pd.DataFrame(
                np.concatenate([data_segmented.X, np.reshape(data_segmented.Y, (-1, 1))], axis=1)
            )
            data_df.columns = [*['feature_' + str(i) for i in range(win_size * Nf)], 'target']
            return data_df

        return data_segmented
    ######################################################################################################
    def merge(self, new_dataset):
        """Return a copy holding the recordings of ``self`` followed by those of ``new_dataset``."""
        merged_dataset = copy.deepcopy(self)
        # The two datasets may hold recordings of different lengths.
        merged_dataset.X = self._as_array([*self.X, *new_dataset.X])
        merged_dataset.Y = self._as_array([*self.Y, *new_dataset.Y])
        return merged_dataset
    ######################################################################################################
    def select(self, idx_list):
        """Return a copy restricted to the recordings indexed by ``idx_list``."""
        selected_dataset = copy.deepcopy(self)
        selected_dataset.X = self.X[idx_list]
        selected_dataset.Y = self.Y[idx_list]
        return selected_dataset
    ######################################################################################################
    def split(self, ratio):
        """Randomly split the recordings into two DATA objects.

        The first part receives ``int(N*ratio)`` recordings, the second the rest.
        Shuffling uses the ``random`` module, so seed it with ``random.seed`` for
        a reproducible split.
        """
        N = len(self.X)
        idxs = np.arange(N)
        random.shuffle(idxs)

        Ntrain = int(N * ratio)
        data_p1 = self.select(idxs[:Ntrain])
        data_p2 = self.select(idxs[Ntrain:])

        return data_p1, data_p2
    ######################################################################################################
    def mtx(self, Nt_mtx='max'):
        """Pad or cut every recording in X to a common length, giving an (Nd, Nt, Nf) array.

        Args:
            Nt_mtx: target length.  An int is used as is; a string names a NumPy
                reducer (e.g. 'max', 'min', 'mean', 'median') applied to the list
                of recording lengths.

        Short recordings are zero-padded at the end, long ones are truncated.
        No mean removal is performed.  Y is not modified.  If X is already a
        regular multi-dimensional array the copy is returned unchanged.
        """
        data_mtx = copy.deepcopy(self)
        if len(np.shape(data_mtx.X)) > 1:
            return data_mtx

        Nd, Nf = len(self.X), np.shape(self.X[0])[1]
        Nt_list = [np.shape(x)[0] for x in self.X]
        if isinstance(Nt_mtx, str):
            # Attribute lookup instead of eval(): same result for reducer names,
            # without executing arbitrary text.  Dotted names are still resolved.
            reducer = np
            for part in Nt_mtx.split('.'):
                reducer = getattr(reducer, part)
            Nt = int(reducer(Nt_list))
        else:
            Nt = Nt_mtx

        data_mtx.X = np.zeros((Nd, Nt, Nf))
        for idx, x in enumerate(self.X):
            nt = np.shape(x)[0]
            if Nt >= nt:
                data_mtx.X[idx, :, :] = np.pad(x, ((0, Nt - nt), (0, 0)), 'constant')
            else:
                data_mtx.X[idx, :, :] = x[:Nt, :]
        return data_mtx
    ######################################################################################################
    def bound(self, min_value=None, max_value=None):
        """Return a copy whose X values are clipped to [min_value, max_value].

        A bound left as None is not applied.
        """
        bounded_data = copy.deepcopy(self)
        if min_value is not None:
            for x in bounded_data.X:
                x[x < min_value] = min_value
        if max_value is not None:
            for x in bounded_data.X:
                x[x > max_value] = max_value

        return bounded_data
    ######################################################################################################
    def trim(self, keep_ratio=None, dt=20):
        """Return a copy whose recordings in X are shortened.  Y is not modified.

        Args:
            keep_ratio: if given, keep the first ``int(len(x)*keep_ratio)`` samples
                of every recording.  If None, the cut points are detected
                automatically (see below).
            dt: margin in samples used by the automatic mode (default 20).

        Automatic mode: a change signal is computed per recording, normalised by
        its maximum, and thresholded at 0.5.  The first hit in the first half
        (plus ``dt``) becomes the start and the last hit in the second half
        (minus ``dt``) becomes the end; without hits the defaults are ``dt`` and
        ``N-dt``.  If less than half of the recording would remain, it is kept whole.
        """
        trimmed_data = copy.deepcopy(self)
        trimmed = list()

        if keep_ratio is None:
            for x in self.X:
                N = len(x)
                n1, n2 = dt, N - dt
                # NOTE(review): the first diff runs along the last (feature) axis,
                # i.e. it differences neighbouring feature columns, and only the
                # second diff runs along time.  Kept as in the original; confirm
                # that a time-axis diff was not intended.
                xx = abs(np.diff(x, axis=-1))
                xx = np.sum(xx, axis=1)
                xx = abs(np.diff(xx))
                xx = xx / (np.nanmax(xx) + eps)
                idxs = np.where(xx > 0.5)[0]
                idxs1 = idxs[idxs < 0.5 * N]
                idxs2 = idxs[idxs > 0.5 * N]
                # Test for "any index found" by size: np.any() on the indices
                # themselves is False when the only hit is index 0.
                if idxs1.size:
                    n1 = np.min(idxs1) + dt
                if idxs2.size:
                    n2 = np.max(idxs2) - dt
                if (n2 - n1) < 0.5 * N:
                    n1, n2 = 0, N
                trimmed.append(x[n1:n2, :])
        else:
            for x in self.X:
                L = int(len(x) * keep_ratio)
                trimmed.append(x[:L, :])

        # Trimmed recordings generally differ in length.
        trimmed_data.X = self._as_array(trimmed)
        return trimmed_data
    ######################################################################################################
    def quantize(self, Qstep):
        """Return a copy whose X values are rounded down to multiples of ``Qstep``."""
        quantized_data = copy.deepcopy(self)
        for idx, x in enumerate(quantized_data.X):
            quantized_data.X[idx] = Qstep * np.floor(x / Qstep)
        return quantized_data
    ######################################################################################################
    def clean(self):
        """Return a copy whose NaNs in X are filled along the time axis.

        Each recording containing NaNs is forward-filled, then backward-filled
        (so leading NaNs take the first valid value).  Y is not modified.
        """
        cleaned_data = copy.deepcopy(self)
        for idx, x in enumerate(cleaned_data.X):
            if np.any(np.isnan(x)):
                df = pd.DataFrame(x)
                # ffill()/to_numpy() replace fillna(method=...)/as_matrix(),
                # which current pandas no longer provides.
                cleaned_data.X[idx] = df.ffill(axis=0).bfill(axis=0).to_numpy()

        return cleaned_data
    ######################################################################################################
    def filter_noise(self, window_length=5, polyorder=2):
        """Return a copy whose X columns are smoothed with a Savitzky-Golay filter.

        ``window_length`` and ``polyorder`` are passed to ``signal.savgol_filter``,
        applied independently to every feature column of every recording.
        """
        filtered_data = copy.deepcopy(self)
        for n, x in enumerate(self.X):
            for i in range(np.shape(x)[1]):
                filtered_data.X[n][:, i] = signal.savgol_filter(x[:, i], window_length, polyorder)
        return filtered_data
    ######################################################################################################
    def MinMax(self):
        """Return a copy whose X is rescaled per recording and per feature to about [0, 1].

        ``eps`` is added to the range so constant columns do not divide by zero.
        """
        normalized_data = copy.deepcopy(self)
        for idx, x in enumerate(normalized_data.X):
            MIN = np.nanmin(x, axis=0)
            MAX = np.nanmax(x, axis=0)
            normalized_data.X[idx] = np.subtract(x, MIN) / (np.subtract(MAX, MIN) + eps)
        return normalized_data
    ######################################################################################################
    def standardize(self, scale=True):
        """Return a copy whose X has the per-recording, per-feature mean removed.

        With ``scale=True`` each feature is also divided by its standard
        deviation (plus ``eps``); with ``scale=False`` only the mean is removed.
        """
        normalized_data = copy.deepcopy(self)
        STD = 1
        for idx, x in enumerate(normalized_data.X):
            MEAN = np.mean(x, axis=0)
            if scale:
                STD = np.std(x, axis=0) + eps
            normalized_data.X[idx] = np.subtract(x, MEAN) / STD
        return normalized_data
####################################################################################################################################################


In [None]:
# Load the 'synth_00' dataset for training and 'arduino_02' for testing, reading the
# same two feature columns and the same target column from both.
train_dataset = DATA( dataset_name='synth_00',  features=['synth_vind_1', 'synth_vind_2'], target='center_1')
test_dataset = DATA( dataset_name='arduino_02',  features=['synth_vind_1', 'synth_vind_2'], target='center_1')

In [6]:
# Sliding-window settings: 20 samples per window, a new window every 10 samples.
win_size = 20
step = 10

# as_df=True yields a DataFrame (flattened window columns plus a 'target' column),
# which is the form handed to setup() in a later cell.
train_dataset_sg = train_dataset.segment(win_size, step=step, as_df=True)

In [2]:
%matplotlib inline
from utils import *
from pycaret.regression import*
from sklearn.metrics import mean_squared_error


KeyboardInterrupt: 

In [None]:
# Initialise the regression experiment on the segmented training frame.
# NOTE(review): setup() presumably comes from `from pycaret.regression import*`, which
# sits in a cell whose pasted output is a KeyboardInterrupt — make sure that import
# cell has completed before running this one.
# NOTE(review): `silent` is accepted only by some PyCaret releases — confirm against
# the installed version.
exp_reg = setup(train_dataset_sg,
                train_size = .7,
                target = 'target', 
#                 normalize = True, 
                silent = True,
                session_id = 20
                )   

In [13]:
# Tune the model identified by 'rf'; the result is reused by the plotting and
# prediction cells below.
# NOTE(review): some PyCaret releases expect an estimator object (from create_model)
# here rather than a string id — confirm against the installed version.
model = tune_model('rf')

IntProgress(value=0, description='Processing: ', max=16)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE


In [None]:
# Show the 'error' plot for the tuned model.
plot_model(model, plot='error')
# plot_model(model, plot='feature')

In [None]:
# Rebinds test_dataset (previously built with DATA(...) and the 'synth_vind_*' columns)
# using the 'vind_1'/'vind_2' columns instead.
# NOTE(review): get_dataset is not defined in the visible cells — presumably it comes
# from one of the star imports; confirm it returns an object with a .mtx() method.
test_dataset = get_dataset( 'arduino_02',  features=['vind_1', 'vind_2'], target='center_1')
# Pad every recording to the longest one so X becomes a single 3-D array.
test_dataset_mtx = test_dataset.mtx(Nt_mtx='max')
np.shape(test_dataset_mtx.X)
# train_dataset_sg = train_dataset.segment(win_size, step=step, as_df=True)

In [None]:

# Predict on one held-out segment and compare against the true target.
# NOTE(review): validation_data_segmented is not created in any visible cell, so this
# cell fails on a fresh kernel — define it (or point at the intended frame) first.
prediction = predict_model(model, data=validation_data_segmented[3])
# Smooth the predicted column ('Label') with a 31-sample, first-order Savitzky-Golay filter.
smoothed_pred = signal.savgol_filter( prediction.Label, window_length=31, polyorder=1)  

# Overlay true target, raw prediction and smoothed prediction on one axis.
ax = prediction.plot(y='target', legend=False)
prediction.plot(y='Label', ax=ax)
plt.plot(smoothed_pred)
plt.legend(['True Value', 'Prediction', 'Smoothed Prediction'])
plt.show()

In [64]:
# Benchmark the available regressors on the current experiment; the resulting
# score table is shown as the cell output.
compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Extra Trees Regressor,0.0273,0.0014,0.0378,0.8552,0.0276,0.0856
1,CatBoost Regressor,0.0282,0.0015,0.0381,0.8524,0.028,0.0888
2,Random Forest,0.028,0.0015,0.0387,0.8478,0.0284,0.0878
3,Light Gradient Boosting Machine,0.0285,0.0015,0.0388,0.847,0.0284,0.0892
4,Extreme Gradient Boosting,0.032,0.0019,0.0436,0.8074,0.0319,0.1004
5,Gradient Boosting Regressor,0.0321,0.0019,0.0436,0.807,0.0319,0.1006
6,K Neighbors Regressor,0.0325,0.0021,0.0458,0.787,0.0337,0.1059
7,AdaBoost Regressor,0.043,0.0028,0.053,0.7149,0.0391,0.1348
8,Support Vector Machine,0.0455,0.0031,0.0555,0.688,0.0413,0.1488
9,Decision Tree,0.0395,0.0031,0.0556,0.686,0.0406,0.1213


In [12]:
# 