In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import sys
sys.path.append(r'.\py_files_wurm')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import InterpolatedUnivariateSpline , interp1d, UnivariateSpline, LSQUnivariateSpline
from brake_model import BrakeTest
from rule_generator import BinaryRuleExaminer, RuleGenerator
import preprocess_data
import seaborn.apionly as sns
import time
import pickle
from IPython.core.display import display, HTML, Image
display(HTML("<style>.container { width:100% !important; }</style>"))
import math
from helper import *
from scipy.signal import wiener, medfilt
from scipy.signal import butter, lfilter, freqz, filtfilt
from scipy.fftpack import fft
from scipy.fftpack import rfft, irfft, fftfreq
import os



In [4]:
def find_min_time(data):
    res = data[0]['time'].iloc[0]
    for d in data:
        if(d['time'].iloc[0]<res):
            res = d['time'].iloc[0]
    return res

def find_max_time(data):
    res = 0
    for d in data:
        if(d['time'].iloc[-1]>res):
            res = d['time'].iloc[-1]
    return res

In [6]:
def extract_data_search(path, sep=' ', freq=1, fil=True, cutoff=1, sampleFreq=100, order=5):
    data = []
    file_names = []
    for file in os.listdir(path):
        filename, file_extension = os.path.splitext(file)
        if file_extension == '.data':
            df = pd.read_csv(os.path.join((path + file)), sep=sep, header=None, names=['time', 'value'])
            data.append(df)
            file_names.append(filename)
    min_time = find_min_time(data)
    max_time = find_max_time(data)
    points = pd.DataFrame()
    slope = pd.DataFrame()
    for i in range(0, len(data)):
        f = InterpolatedUnivariateSpline(data[i]['time'].values, data[i]['value'].values)
        dx = f.derivative()
        points[file_names[i]] = f(np.arange(int(min_time), int(max_time), freq))
        slope[file_names[i]] = dx(np.arange(int(min_time), int(max_time), freq))
    points = points.T
    slope = slope.T
    points.sort_index(inplace=True)
    slope.sort_index(inplace=True)
    return points, slope

In [7]:
points, slope = extract_data_search(path='../3731/', sep=' ', fil=True, freq=10, cutoff=0.05)

In [8]:
print(points)

       0        1        2        3        4        5        6        7      \
l    10005.0  10005.0  10005.0  10005.0  10005.0  10005.0  10005.0  10005.0   
v    10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0   
vlh  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0   
vlv  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0   
vrh  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0   
vrv  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0  10000.0   

       8        9       ...       21765    21766    21767    21768    21769  \
l    10005.0  10005.0   ...      9818.0   9818.0   9818.0   9818.0   9818.0   
v    10000.0  10000.0   ...     10000.0  10000.0  10000.0  10000.0  10000.0   
vlh  10000.0  10000.0   ...     10000.0  10000.0  10000.0  10000.0  10000.0   
vlv  10000.0  10000.0   ...     10000.0  10000.0  10000.0  10000.0  10000.0   
vrh  10000.0  10000.0   ...     10000.0  10000.0  1

In [9]:
#momentan noch nicht verwendet
def extract_rules(df, rules):
    res = pd.DataFrame(columns = df.columns)
    for rule in rules:
        res = res.append(df.query('name1 == "'+rule[0]+'" and name2 == "'+rule[1]+'" and dep == "'+rule[2]+'"' ))
    return res

In [11]:
def get_rule_set(points, slope, window, lap):
    ruleset = []
    for i in range(0, len(points.columns), lap):
        slicepoints = points.iloc[:,i:i+window]
        sliceslope = points.iloc[:,i:i+window]
#        print('for start index ' + str(i))
#        print (slicepoints)
#        print (sliceslope)
        brs = RuleGenerator(n_jobs=-1)
        brs.data_from_frame(slicepoints, sliceslope)
        brs.fit(delta=3, alpha1=0.9, beta=0.9, lag=100, all=False)
        ruleset.append(brs)
        
    return ruleset

In [12]:
def extract_slices2(window, lap, th=0.9):
    rule_set = get_rule_set(points, slope, window, lap)
    df = pd.DataFrame(columns= rule_set[1].brs.columns)
    df['slice'] = ""
    slices = pd.DataFrame(columns=['start', 'stop'])
    for i in range(0, len(rule_set)):
        of_int = rule_set[i].brs
        of_int = of_int.loc[of_int['alpha']>th]
        if(of_int.iloc[0]['s1_mean']>=0):
            of_int['start'] = i*lap
            of_int['stop'] = i*lap+window
            df = df.append(of_int)
    df.reset_index()
    slices['start'] = df['start'].drop_duplicates()
    slices['stop'] = df['stop'].drop_duplicates()
    return slices

In [13]:
lap = 500
window = 1000
rule_set = get_rule_set(points, slope, window, lap)
df = pd.DataFrame(columns= rule_set[1].brs.columns)
df['slice'] = ""
slices = pd.DataFrame(columns=['start', 'stop'])

In [14]:
for i in range(0, len(rule_set)):
    of_int = rule_set[i].brs
    #of_int = of_int.loc[of_int['alpha']>0.9]
    if(of_int.iloc[0]['s1_mean']>=0):
        of_int['start'] = i*lap
        of_int['stop'] = i*lap+window
        df = df.append(of_int)

In [15]:
df.reset_index()

Unnamed: 0,index,a_e,a_f,a_n,alpha,c_e,c_f,c_n,dep,l,...,t2,theta,theta_e,theta_f,theta_n,tor,v1_mean,v1_std,v2_mean,v2_std
0,0,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,1,6.907755,0.0,0.0,0.0,0.998000,10005.725040,4.470421e-01,10000.000000,2.277421e-12
1,1,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,2,6.907755,0.0,0.0,0.0,0.998000,10005.725040,4.470421e-01,10000.000000,2.360263e-12
2,2,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,3,6.907755,0.0,0.0,0.0,0.998000,10005.725040,4.470421e-01,10000.000000,2.360263e-12
3,3,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,4,6.907755,0.0,0.0,0.0,0.998000,10005.725040,4.470421e-01,10000.000000,2.360263e-12
4,4,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,5,6.907755,0.0,0.0,0.0,0.998000,10005.725040,4.470421e-01,10000.000000,2.360263e-12
5,5,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,0,6.907755,0.0,0.0,0.0,0.998000,10000.000000,2.295528e-12,10005.726041,4.465368e-01
6,6,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,2,6.907755,0.0,0.0,0.0,0.998000,10000.000000,2.295528e-12,10000.000000,2.360263e-12
7,7,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,3,6.907755,0.0,0.0,0.0,0.998000,10000.000000,2.295528e-12,10000.000000,2.360263e-12
8,8,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,4,6.907755,0.0,0.0,0.0,0.998000,10000.000000,2.295528e-12,10000.000000,2.360263e-12
9,9,0.001001,0.001001,0.001001,1.0,1,1,1,D,1,...,5,6.907755,0.0,0.0,0.0,0.998000,10000.000000,2.295528e-12,10000.000000,2.360263e-12


In [16]:
df.to_csv(path_or_buf = r'..\3731\df1000.csv')


In [17]:
slices['start'] = df['start'].drop_duplicates()

In [18]:
slices['stop'] = df['stop'].drop_duplicates()

In [19]:
slices

Unnamed: 0,start,stop
0,0.0,1000.0
0,500.0,1500.0
0,1000.0,2000.0
0,1500.0,2500.0
0,2000.0,3000.0
0,2500.0,3500.0
0,3000.0,4000.0
0,3500.0,4500.0
0,4000.0,5000.0
0,4500.0,5500.0


In [20]:
def fuse_slices(slice_list):
    res = []
    i = 0
    while i <len(slice_list):
        stop = False
        j=1
        while(not stop):
            if j == 1:
                tmp =[slice_list[i+j-1][0],slice_list[i+j-1][1]]
            if(i+j<len(slice_list) and tmp[1] >= slice_list[j+i][0] ):
                tmp = ([slice_list[i][0], slice_list[i+j][1]])
                j+=1
            else:
                stop=True
                res.append(tmp)
                i = i+j
    df = pd.DataFrame(res, columns=['start', 'stop'])
    return df

In [21]:
sl = extract_slices2(1000, 500)

In [22]:
slist = sl.values.tolist()

In [23]:
fs = fuse_slices(slist)

In [24]:
fs

Unnamed: 0,start,stop
0,0.0,22500.0
