In [1]:
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
from sklearn.pipeline import Pipeline
import pkg_resources
import glob
from sklearn.preprocessing import StandardScaler
import numpy as np

In [2]:
# Data directories, keyed by dataset name. Only the first entry is loaded below.
file_path = {
    "2013_Targa_Sixty_Six": "data/2013_Targa_Sixty_Six",
    "2014_Targa_Sixty_Six": "data/2014_Targa_Sixty_Six",
    "2013_Laguna_Seca": "data/2013_Laguna_Seca",
}

# Read every CSV in the selected directory, in sorted filename order so the
# positions in `dflist` are stable across runs. The path comes from
# `file_path` instead of being spelled out a second time.
csv_pattern = file_path["2013_Targa_Sixty_Six"] + "/*.csv"
dflist = [pd.read_csv(fname) for fname in sorted(glob.glob(csv_pattern))]

# Tiny fixture for trying out the transformers: the first five rows of the
# first two frames. Indexing is explicit so a directory with fewer than two
# CSV files fails loudly here instead of yielding a short list.
testlist = [dflist[0].head(5), dflist[1].head(5)]

In [3]:
class ApplyThreshold(BaseEstimator, TransformerMixin):
    """Cut each DataFrame into contiguous runs of rows that satisfy a threshold.

    For every input frame, rows are tested against ``threshold`` on one
    column. Consecutive passing rows form a run; each run that is long enough
    is emitted as its own DataFrame (original row labels preserved).

    Parameters
    ----------
    threshold : scalar
        Value the column is compared against (strict comparison).
    seq_length : int, default=1
        A run is kept only when it has strictly more than ``seq_length`` rows.
    threshold_as_upper_bound : bool, default=False
        If True keep rows with ``value < threshold``; otherwise keep rows
        with ``value > threshold``.
    column : str, default="vxCG"
        Name of the column the threshold is applied to.
    """

    def __init__(self, threshold, seq_length=1, threshold_as_upper_bound=False, column="vxCG"):
        self.threshold = threshold
        self.seq_length = seq_length
        self.threshold_as_upper_bound = threshold_as_upper_bound
        self.column = column

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def _threshold_condition(self, df):
        """Return a boolean Series marking the rows of ``df`` that pass the threshold."""
        if self.threshold_as_upper_bound:
            return df[self.column] < self.threshold
        return df[self.column] > self.threshold

    def _process_group(self, group):
        """Return ``group`` if it has more than ``seq_length`` rows, else ``None``."""
        if len(group) > self.seq_length:
            return group
        return None

    def transform(self, dflist):
        """Split every frame in ``dflist`` into its qualifying runs.

        Parameters
        ----------
        dflist : iterable of pandas.DataFrame
            Each frame must contain ``self.column``.

        Returns
        -------
        list of pandas.DataFrame
            All kept runs, in input order; empty when no run qualifies.
        """
        new_dflist = []
        for df in dflist:
            df_mask = self._threshold_condition(df)
            # The counter increases on every failing row, so all rows of one
            # uninterrupted passing run share the same id.
            run_ids = (~df_mask).cumsum()
            # Restrict the ids to the passing rows so the grouper lines up
            # with the filtered frame row-for-row and pandas does not have to
            # re-align it by index label.
            for _, group in df[df_mask].groupby(run_ids[df_mask]):
                # Evaluate the hook once per run rather than once to filter
                # and a second time to collect.
                kept = self._process_group(group)
                if kept is not None:
                    new_dflist.append(kept)

        return new_dflist

class concatDataFrames(BaseEstimator, TransformerMixin):
    """Stack a list of DataFrames into a single frame keyed by list position."""

    def fit(self, dflist, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def transform(self, dflist):
        """Concatenate ``dflist`` row-wise.

        Each input frame is labelled with its position in the list
        (0, 1, 2, ...), which becomes the outer level of the result's index.
        """
        positions = np.arange(len(dflist))
        return pd.concat(dflist, keys=positions)

class separateDataFrames(BaseEstimator, TransformerMixin):
    """Split a frame into a list of cross-sections, one per outer index value."""

    def fit(self, dflist, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def transform(self, df):
        """Return ``df.xs(key)`` for each distinct value of index level 0.

        The distinct values are taken from ``df.index.get_level_values(0)``
        and the cross-sections are returned in that order.
        """
        outer_keys = df.index.get_level_values(0).unique().to_list()
        pieces = []
        for key in outer_keys:
            pieces.append(df.xs(key))
        return pieces

In [4]:
# Show the first fixture frame (the first five rows of the first loaded CSV).
testlist[0]

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2


In [11]:
# Single-step pipeline: keep runs of more than 10 consecutive rows whose
# vxCG is above 10, then apply it to the small fixture list.
pipeline = Pipeline(
    steps=[
        ("threshold", ApplyThreshold(threshold=10, seq_length=10)),
    ]
)
new_df_list = pipeline.fit_transform(testlist)

In [12]:
# Empty for this fixture: each test frame has only five rows and their vxCG
# values are at or below zero, so no run can pass threshold=10 / seq_length=10.
new_df_list

[]

In [13]:
# Merge the fixture frames into one frame keyed by list position.
# fit_transform is used (as in the threshold cell) so the pipeline is fitted
# before it transforms; calling transform() on a never-fitted Pipeline is
# outside the estimator contract and recent scikit-learn releases warn on it.
pip1 = Pipeline([("test", concatDataFrames())])
concatdf = pip1.fit_transform(testlist)

In [14]:
# Show the concatenated frame: the outer index level is each source frame's
# position in testlist, the inner level is that frame's original row index.
concatdf

Unnamed: 0,Unnamed: 1,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
0,1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
0,2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
0,3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
0,4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2
1,0,0.0,-0.01,0.0,0.0,-80.306057,26.920794,2178,-13.6,2.2,0.0,0.22,0.57,-0.06,10.91,10.04,10.06,9.76
1,1,0.0,-0.01,-0.01,0.05,-80.306057,26.920794,2088,-13.7,2.2,0.0,-0.28,0.04,0.17,7.85,10.85,8.88,8.6
1,2,0.0,-0.01,-0.02,0.1,-80.306057,26.920794,2112,-13.7,2.2,0.0,-0.36,-0.94,-0.04,7.64,9.06,8.24,9.06
1,3,0.0,-0.01,0.0,0.15,-80.306057,26.920794,2028,-13.6,2.3,0.0,0.09,0.24,-0.13,8.67,8.82,9.02,9.3
1,4,0.0,-0.01,0.01,0.2,-80.306057,26.920794,2148,-13.5,2.3,0.0,0.08,0.24,-0.04,13.5,7.96,8.9,9.13


In [15]:
# Split the concatenated frame back into one DataFrame per outer index key.
# fit_transform is used so the pipeline is fitted before it transforms;
# calling transform() on a never-fitted Pipeline is outside the estimator
# contract and recent scikit-learn releases warn on it.
# NOTE: this rebinds `dflist`, replacing the list of frames loaded from CSV.
pip2 = Pipeline([("test", separateDataFrames())])
dflist = pip2.fit_transform(concatdf)

In [16]:
# Show the first frame recovered from concatdf; it should match testlist[0].
dflist[0]

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2
