In [2]:
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
from sklearn.pipeline import Pipeline
import pkg_resources
import glob
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np

  import pkg_resources


In [3]:
# Dataset name -> directory that holds that dataset's CSV files.
file_path = {
    "2013_Targa_Sixty_Six": "data/2013_Targa_Sixty_Six",
    "2014_Targa_Sixty_Six": "data/2014_Targa_Sixty_Six",
    "2013_Laguna_Seca": "data/2013_Laguna_Seca",
}

# Load every CSV of the 2013 Targa Sixty Six dataset in sorted filename order.
# The directory comes from `file_path` so the path is defined in exactly one place.
dflist = [
    pd.read_csv(fname, sep=",")
    for fname in sorted(glob.glob(file_path["2013_Targa_Sixty_Six"] + "/*.csv"))
]

# Small fixture for the cells below: the first five rows of the first two files.
# Raises IndexError if fewer than two CSV files were found.
testlist = [dflist[0][:5], dflist[1][:5]]

In [5]:
class ApplyThreshold(BaseEstimator, TransformerMixin):
    """Split DataFrames into contiguous runs of rows that satisfy a threshold.

    For every input frame, rows are tested against ``threshold`` on one column.
    Each maximal run of consecutive passing rows becomes its own DataFrame, and
    only runs with more than ``seq_length`` rows are kept.

    Parameters
    ----------
    threshold : scalar
        Value the column is compared against (strict comparison).
    seq_length : int, default=1
        A run is kept only if it has strictly more rows than this.
    threshold_as_upper_bound : bool, default=False
        If True keep rows with ``value < threshold``; otherwise keep rows with
        ``value > threshold``.
    column : str, default="vxCG"
        Name of the column the threshold is applied to.
    """

    def __init__(self, threshold, seq_length=1, threshold_as_upper_bound=False, column="vxCG"):
        self.threshold = threshold
        self.seq_length = seq_length
        self.threshold_as_upper_bound = threshold_as_upper_bound
        self.column = column

    def fit(self, X, y=None):
        """Nothing is learned; returns ``self`` unchanged."""
        return self

    def _threshold_condition(self, df):
        """Return a boolean Series marking the rows of ``df`` that pass the threshold."""
        if self.threshold_as_upper_bound:
            return df[self.column] < self.threshold
        return df[self.column] > self.threshold

    def _process_group(self, group):
        """Return ``group`` if it has more than ``seq_length`` rows, else ``None``."""
        if len(group) > self.seq_length:
            return group
        return None

    def transform(self, dflist):
        """Return a flat list of all sufficiently long passing runs.

        Parameters
        ----------
        dflist : iterable of pandas.DataFrame
            Frames that each contain ``self.column``.

        Returns
        -------
        list of pandas.DataFrame
            The kept runs from every input frame, in input order. Empty if no
            run qualifies.
        """
        new_dflist = []
        for df in dflist:
            df_mask = self._threshold_condition(df)
            # The counter increases on every failing row, so all rows of one
            # uninterrupted passing run share the same id.
            run_ids = (~df_mask).cumsum()
            for _, group in df[df_mask].groupby(run_ids[df_mask]):
                # Evaluate the length filter once per group.
                kept = self._process_group(group)
                if kept is not None:
                    new_dflist.append(kept)

        return new_dflist

class _ConcatDataFrames(BaseEstimator, TransformerMixin):
    """Stack a list of DataFrames into a single frame keyed by list position."""

    def fit(self, dflist, y=None):
        """Nothing is learned; returns ``self`` unchanged."""
        return self

    def transform(self, dflist):
        """Concatenate ``dflist``; the outer index level is each frame's position."""
        positions = np.arange(len(dflist))
        return pd.concat(dflist, keys=positions)

class _SeparateDataFrames(BaseEstimator, TransformerMixin):
    """Split a frame back into a list, one DataFrame per outer index label."""

    def fit(self, dflist, y=None):
        """Nothing is learned; returns ``self`` unchanged."""
        return self

    def transform(self, df):
        """Return the cross-section of ``df`` for each unique level-0 index label."""
        outer_labels = df.index.get_level_values(0).unique()
        frames = []
        for label in outer_labels.to_list():
            frames.append(df.xs(label))
        return frames
    

class PL_StandardScaler(BaseEstimator, TransformerMixin):
    """Apply one ``StandardScaler`` to a list of DataFrames.

    The scaler is fitted incrementally over every frame in the list and then
    applied to each frame individually; output is requested as pandas objects.
    """

    def __init__(self):
        self.scaler = StandardScaler().set_output(transform="pandas")

    def fit(self, dflist, y=None):
        """Reset the wrapped scaler, then accumulate statistics from each frame."""
        scaler = self.scaler
        # Drop any previously accumulated state so repeated fits do not stack.
        scaler._reset()
        for frame in dflist:
            scaler.partial_fit(frame)
        return self

    def transform(self, dflist):
        """Return a new list holding the scaled version of every frame."""
        return [self.scaler.transform(frame) for frame in dflist]

class PL_MinMaxScaler(BaseEstimator, TransformerMixin):
    """Apply one ``MinMaxScaler`` to a list of DataFrames.

    The scaler is fitted incrementally over every frame in the list and then
    applied to each frame individually; output is requested as pandas objects.
    """

    def __init__(self):
        self.scaler = MinMaxScaler().set_output(transform="pandas")

    def fit(self, dflist, y=None):
        """Reset the wrapped scaler, then accumulate statistics from each frame."""
        scaler = self.scaler
        # Drop any previously accumulated state so repeated fits do not stack.
        scaler._reset()
        for frame in dflist:
            scaler.partial_fit(frame)
        return self

    def transform(self, dflist):
        """Return a new list holding the scaled version of every frame."""
        return [self.scaler.transform(frame) for frame in dflist]

In [12]:
# Preview the first test frame (first five rows of the first loaded CSV).
testlist[0]

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2


In [11]:
# Keep only runs of more than 10 consecutive rows whose vxCG is above 10.
pipeline = Pipeline(
    steps=[
        ("threshold", ApplyThreshold(threshold=10, seq_length=10)),
    ]
)
new_df_list = pipeline.fit_transform(testlist)

In [12]:
# Expected to be empty for this fixture: each test frame has only five rows, so no run
# can be longer than seq_length=10 (and the vxCG values shown are all <= 0 anyway).
new_df_list

[]

In [13]:
# Stack the test frames into one frame whose outer index level is the list position.
# `_ConcatDataFrames` is the class name defined in this notebook; the pipeline is
# fitted before transforming because Pipeline expects fit to be called first.
pip1 = Pipeline([("test", _ConcatDataFrames())])
concatdf = pip1.fit_transform(testlist)

In [14]:
# Inspect the stacked frame; the first index level identifies the source frame.
concatdf

Unnamed: 0,Unnamed: 1,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
0,1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
0,2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
0,3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
0,4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2
1,0,0.0,-0.01,0.0,0.0,-80.306057,26.920794,2178,-13.6,2.2,0.0,0.22,0.57,-0.06,10.91,10.04,10.06,9.76
1,1,0.0,-0.01,-0.01,0.05,-80.306057,26.920794,2088,-13.7,2.2,0.0,-0.28,0.04,0.17,7.85,10.85,8.88,8.6
1,2,0.0,-0.01,-0.02,0.1,-80.306057,26.920794,2112,-13.7,2.2,0.0,-0.36,-0.94,-0.04,7.64,9.06,8.24,9.06
1,3,0.0,-0.01,0.0,0.15,-80.306057,26.920794,2028,-13.6,2.3,0.0,0.09,0.24,-0.13,8.67,8.82,9.02,9.3
1,4,0.0,-0.01,0.01,0.2,-80.306057,26.920794,2148,-13.5,2.3,0.0,0.08,0.24,-0.04,13.5,7.96,8.9,9.13


In [15]:
# Split the stacked frame back into one DataFrame per outer index label.
# `_SeparateDataFrames` is the class name defined in this notebook; the pipeline is
# fitted before transforming because Pipeline expects fit to be called first.
# NOTE(review): this rebinds `dflist`, which earlier held the raw frames loaded from CSV;
# consider a distinct name so re-running earlier cells stays unambiguous.
pip2 = Pipeline([("test", _SeparateDataFrames())])
dflist = pip2.fit_transform(concatdf)

In [16]:
# First frame of the current `dflist`; for a concat/separate round trip this should
# show the same rows as testlist[0].
dflist[0]

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2


In [6]:
# Unscaled reference: the first test frame before any scaler is applied.
testlist[0]

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
0,0.0,0.0,-0.0,0.0,,,210,-22.3,0.5,0.0,0.04,-0.2,-0.21,9.56,9.61,9.01,9.14
1,0.0,0.0,-0.0,0.05,,,210,-22.2,0.5,0.0,-0.09,-0.03,-0.03,9.58,9.64,9.16,9.25
2,0.0,0.0,-0.0,0.1,,,210,-22.3,0.5,0.0,-0.03,-0.1,0.11,9.6,9.61,9.07,9.14
3,0.0,0.0,-0.0,0.15,,,210,-22.2,0.5,0.0,-0.02,-0.09,0.11,9.61,9.62,9.01,9.1
4,0.0,0.0,-0.0,0.2,,,210,-22.2,0.5,0.0,0.01,-0.14,-0.35,9.59,9.59,9.06,9.2


In [22]:
# Fit a single StandardScaler across all test frames, then scale each frame separately.
pip3 = Pipeline(steps=[("test", PL_StandardScaler())])
scaled_df = pip3.fit_transform(testlist)

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [32]:

# Sanity check on the pooled scaled rows: column means should be ~0.
# describe() reports the sample std (ddof=1), so over these 10 pooled rows a
# unit-variance column shows as sqrt(10/9) ~= 1.054 rather than exactly 1
# (presumably because StandardScaler normalises with the population std -- confirm).
pd.concat([scaled_df[0], scaled_df[1]]).describe()

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
count,10.0,10.0,10.0,10.0,0.0,0.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.0,0.0,-8.881784000000001e-17,-2.2204460000000003e-17,,,-1.110223e-16,-6.661338000000001e-17,1.332268e-16,0.0,7.771561000000001e-17,4.4408920000000007e-17,2.3592240000000002e-17,4.4408920000000007e-17,-7.993606e-16,-4.607426e-16,-3.330669e-15
std,0.0,1.054093,1.054093,1.054093,,,1.054093,1.054093,1.054093,0.0,1.054093,1.054093,1.054093,1.054093,1.054093,1.054093,1.054093
min,0.0,-1.0,-2.405351,-1.414214,,,-0.9992659,-1.013812,-0.9992082,0.0,-1.982358,-2.413834,-2.032642,-1.280313,-2.10705,-1.918328,-2.120407
25%,0.0,-1.0,0.2672612,-0.7071068,,,-0.9992659,-0.9906126,-0.9992082,0.0,-0.249315,-0.2389669,-0.4393994,-0.4829027,-0.3985375,-0.271823,-0.2258532
50%,0.0,0.0,0.2672612,0.0,,,-0.04352863,-0.004639872,-0.0229703,0.0,0.1763447,-0.0510154,0.04695872,-0.04201922,0.1802083,-0.06226781,-0.1045271
75%,0.0,1.0,0.2672612,0.7071068,,,0.9942191,0.9987324,0.9532676,0.0,0.6324088,0.6202398,0.8184234,-0.02769449,0.2148637,0.06346527,0.2594512
max,0.0,1.0,1.603567,1.414214,,,1.069921,1.027732,1.068119,0.0,1.544537,1.640548,1.45572,2.450485,1.899118,2.440419,2.210002
