In [328]:
import pandas as pd
import numpy as np

# Synthetic hourly step counts used by the filter examples.
SEED = 42             # fixed so the same data is generated on every fresh run
N_DAYS = 9            # 2018-01-01 00:00 .. 2018-01-09 23:00
N_PARTICIPANTS = 10   # ids cycle through 1..10

n_epochs = N_DAYS * 24

df = pd.DataFrame({
        # `periods=` gives the same half-open range as the former
        # `closed='left'`, which newer pandas no longer accepts.
        'Hours': pd.date_range('2018-01-01', periods=n_epochs, freq='h'),
        # Local generator: reproducible without touching the global NumPy RNG state.
        'Steps': np.random.RandomState(SEED).randint(100, 10000, size=n_epochs),
        }
     )

# Round-robin participant ids (2, 3, ..., 10, 1, 2, ...), one per row.
ids = [i % N_PARTICIPANTS + 1 for i in range(1, n_epochs + 1)]

df["Id"] = ids

In [329]:
# Inspect the synthetic frame: one row per hour with Hours, Steps and Id columns.
df

Unnamed: 0,Hours,Steps,Id
0,2018-01-01 00:00:00,1128,2
1,2018-01-01 01:00:00,429,3
2,2018-01-01 02:00:00,4189,4
3,2018-01-01 03:00:00,2148,5
4,2018-01-01 04:00:00,2148,6
...,...,...,...
211,2018-01-09 19:00:00,5899,3
212,2018-01-09 20:00:00,8635,4
213,2018-01-09 21:00:00,7688,5
214,2018-01-09 22:00:00,3242,6


In [349]:
class FilterEpochsOperator:
    """Keep or drop epoch rows using a row-level rule and optional day-level rules.

    Parameters
    ----------
    participant_id_column : str
        Column identifying the participant; first grouping key for day filters.
    ts_column : str
        Datetime column; its calendar date is the second grouping key for day
        filters.
    ts_filter : str or callable
        Row-level rule. A string is evaluated with ``DataFrame.query``; a
        callable receives the data frame and returns a boolean mask. An empty
        string (the default) or ``None`` disables the row-level step.
    day_filter : dict or iterable of dict, optional
        Each dict has a ``'column'`` entry (the column handed to the rule) and a
        ``'filter'`` entry (a function passed to ``groupby(...).transform``
        that yields a boolean for each participant/day group).
    filter_type : str
        ``"include"`` keeps the rows selected by the filters; any other value
        keeps the rows that the filters do not select.
    """

    def __init__(
        self,
        participant_id_column="id",
        ts_column="time",
        ts_filter="",
        day_filter=None,
        filter_type="include"):

        self.participant_id_column = participant_id_column
        self.ts_column = ts_column
        self.ts_filter = ts_filter
        self.day_filter = day_filter
        self.filter_type = filter_type

    def process(self, *data_frames):
        """Filter every given data frame and return the results as a list.

        The list has one filtered frame per input, in input order. The inputs
        themselves are not modified.
        """
        return [
            self._apply_day_filters(self._apply_ts_filter(data_frame))
            for data_frame in data_frames
        ]

    def _apply_ts_filter(self, data_frame):
        """Apply the row-level rule, honouring ``filter_type``."""
        rule = self.ts_filter
        keep_matching = self.filter_type == "include"

        if rule is None or (isinstance(rule, str) and not rule.strip()):
            # DataFrame.query rejects an empty expression, so an unset rule
            # means "no row-level filtering" instead of a crash.
            return data_frame.copy()

        if isinstance(rule, str):
            expression = rule if keep_matching else f"not ({rule})"
            return data_frame.query(expression)

        mask = rule(data_frame)
        return data_frame[mask if keep_matching else ~mask]

    def _apply_day_filters(self, frame):
        """Apply the day-level rule(s) per participant and calendar day.

        With several rules, "include" keeps rows whose group satisfies every
        rule, and any other ``filter_type`` keeps rows whose group satisfies
        none of them. All masks are computed on the same frame, so the order of
        the rules does not matter.
        """
        day_filter = self.day_filter
        if not day_filter or frame.empty:
            # Nothing to do; also avoids a group-by over zero rows.
            return frame

        # A single dict is the one-rule case; anything else is iterated.
        items = [day_filter] if isinstance(day_filter, dict) else list(day_filter)

        keep_matching = self.filter_type == "include"
        group_keys = [self.participant_id_column, frame[self.ts_column].dt.date]

        keep = None
        for item in items:
            # transform() broadcasts each group's verdict back onto its rows.
            mask = frame.groupby(group_keys)[item['column']].transform(item['filter'])
            if not keep_matching:
                mask = ~mask
            keep = mask if keep is None else keep & mask

        return frame.loc[keep]
        

def filter_epochs_operator(
    data_frame,
    participant_id_column="id",
    ts_column="time",
    ts_filter="",
    day_filter=None,
    filter_type="include"):
    """Keep or drop epoch rows of ``data_frame`` by a row-level and day-level rule.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame to filter; it is not modified.
    participant_id_column : str
        Column identifying the participant; first grouping key for day filters.
    ts_column : str
        Datetime column; its calendar date is the second grouping key for day
        filters.
    ts_filter : str or callable
        Row-level rule. A string is evaluated with ``DataFrame.query``; a
        callable receives ``data_frame`` and returns a boolean mask. An empty
        string (the default) or ``None`` disables the row-level step.
    day_filter : dict or iterable of dict, optional
        Each dict has a ``'column'`` entry (the column handed to the rule) and a
        ``'filter'`` entry (a function passed to ``groupby(...).transform``
        that yields a boolean for each participant/day group). With several
        rules, "include" keeps rows whose group satisfies every rule; any other
        ``filter_type`` keeps rows whose group satisfies none of them.
    filter_type : str
        ``"include"`` keeps the rows selected by the filters; any other value
        keeps the rows that the filters do not select.

    Returns
    -------
    pandas.DataFrame
        The filtered rows.
    """
    keep_matching = filter_type == "include"

    # --- row-level step -----------------------------------------------------
    if ts_filter is None or (isinstance(ts_filter, str) and not ts_filter.strip()):
        # DataFrame.query rejects an empty expression, so an unset rule means
        # "no row-level filtering" instead of a crash.
        processed_df = data_frame.copy()
    elif isinstance(ts_filter, str):
        expression = ts_filter if keep_matching else f"not ({ts_filter})"
        processed_df = data_frame.query(expression)
    else:
        # Evaluate the callable on the frame being filtered, not on whatever
        # global frame happens to exist in the notebook.
        row_mask = ts_filter(data_frame)
        processed_df = data_frame[row_mask if keep_matching else ~row_mask]

    # --- day-level step -----------------------------------------------------
    if not day_filter or processed_df.empty:
        # Nothing to do; also avoids a group-by over zero rows.
        return processed_df

    # A single dict is the one-rule case; anything else is iterated.
    day_filter_items = [day_filter] if isinstance(day_filter, dict) else list(day_filter)
    group_keys = [participant_id_column, processed_df[ts_column].dt.date]

    keep = None
    for item in day_filter_items:
        # transform() broadcasts each group's verdict back onto its rows.
        day_mask = processed_df.groupby(group_keys)[item['column']].transform(item['filter'])
        if not keep_matching:
            day_mask = ~day_mask
        keep = day_mask if keep is None else keep & day_mask

    return processed_df.loc[keep]
       

In [350]:
# Drop every epoch with fewer than 2000 steps: the callable marks those rows and
# filter_type="exclude" keeps the complement. No day_filter is passed, so the
# default "id"/"time" column names (which df does not have) are never looked up.
operator = FilterEpochsOperator(ts_filter=lambda df: df['Steps'] < 2000, filter_type="exclude")
# process() returns a list with one filtered frame per input frame.
operator.process(df)


[                  Hours  Steps  Id
 2   2018-01-01 02:00:00   4189   4
 3   2018-01-01 03:00:00   2148   5
 4   2018-01-01 04:00:00   2148   6
 5   2018-01-01 05:00:00   8853   7
 6   2018-01-01 06:00:00   3644   8
 ..                  ...    ...  ..
 211 2018-01-09 19:00:00   5899   3
 212 2018-01-09 20:00:00   8635   4
 213 2018-01-09 21:00:00   7688   5
 214 2018-01-09 22:00:00   3242   6
 215 2018-01-09 23:00:00   6397   7
 
 [187 rows x 3 columns]]

In [351]:
# NOTE(review): `df2` is not assigned in any cell visible here; it presumably
# comes from an earlier or since-deleted cell, so this will raise NameError on
# Restart & Run All — confirm and define it explicitly.
df2

Unnamed: 0,Hours,Steps,Id
0,2018-01-01 00:00:00,1321,2
11,2018-01-01 11:00:00,817,3
14,2018-01-01 14:00:00,663,6
20,2018-01-01 20:00:00,1579,2
21,2018-01-01 21:00:00,1281,3
24,2018-01-02 00:00:00,850,6
29,2018-01-02 05:00:00,619,1
41,2018-01-02 17:00:00,1424,3
42,2018-01-02 18:00:00,664,4
44,2018-01-02 20:00:00,819,6


In [352]:
# Keep only epochs with fewer than 2000 steps (row-level rule), then apply a
# day-level rule on the Steps column per participant ("Id") and calendar day of
# "Hours".
# NOTE(review): x.count() is the number of rows in one participant/day group,
# which cannot exceed 24 for this hourly frame, so `< 4000` is always True and
# the day filter keeps everything; possibly x.sum() was intended — confirm.
df3 = filter_epochs_operator(
    df, 
    participant_id_column="Id",
    ts_column="Hours",
    ts_filter=lambda df: df['Steps'] < 2000, 
    day_filter={
        "column": "Steps",
        "filter": lambda x: x.count() < 4000
    },
    filter_type="include")

In [353]:

# Inspect the rows that passed both the row-level and the day-level filter.
df3

Unnamed: 0,Hours,Steps,Id
0,2018-01-01 00:00:00,1128,2
1,2018-01-01 01:00:00,429,3
33,2018-01-02 09:00:00,1974,5
43,2018-01-02 19:00:00,1953,5
58,2018-01-03 10:00:00,1809,10
68,2018-01-03 20:00:00,1332,10
72,2018-01-04 00:00:00,751,4
76,2018-01-04 04:00:00,525,8
77,2018-01-04 05:00:00,885,9
93,2018-01-04 21:00:00,466,5
