In [1]:
import pandas as pd
import numpy as np

In [3]:
# df = pd.read_csv("../Data/all.csv")
# df[:10]

In [3]:

import pandas as pd
import numpy as np
from datetime import datetime as dt

def cycle_spliter(dataframe, sort = False, to_files=False, padding = False,
                  min_duration=0):
    """
    Params
    ------
    dataframe: pandas.DataFrame
        Input data, must have "Machine_Mode", "Time_stamp" columns
    sort: boolean
        sort dataframe by Time_stamp
    to_files: boolean
        save cycle-based splited dataframe into csv files, name is cycle_[order]
    padding: boolean
        True: include one previous row before cycle start and 
        one following row after cycle end
    min_duration: integer
        minimum cycle duration (in minutes) to return
    Return: list of DataFrames of cycles
    """
    # separate local object from passed reference object
    df = dataframe.copy()
    
    df["Time_stamp"] = pd.to_datetime(df["Time_stamp"])

    # sometimes, tables are not in Time_stamp order
    if sort: 
        df.sort_values(by="Time_stamp", inplace=True)
    
    # Convert Machine_Mode to numbers. Object-types will be NaN
    df['Machine_Mode'] = pd.to_numeric(df.Machine_Mode, errors="coerce")
    
    # Drop NaN rows in Machine_mode
    if df.Machine_Mode.hasnans:
        df.drop(index=df.loc[df.Machine_Mode.isna()].index,axis=0, inplace=True)

    if "Cycle_Start_Time" not in df.columns:
        df.insert(2,"Cycle_Start_Time",np.nan)
    if "Cycle_End_Time" not in df.columns:
        df.insert(3,"Cycle_End_Time",np.nan)
        
    df.reset_index(drop=True,inplace=True)

    start_idx, start_time = 0, 0
    end_idx, end_time = 0, 0
    count, count_drop = 0, 0
    list_cycles=[]
    for this_index, this_mode in enumerate(df.Machine_Mode):
        
        try:
            next_index = this_index + 1
            next_mode = df.Machine_Mode[next_index]
        except KeyError: # end of df, stop loop
            break
            
        # define start time
        if this_mode == 0 and next_mode == 3:
            if padding:
                start_idx = this_index # val = 0
            else:
                start_idx = next_index # val = 3
            start_time = df.Time_stamp[start_idx]
            
        # define end time
        elif this_mode != 5 and next_mode == 5 and start_idx != None:
            if padding:
                end_idx = next_index # val == 5
            else:
                end_idx = this_index # val != 5
            end_time = df.Time_stamp[end_idx]
            
            # only cycles last >= min_duration are kept
            duration = round((end_time - start_time).total_seconds()/60,1)
            if duration >= min_duration:
                df.loc[start_idx:end_idx, ['Cycle_Start_Time','Cycle_End_Time']] = [start_time, end_time]
                cycle = df.loc[start_idx:end_idx]
                cycle.reset_index(drop=True,inplace=True)
                list_cycles.append(cycle)
                count += 1
                print("start time: {} \t end time: {} \t duration: {} mins".format(
                    start_time,end_time, duration))

                if to_files:
                    exec("df[start_idx:end_idx+1].to_csv('cycle_{count}.csv', index=False)".format(count=count))
            
            else:
                count_drop += 1
                
            # reset values for new cycles    
            start_idx, start_time = None, None
            end_idx, end_time = None, None
        
    print("splited into {} cycle(s)".format(count))
    if count_drop > 0:
        print("dropped {} cycle(s) that has running time "\
              "less than {} minutes".format(count_drop,min_duration))
    return list_cycles

In [4]:
df2=cycle_spliter(df,sort = False, dropna=True, to_files=True)
df2[:3]

start index: 1  		 end index: 2380
start index: 2394  		 end index: 5366
start index: 5380  		 end index: 7319
start index: 7323  		 end index: 9057
splited into 4 cycles


Unnamed: 0,Data_ID,Machine_ID,Cycle_Start_Time,Cycle_End_Time,Time_stamp,Machine_Mode,Heating1State,Heating2State,HeatingTemperature,SprinklerState,...,OutputAirHumidity,OutputAirTemperature,FanState,FanDirection,FanSpeed,Option1,Option2,Option3,Option4,Option5
1,231836,111004,2019-02-01 07:42:43.000,2019-02-01 08:25:03.000,2019-02-01 07:42:43.000,3,OFF,OFF,121,OFF,...,92.207703,89.599998,OFF,Right,1,0,5,140,1549006975000,1549006963000
2,231837,111004,2019-02-01 07:42:43.000,2019-02-01 08:25:03.000,2019-02-01 07:42:44.000,3,OFF,OFF,121,OFF,...,92.207703,89.419998,OFF,Right,1,0,5,140,1549006976000,1549006964000
3,231838,111004,2019-02-01 07:42:43.000,2019-02-01 08:25:03.000,2019-02-01 07:42:45.000,3,OFF,OFF,121,OFF,...,92.207703,89.779999,OFF,Right,1,0,5,140,1549006977000,1549006965000
