In [72]:
import datetime
from datetime import datetime
from datetime import timedelta
from typing import List, Union, Dict
import pickle
import pandas as pd
import numpy as np

# Import
df = pd.read_csv("headPenFull.csv")

# New Time - concatenate the Date & Time fields into one proper datetime column.
# pd.to_datetime parses the whole column in one call instead of running
# datetime.strptime row by row through DataFrame.apply.
df["DateTime"] = pd.to_datetime(
    df["Date"].astype(str) + " " + df["Time"].astype(str),
    format="%d/%m/%Y %H:%M:%S",
)

# Remove the raw Date & Time columns now that DateTime replaces them
df = df.drop(["Date", "Time"], axis=1)

# Rows logged within the same second share an identical DateTime value.
# NOTE(review): grouping is by timestamp only; if several TagIDs can log during
# the same second they end up in one group - confirm that is intended.
same_second = df.groupby("DateTime")["DateTime"]

# Position of each row inside its one-second group: the anchor for the
# sub-second offsets added below
df["annotation_group_step"] = same_second.cumcount()

# Spread the rows of every group evenly across their second: each row is
# shifted by step * (1 second / rows in that group). The per-row step is rounded
# to whole microseconds, matching datetime.timedelta's resolution.
# transform("size") keeps the original row index, so every offset is added to the
# row it was computed for even when the file is not sorted by time (the previous
# groupby().apply().reset_index(drop=True) returned rows in sorted-group order).
rows_per_second = same_second.transform("size")
step_microseconds = (1_000_000 / rows_per_second).round()
df["DateTime"] = df["DateTime"] + pd.to_timedelta(
    step_microseconds * df["annotation_group_step"], unit="us"
)

df

Unnamed: 0.1,Unnamed: 0,TagID,Camera date,Camera time,Behavior,accX,accY,accZ,Temp. (?C),Battery Voltage (V),Metadata,DateTime,annotation_group_step
0,0,Pen11_AXY#1_S1,na,na,n,-0.016,-0.219,0.985,26,0,0,2021-05-17 16:07:35.000000,0
1,1,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.985,0,0,0,2021-05-17 16:07:35.111111,1
2,2,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.985,0,0,0,2021-05-17 16:07:35.222222,2
3,3,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.985,0,0,0,2021-05-17 16:07:35.333333,3
4,4,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.969,0,0,0,2021-05-17 16:07:35.444444,4
5,5,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.969,0,0,0,2021-05-17 16:07:35.555555,5
6,6,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.985,0,0,0,2021-05-17 16:07:35.666666,6
7,7,Pen11_AXY#1_S1,na,na,n,-0.016,-0.219,0.969,0,0,0,2021-05-17 16:07:35.777777,7
8,8,Pen11_AXY#1_S1,na,na,n,-0.016,-0.203,0.985,0,0,0,2021-05-17 16:07:35.888888,8
9,0,Pen11_AXY#1_S1,na,na,n,-0.016,-0.219,0.985,26,0,0,2021-05-17 16:07:36.000000,0


# Get behavior counts

In [75]:
# Number of rows recorded for each behavior label
df.loc[:, "Behavior"].value_counts()

n    18
Name: Behavior, dtype: int64

In [92]:
def pull_windows(df, slide: int = 1, window_length: int = 15) -> List[pd.DataFrame]:
    """Collect fixed-length sliding windows of rows that share a single behavior.

    Args:
        df: frame with a ``Behavior`` column; rows are taken in their current order.
        slide: number of rows to advance between consecutive window starts (>= 1).
        window_length: number of rows in every window (>= 1).

    Returns:
        List of row slices of ``df``, each ``window_length`` rows long and holding
        exactly one distinct ``Behavior`` value. Windows that mix behaviors are
        skipped, so the list may be empty.

    Raises:
        ValueError: if ``slide`` or ``window_length`` is smaller than 1, or if
            ``window_length`` exceeds the number of rows in ``df``.
    """
    # A negative slide used to yield an empty result silently and a zero slide
    # surfaced as an unrelated range() error; reject both up front.
    if slide < 1:
        raise ValueError('slide must be a positive integer')
    if window_length < 1:
        raise ValueError('window_length must be a positive integer')
    if window_length > df.shape[0]:
        raise ValueError('Window larger than data given')

    windows = []
    last_start = df.shape[0] - window_length

    for start in range(0, last_start + 1, slide):
        # .iloc makes the positional (row-number) slicing explicit
        window = df.iloc[start: start + window_length]

        # check if all behaviors are the same; if not ignore window
        if len(set(window.Behavior)) != 1:
            continue

        # check if times are uniform -- equal length; if not ignore window
        # if len(set(np.ediff1d(window.DateTime))) != 1:
        #     continue

        windows.append(window)

    return windows

    
def contruct_train_test(windows, onehot: Union[Dict[str, List[int]], None] = None):
    """Turn behavior windows into a stacked feature array and one-hot labels.

    Args:
        windows: iterable of DataFrames that each contain the columns ``accX``,
            ``accY``, ``accZ`` and ``Behavior``. The label of a window is the
            ``Behavior`` value of its first row.
        onehot: optional mapping from behavior label to its one-hot vector.
            When omitted, the built-in placeholder mapping (only ``'n'``) is used.

    Returns:
        Tuple ``(X, Y)``: ``X`` is ``np.stack`` of the per-window accelerometer
        arrays (so all windows must have the same number of rows) and ``Y`` is a
        list holding one one-hot list per window, in the same order.

    Raises:
        KeyError: if a window's behavior has no entry in ``onehot``.
        ValueError: if ``windows`` is empty.
    """
    positions = ['accX', 'accY', 'accZ']
    if onehot is None:
        # TODO - give each class its own index; the vector length should be the number of classes
        onehot = {
            'n': [0, 0, 0, 1],  # Example: there are only 4 classes in this example and 'n' is assigned index 3
        }

    Xtrain, Ytrain = [], []  # have to be the same length
    for window in windows:
        label = window.Behavior.values[0]
        if label not in onehot:
            raise KeyError(f"No one-hot encoding defined for behavior {label!r}")
        Xtrain.append(window[positions].to_numpy())
        # copy so the labels do not all alias one shared list object
        Ytrain.append(list(onehot[label]))

    if not Xtrain:
        raise ValueError('No windows given; nothing to stack')

    return np.stack(Xtrain), Ytrain
    
    
# Cut the frame into 10-row windows, advancing one row at a time
windows: List[pd.DataFrame] = pull_windows(df, slide=1, window_length=10)

# Convert the windows into a stacked feature array and a list of one-hot labels
Xtrain, Ytrain = contruct_train_test(windows)
# Display the first sample and its label as a quick sanity check
Xtrain[0], Ytrain[0]

(array([[-0.016, -0.219,  0.985],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.203,  0.969],
        [-0.016, -0.203,  0.969],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.219,  0.969],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.219,  0.985]]),
 [0, 0, 0, 1])

# Save preprocessing 

In [104]:
# Persist the (Xtrain, Ytrain) pair so it can be reloaded without redoing the preprocessing
with open('input.pkl', 'wb') as pickle_sink:
    pickle.dump(
        (Xtrain, Ytrain),
        pickle_sink,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

# Read preprocessing

In [105]:
# Reload the (Xtrain, Ytrain) pair written by the save cell.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook produced.
with open('input.pkl', 'rb') as pickle_source:
    restored = pickle.load(pickle_source)
Xtrain, Ytrain = restored

# Display the first sample and its label to confirm the round trip
Xtrain[0], Ytrain[0]

(array([[-0.016, -0.219,  0.985],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.203,  0.969],
        [-0.016, -0.203,  0.969],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.219,  0.969],
        [-0.016, -0.203,  0.985],
        [-0.016, -0.219,  0.985]]),
 [0, 0, 0, 1])

# create model 

In [None]:
def create_model(Xtrain, *args, **kwargs):
    """Placeholder for the model factory; not implemented yet.

    Args:
        Xtrain: training features the model is meant to be built for.
        *args: extra positional options, currently ignored.
        **kwargs: extra keyword options, currently ignored.

    Returns:
        None. The body is still a stub, so callers get no usable model until
        this function is implemented.
    """
    # TODO - build and return the model here
    return None

# create_model is still a stub (its body is only `pass`), so `model` is None until it is implemented
model = create_model(Xtrain)

# train model

In [None]:
# NOTE(review): assumes `model` is a tf.keras model, as the predict comment in the
# test cell suggests - TODO confirm once create_model is implemented.
# Keras' fit() returns a History object rather than the model, so the result is
# kept under its own name instead of overwriting `model` (which would break the
# later model.predict call).
history = model.fit(np.asarray(Xtrain), np.asarray(Ytrain))

# Persist the trained model; replaces the placeholder attribute access that was here.
# NOTE(review): the file name is a suggestion - adjust to the project's layout.
model.save("model.keras")

# test model 

In [87]:
def fix_ytest(Ytest) -> np.ndarray:
    """Collapse score / one-hot vectors into the index of their largest entry.

    Equivalent to ``a.index(max(a))`` for every vector ``a``: the position of
    the highest value along the last axis.

    Args:
        Ytest: a single vector or a sequence/array of vectors (e.g. model
            outputs or one-hot labels).

    Returns:
        ``np.ndarray`` of integer indices, one per vector (a 0-d array when a
        single vector is given). Ties resolve to the first maximum.

    Raises:
        ValueError: if ``Ytest`` is an empty sequence (raised by ``np.argmax``).
    """
    scores = np.asarray(Ytest)
    return np.asarray(np.argmax(scores, axis=-1))

# NOTE(review): these windows are cut from the same `df` that produced the training
# set, so this is not a held-out evaluation - swap in separate test data when available.
windows = pull_windows(df, slide=1, window_length=10)
Xtest, Ytest = contruct_train_test(windows)

Ytest_predicted = model.predict(Xtest)  # may not be correct - tf.keras.model should have a predict function

# Ytest_predicted == Ytest?
# Reduce each predicted score vector to the index of its largest entry.
# (The call previously referenced an undefined name `fix`; the helper is fix_ytest.)
Ytest_predicted_fixed = fix_ytest(Ytest_predicted)  # i.e. a.index(max([0, .1, .22, .7]))

# Plot results

In [94]:
# yay!