In [1]:
import sys
sys.path += ['../..']
sys.path += ['..']

from data_collection.data_collection import LoggerSet, Logger

import numpy as np
import pandas as pd
import plotly.express as px
from data_collection.video_data import get_frame_iterator
from pathlib import Path
from typing import Iterable, Tuple, List
from tqdm import tqdm
import datetime
import tensorflow as tf
import tensorflow.keras as keras

%load_ext autoreload
%autoreload 2


2024-07-09 21:05:19.627250: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Note
- to predict the angular velocity at multiple time points
    - only the outputs beyond 200 ms shall be used for the model evaluation

# Loading data 

In [2]:
from with_linear_acc_v2 import Session

# The v0 recordings are loaded with explicit log names; the v30Jun recordings
# rely on the defaults of Session.load_multiple_session.
sessions_v0 = Session.load_multiple_session(
    '../data/v0',
    angular_speed_control_log_name='AngularSpeedControlV2',
    camera_log_name='PicameraV2',
)
sessions_v30Jun = Session.load_multiple_session('../data/v30Jun')


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x603898f38bc0] moov atom not found
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x60388c455f80] moov atom not found
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x60389a237d80] moov atom not found


In [4]:
def match_time(t1_col, t2_col, t1_offset_ms=0, t2_offset_ms=0, sort_check=True):
    """
    For every timestamp in ``t2_col`` return its insertion index into ``t1_col``.

    Each column is shifted by its offset (in milliseconds) before matching.
    The result is ``np.searchsorted(t1, t2)`` with the default ``side='left'``:
    for each shifted ``t2`` value, the position of the first shifted ``t1``
    value that is >= it.  The index equals ``len(t1_col)`` when a ``t2`` value
    lies after every ``t1`` value, so callers that use the result to index
    ``t1``-aligned data must handle that case.

    Parameters
    ----------
    t1_col, t2_col:
        Time columns.  They must support adding a pandas ``Timedelta``
        (e.g. datetime64 Series) because the offsets are applied that way.
    t1_offset_ms, t2_offset_ms:
        Offsets in milliseconds added to the respective column.
    sort_check:
        When true, assert that both columns (missing values ignored) are in
        ascending order, which ``searchsorted`` needs for a meaningful result.

    Raises
    ------
    AssertionError
        If ``sort_check`` is true and a column is not sorted.
    """
    if sort_check:
        for label, column in (('t1_col', t1_col), ('t2_col', t2_col)):
            # Monotonicity test is O(n); no need to sort a copy of the column.
            present = pd.Series(column).dropna()
            assert present.is_monotonic_increasing, f'{label} must be sorted in ascending order'

    t1_shifted = t1_col + pd.to_timedelta(t1_offset_ms, unit='ms')
    t2_shifted = t2_col + pd.to_timedelta(t2_offset_ms, unit='ms')
    return np.searchsorted(t1_shifted, t2_shifted)

In [5]:
def mean_nominmax(x):
    """
    Mean of ``x`` after removing one maximum and one minimum value.

    ``x`` must provide ``sum``/``max``/``min`` and a length (e.g. a pandas
    Series or a numpy array).  The divisor is ``len(x) - 2``, so at least
    three elements are needed for a meaningful result.
    """
    remaining_count = len(x) - 2
    remaining_sum = x.sum() - x.max() - x.min()
    return remaining_sum / remaining_count

def data_prep(obj: Session, window=50, shift=-25, offsets_ms=(50, 100, 200, 400)):
    """
    Add the training targets and the per-frame speed to a session, in place.

    Writes into ``obj.data``:

    * ``angular_speed_control_df['angular_velocity_smooth']`` - rolling mean of
      ``angular_velocity`` over ``window`` rows with the window's min and max
      removed (``mean_nominmax``), then shifted by ``shift`` rows.
    * ``camera_df['angular_velocity_smooth_<offset>']`` for every offset in
      ``offsets_ms`` - the smoothed value at the control-log row matched to
      ``camera time + offset`` milliseconds.
    * ``angular_speed_control_df['frame_match']`` and ``camera_df['mean_speed']``.
    * ``obj.data['add_angular_velocity_smooth_offset_columns']`` - the list of
      target column names created above (read later by ``sample_prep``).

    Parameters
    ----------
    obj:
        Session whose ``data`` dict holds ``angular_speed_control_df`` and
        ``camera_df``.
    window, shift:
        Rolling-window length and row shift used for the smoothing.
    offsets_ms:
        Look-ahead offsets in milliseconds; one target column per offset.
    """
    adf = obj.data['angular_speed_control_df']
    cdf = obj.data['camera_df']

    # 1. Smoothed angular velocity.
    # raw=True hands mean_nominmax a plain ndarray per window instead of
    # building a Series each time; windows containing NaN never reach the
    # function (default min_periods == window), so the values are unchanged.
    adf['angular_velocity_smooth'] = (
        adf['angular_velocity']
        .rolling(window, center=False)
        .apply(mean_nominmax, raw=True)
        .shift(shift)
    )

    # 2. Look-ahead targets on the camera frames.
    col_names: List = ['angular_velocity_smooth']
    smooth_values = adf[col_names].values
    n_control_rows = len(smooth_values)

    shifted_blocks = []
    for offset in offsets_ms:
        row_idx = np.asarray(
            match_time(adf['time_AngularSpeedControl'], cdf['time'], t2_offset_ms=offset)
        )
        # searchsorted returns len(adf) for frames whose look-ahead time lies
        # past the end of the control log; indexing with it would raise
        # IndexError.  Such frames get NaN targets instead.
        in_range = row_idx < n_control_rows
        block = np.full((len(row_idx), len(col_names)), np.nan)
        block[in_range] = smooth_values[row_idx[in_range]]
        shifted_blocks.append(block)
    omega_shifted = np.concatenate(shifted_blocks, axis=1)

    # Offset-major order, matching the concatenation order above.
    column_names = [c + '_' + str(t) for t in offsets_ms for c in col_names]
    cdf[column_names] = omega_shifted
    obj.data['add_angular_velocity_smooth_offset_columns'] = column_names

    # 3. Mean speed per camera frame.
    # Each control-log row is tagged with the position of the first camera row
    # whose time is >= the row's time.
    adf['frame_match'] = match_time(cdf['time'], adf['time_AngularSpeedControl'])
    mean_speed = adf.groupby('frame_match')['speed'].mean()

    # this is the mean speed after the end (acquisition) of the frame until the end of next frame
    # NOTE(review): with the matching above, camera row i averages the control
    # rows with time in (time[i-1], time[i]]; confirm this agrees with the
    # statement above, which depends on what cdf['time'] marks.
    # The assignment aligns the positional group keys with cdf's index labels,
    # so it assumes cdf has a default RangeIndex - TODO confirm.
    cdf['mean_speed'] = mean_speed

    

def sample_prep(obj: Session):
    """
    Build the (frames, targets) pair of a session and store it in ``obj.samples``.

    Camera rows are kept only when ``mean_speed > 0`` and every target column
    (the names stored under ``'add_angular_velocity_smooth_offset_columns'``)
    is non-NaN.  The targets are divided by 100.

    NaN targets come from the edges of the rolling smoothing window; a single
    NaN label would turn the training loss into NaN, so those rows are dropped.
    """
    frames, camera_df = obj.data['frames'], obj.data['camera_df']
    target_cols = obj.data['add_angular_velocity_smooth_offset_columns']

    moving = camera_df.query('mean_speed>0')
    out = moving[target_cols].dropna()

    # The index labels of the kept rows are used as positions into `frames`;
    # this assumes camera_df has a default RangeIndex - TODO confirm.
    frames = frames[out.index]

    obj.samples = frames, out.values/100
    


def stack_samples(objs: List[Session], sample_axis=0):
    """
    Concatenate the ``samples`` tuples of several sessions component-wise.

    Every session's ``samples`` is a tuple of arrays; the i-th arrays of all
    sessions are joined along ``sample_axis``.  Returns a list with one
    stacked array per tuple position (an empty list when ``objs`` is empty).
    """
    per_session = [session.samples for session in objs]
    return [
        np.concatenate(parts, axis=sample_axis)
        for parts in zip(*per_session)
    ]



In [6]:
# Prepare every session (both data sets): first the target/speed columns,
# then the (frames, targets) samples.  Plain loops are used because the
# functions are called only for their side effects on the session objects.
for _session in tqdm(sessions_v30Jun+sessions_v0):
    data_prep(_session)
for _session in tqdm(sessions_v30Jun+sessions_v0):
    sample_prep(_session)

# Training and validation data come from the v30Jun sessions only.
x, y = stack_samples(sessions_v30Jun)

from train_tools import validation_chunk_split
train_idx, val_idx = validation_chunk_split(len(x), val_split=0.2)
train_x, train_y = x[train_idx], y[train_idx]
val_x, val_y = x[val_idx], y[val_idx]


100%|██████████| 4/4 [00:09<00:00,  2.26s/it]
100%|██████████| 4/4 [00:00<00:00, 50.65it/s]


## Inspection only

In [8]:
# `sessions` is not defined anywhere in this notebook, so this cell raised
# NameError on a fresh kernel.  The first v30Jun session is used here on the
# assumption that the training data is what was being inspected - confirm.
df_vis = sessions_v30Jun[0].data['angular_speed_control_df']

In [52]:

# Compare trimmed rolling means of the raw angular velocity for three window
# sizes.  mean_nominmax is the function already defined next to data_prep;
# it is not redefined here so that there is a single definition to maintain.
asd0 = df_vis['angular_velocity'].rolling(100).apply(mean_nominmax)#.shift(-99)

asd = df_vis['angular_velocity'].rolling(50).apply(mean_nominmax).shift(-25)
asd1 = df_vis['angular_velocity'].rolling(3).apply(mean_nominmax).shift(-2)


px.line(y=[asd1, asd, asd0])

In [54]:
# Overlay the smoothed column written by data_prep on the raw angular velocity.
px.line(df_vis, y=['angular_velocity_smooth', 'angular_velocity'])

In [83]:
# `sessions` is not defined anywhere in this notebook (NameError on a fresh
# kernel); the first v30Jun session is assumed to be the intended one - confirm.
px.imshow(sessions_v30Jun[0].data['frames'][-1000])

# Model

In [7]:
def get_model(lr=.1, other_metrics=None):
    """
    Build and compile the convolutional regression model.

    The model takes images of shape (64, 114, 3), rescales them by 1/255,
    applies the random augmentation layers listed below, then four Conv2D
    layers, dropout and two dense layers, ending in ``Dense(4)`` (one output
    per target column - presumably the four look-ahead offsets produced by
    data_prep; confirm if the offsets change).

    Calling this function clears the Keras backend session first.

    Parameters
    ----------
    lr:
        Learning rate passed to the Adam optimiser.
    other_metrics:
        Optional iterable of additional metrics reported next to 'MAE'.
        ``None`` (the default) means no extra metrics; a ``None`` sentinel is
        used instead of a shared mutable ``[]`` default.

    Returns
    -------
    The compiled ``keras.Sequential`` model (Huber loss).
    """
    tf.keras.backend.clear_session()
    image_shape = 64, 114, 3
    extra_metrics = [] if other_metrics is None else list(other_metrics)

    img_aug_preprocess_layers = [
        keras.layers.RandomTranslation(0.05, 0.05, fill_mode='reflect'),
        keras.layers.RandomRotation(0.02, fill_mode='reflect'),
        keras.layers.RandomZoom(0.05, fill_mode='reflect'),
        keras.layers.RandomContrast(0.3),
        # value_range matches the [0, 1] images produced by the Rescaling layer.
        keras.layers.RandomBrightness(factor=0.6, value_range=[0, 1])
    ]

    layers =  [
        keras.layers.Conv2D(16, 5, strides=2, activation='relu'),
        keras.layers.Conv2D(32, 5, strides=2, activation='relu'),
        keras.layers.Conv2D(64, 5, strides=2, activation='relu'),
        keras.layers.Conv2D(64, (5, 3), activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.2),

        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(4),
    ]

    model = keras.Sequential(
        [
            keras.layers.InputLayer(image_shape),
            keras.layers.Rescaling(1/255),
            *img_aug_preprocess_layers,
            *layers,
        ])


    optimiser = keras.optimizers.Adam(lr)
    model.compile(optimizer=optimiser, loss='Huber', metrics=['MAE']+extra_metrics)

    return model

# Built with the default learning rate only to print the architecture;
# the training cell builds its own model with an explicit learning rate.
model = get_model()
model.summary()

2024-07-09 21:06:21.615908: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-09 21:06:21.653873: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-09 21:06:21.654120: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [8]:
from train_tools import find_lr, tg_notify, use_tensorboard, end_epoch_notify

In [9]:
# Learning-rate search on a freshly built model; the logged learning_rate
# grows from epoch to epoch (see the output below).
find_lr(get_model(), train_x, train_y)
#tensorboard --logdir ./logs --bind_all

Epoch 1/100


2024-07-09 21:06:34.602898: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - MAE: 0.3320 - loss: 0.1147 - learning_rate: 1.0000e-06
Epoch 2/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3362 - loss: 0.1191 - learning_rate: 1.1220e-06
Epoch 3/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3343 - loss: 0.1176 - learning_rate: 1.2589e-06
Epoch 4/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - MAE: 0.3293 - loss: 0.1135 - learning_rate: 1.4125e-06
Epoch 5/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - MAE: 0.3294 - loss: 0.1149 - learning_rate: 1.5849e-06
Epoch 6/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3319 - loss: 0.1170 - learning_rate: 1.7783e-06
Epoch 7/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3376 - loss: 0.1185 - learning_rate: 1.9953e-06
Epo

<keras.src.callbacks.history.History at 0x72efded62a10>

In [10]:
model = get_model(3e-4)
model.fit(
    train_x,
    train_y,
    epochs=100,
    validation_data = (val_x, val_y),
    callbacks=[
        # restore_best_weights=True: when training stops, roll the model back
        # to the epoch with the lowest val_loss.  Without it the model (which
        # is saved and evaluated later) keeps the weights of the final epoch,
        # i.e. `patience` epochs after the best one.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        ),
        use_tensorboard('training'),
        end_epoch_notify()
        ]
    )

Epoch 1/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - MAE: 0.3299 - loss: 0.1163 - val_MAE: 0.3346 - val_loss: 0.1077
Epoch 2/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3322 - loss: 0.1147 - val_MAE: 0.3342 - val_loss: 0.1029
Epoch 3/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3210 - loss: 0.1086 - val_MAE: 0.3279 - val_loss: 0.1038
Epoch 4/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3325 - loss: 0.1144 - val_MAE: 0.3138 - val_loss: 0.0974
Epoch 5/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.3152 - loss: 0.1052 - val_MAE: 0.2995 - val_loss: 0.0954
Epoch 6/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - MAE: 0.2912 - loss: 0.0912 - val_MAE: 0.3173 - val_loss: 0.0987
Epoch 7/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms

<keras.src.callbacks.history.History at 0x72efded74d00>

In [None]:
#frames_reindexed#, other_inputs_reindexed, outputs

# visualise the performance

In [15]:

# Sort the split indices so that samples are plotted in ascending index order.
train_idxv = np.sort(train_idx)
val_idxv = np.sort(val_idx)

# Model predictions for both splits (validation first, then training).
val_y_pred = model.predict(x[val_idxv], batch_size=64)
train_y_pred = model.predict(x[train_idxv], batch_size=64)

# Matching ground-truth targets.
val_outv, train_outv = y[val_idxv], y[train_idxv]

# All target columns followed by all predicted columns, side by side.
px.line(np.hstack([train_outv, train_y_pred]), title='train')

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [18]:
# Validation split, last target column (index 3, the largest offset in
# data_prep's offset list) against its prediction.  The title previously
# read 'train' although validation data is plotted.
px.line(np.c_[val_outv[:, 3], val_y_pred[:, 3]], title='val')

In [19]:
#model.export('29Jun-export.keras')
# Write the trained model to a .keras file in the current working directory.
model.save('model1-9Jul.keras')


In [21]:
# Second evaluation set: the v0 sessions.  The train/validation split above
# was drawn from the v30Jun sessions only, so none of these samples were
# used for fitting.
x_val2, y_val2 = stack_samples(sessions_v0)

y_val2_pred = model.predict(x_val2, batch_size=64)


[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [25]:
# Compare the same target column for truth and prediction (index 3, the
# largest offset).  The truth previously used column 2, i.e. a different
# prediction horizon than the plotted prediction.
px.line(np.c_[y_val2[:, 3], y_val2_pred[:, 3]], title='val2')