In [1]:
import sys
sys.path += ['..']

from data_collection.data_collection import LoggerSet, Logger

import numpy as np
import pandas as pd
import plotly.express as px
from data_collection.video_data import get_frame_iterator
from pathlib import Path
from typing import Iterable, Tuple, List
from tqdm import tqdm
import datetime
import tensorflow as tf
import tensorflow.keras as keras

%load_ext autoreload
%autoreload 2


2024-06-10 20:12:41.157169: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Loading data 

In [27]:

def prepare_parquets(logpath):
    """
    Open the log folder at `logpath` as a LoggerSet and export it to parquet.

    The LoggerSet is created with `overwrite_ok=False`. Returns whatever
    `LoggerSet.export_to_parquet()` returns.
    """
    return LoggerSet(Path(logpath), overwrite_ok=False).export_to_parquet()

def stack_frames(gen: Iterable[Tuple[int, np.ndarray]]):
    """
    Collect the images yielded by `gen` into a single array.

    `gen` yields `(index, image)` pairs (e.g. from get_frame_iterator). The
    index is discarded and the images are stacked, in iteration order, along
    a new leading axis.
    """
    return np.stack([image for _, image in gen], axis=0)

class Session:
    """
    One recorded session: camera log, speed-control log and video frames.

    Everything is stored in `self.data` under these keys:

    - 'camera_df': contents of PicameraV2.parquet
    - 'angular_speed_control_df': contents of AngularSpeedControlV2.parquet
    - 'frames': all video frames stacked into one array
    - 'angular_speed_control_df_lin_acc': control log plus linacc0..2 columns
    - 'angular_speed_control_df_with_frames': the above plus frame indices
    """

    def __init__(self, logpath):
        """
        Load the session stored in `logpath` and run the full preparation
        pipeline (load_data -> expand_lin_acc -> match_frame_idx).

        The parquet export is only triggered when PicameraV2.parquet is
        missing; its return value is kept in `self.errs` (empty dict otherwise).
        """
        logpath = Path(logpath)

        errs = {}
        if not (logpath/'PicameraV2.parquet').exists():
            errs = prepare_parquets(logpath)

        self.data = {}
        self.errs = errs
        self.logpath = logpath

        # main
        self.load_data()
        self.expand_lin_acc()
        self.match_frame_idx()

    def load_data(self):
        """
        Read both parquet logs and the video frames into `self.data`.

        Reusable: calling it again simply reloads the three entries.
        """
        self.data['camera_df'] = pd.read_parquet(self.logpath/'PicameraV2.parquet')
        self.data['angular_speed_control_df'] = pd.read_parquet(self.logpath/'AngularSpeedControlV2.parquet')
        self.data['frames'] = stack_frames(get_frame_iterator(self.logpath/"PicameraV2/video"))

    def expand_lin_acc(self, column_name = 'linear_acc'):
        """
        Split the list-like `column_name` column into 'linacc0'..'linacc2'.

        Rows where `column_name` is missing are dropped (inner merge). The
        result is stored under 'angular_speed_control_df_lin_acc'; the
        original control frame is left untouched.
        """

        def to_multiple_columns(x):
            # x is a one-column row; return its only cell so that
            # result_type='expand' spreads the elements over columns.
            return x.iloc[0]

        df = self.data['angular_speed_control_df'].copy()
        expanded = df[[column_name]].dropna().apply(to_multiple_columns, axis=1, result_type='expand')
        # Assumes every value has exactly three components - TODO confirm.
        expanded.columns = ['linacc0', 'linacc1', 'linacc2']

        # `expanded` only carries the source frame's index, so merging on 'idx'
        # presumably relies on the index being named 'idx' - TODO confirm.
        df = pd.merge(df, expanded, on='idx', how='inner')

        self.data['angular_speed_control_df_lin_acc'] = df

    def match_frame_idx(self, time_offset_ms=100):
        """
        Attach camera frame positions to every control-log row.

        Two columns are added and the result is stored under
        'angular_speed_control_df_with_frames':

        - 'offset_frame_idx': insertion position (side='right') of the control
          timestamp in the camera times shifted forward by `time_offset_ms`.
        - 'exact_frame_idx': insertion position (side='left') of the control
          timestamp in the unshifted camera times.

        Both values can equal len(camera_df) when a control timestamp is later
        than every camera timestamp, i.e. they are not guaranteed to be valid
        frame indices. searchsorted requires the camera 'time' column to be
        sorted ascending - this is assumed, not checked.
        """
        camera_time = self.data['camera_df']['time']
        control_df = self.data['angular_speed_control_df_lin_acc'].copy()
        control_time = control_df['time_AngularSpeedControl']

        time_expected = camera_time + pd.to_timedelta(time_offset_ms, unit='ms')
        control_df['offset_frame_idx'] = time_expected.searchsorted(control_time, side='right')
        control_df['exact_frame_idx'] = camera_time.searchsorted(control_time, side='left')

        self.data['angular_speed_control_df_with_frames'] = control_df

    def __repr__(self):
        return f"Session('{str(self.logpath)}')"

    @classmethod
    def load_multiple_session(cls, parent_folder, inclusion_filter=lambda d: d.stem!='excluded'):
        """
        Instantiate one Session per sub-directory of `parent_folder`.

        Reusable - instantiates only. Entries are visited in sorted order so
        the result does not depend on the filesystem's listing order, and
        plain files (which cannot be session folders) are skipped.
        `inclusion_filter` receives each directory Path and returns True to
        keep it.
        """
        session_dirs = sorted(d for d in Path(parent_folder).iterdir() if d.is_dir())
        return [cls(d) for d in filter(inclusion_filter, session_dirs)]

    @staticmethod
    def concatenate_multiple_sessions(sessions: List["Session"], df_name='angular_speed_control_df_with_frames'):
        """
        Concatenate the frames and the `df_name` frames of several sessions.

        MODIFIES EACH SESSION'S DATAFRAME *IN PLACE* as a side effect: the
        columns 'offset_total_frame_idx' and 'exact_total_frame_idx' (the
        per-session indices shifted by the number of frames of all previous
        sessions) are written to `s.data[df_name]`.

        Rows whose per-session frame index is not smaller than that session's
        frame count are left out of the returned frame: after shifting they
        would address a frame of the *next* session (or nothing at all).

        Depends on match_frame_idx.

        Returns `(frames, df)`: all frames stacked along axis 0 and the
        concatenated, filtered dataframes.
        Raises ValueError when `sessions` is empty.
        """
        if not sessions:
            raise ValueError("need at least one session to concatenate")

        frame_set_list = []
        df_list = []
        total_frames = 0

        for s in sessions:
            frames, df = s.data['frames'], s.data[df_name]
            n_frames = len(frames)

            df['offset_total_frame_idx'] = df['offset_frame_idx'] + total_frames
            df['exact_total_frame_idx'] = df['exact_frame_idx'] + total_frames

            # Keep only rows that point at a frame belonging to this session.
            in_range = (df['offset_frame_idx'] < n_frames) & (df['exact_frame_idx'] < n_frames)

            df_list.append(df[in_range])
            frame_set_list.append(frames)
            total_frames += n_frames

        return np.concatenate(frame_set_list, axis=0), pd.concat(df_list)

# Load every session folder under ./data (folders whose stem is 'excluded' are
# skipped by the default filter) and merge them into one frame array + dataframe.
sessions = Session.load_multiple_session('./data')
frames, df = Session.concatenate_multiple_sessions(sessions)

[mov,mp4,m4a,3gp,3g2,mj2 @ 0x640b273d2b80] moov atom not found


In [8]:
def prepare_to_tensorflow(frames, df):
    """
    Build aligned (image, auxiliary input, target) arrays for training.

    Steps:
    1. Drop rows with `speed == 0`.
    2. Average the auxiliary columns per 'exact_total_frame_idx'.
    3. For every remaining row, look up the frame and the averaged auxiliary
       values at that row's 'offset_total_frame_idx'.
    4. Drop rows for which no auxiliary values exist at that index, or whose
       index does not address a frame in `frames`.

    Parameters
    ----------
    frames : array indexable along axis 0 by integer arrays.
    df : DataFrame with the columns 'speed', 'angular_velocity', 'linacc0',
        'linacc1', 'linacc2', 'new_proportion', 'exact_total_frame_idx' and
        'offset_total_frame_idx'.

    Returns
    -------
    (frames, other_inputs, outputs) with equal length along axis 0;
    `other_inputs` holds the five auxiliary columns divided by 100 and
    `outputs` is the 'new_proportion' column as an (n, 1) array.
    """
    feature_cols = ['speed', 'angular_velocity', 'linacc0', 'linacc1', 'linacc2']

    moving = df.query('speed!=0')

    per_frame = moving.groupby('exact_total_frame_idx')[feature_cols].mean()

    frame_idx = moving['offset_total_frame_idx'].to_numpy()
    aligned = per_frame.reindex(frame_idx)

    # Build the row mask *before* touching `frames`: an out-of-range index must
    # be discarded instead of raising, and the frames are then copied only once.
    keep = ~aligned.isna().any(axis=1).to_numpy()
    keep &= (frame_idx >= 0) & (frame_idx < len(frames))

    return (
        frames[frame_idx[keep]],
        aligned.to_numpy()[keep] / 100,
        moving[['new_proportion']].to_numpy()[keep],
    )

In [31]:
# Align frames, auxiliary inputs and targets row by row for training.
frames_reindexed, other_inputs_reindexed, outputs = prepare_to_tensorflow(frames, df)

In [5]:
def validation_chunk_split(n_sample, chuck_size_minmax=(10, 200), val_split=0.3, seed=None):
    """
    Split `range(n_sample)` into train / validation indices chunk by chunk.

    The index range is cut into consecutive chunks whose lengths are drawn
    uniformly from `[chunk_min, chunk_max]` (both inclusive; the last chunk is
    truncated at `n_sample`). Each whole chunk goes to the validation set with
    probability `val_split`, otherwise to the training set, so neighbouring
    samples stay on the same side of the split.

    Parameters
    ----------
    n_sample : number of samples to split.
    chuck_size_minmax : `(chunk_min, chunk_max)` chunk length bounds.
    val_split : probability that a chunk is assigned to validation.
    seed : optional int. When given, a private RandomState is used and the
        result is reproducible; when None, NumPy's global RNG is used.

    Returns
    -------
    (train_idx, val_idx) : two shuffled lists of ints that together contain
    every index in `range(n_sample)` exactly once.

    Raises
    ------
    ValueError if `chunk_min < 1` or `chunk_max < chunk_min`.
    """
    chunk_min, chunk_max = chuck_size_minmax
    if chunk_min < 1 or chunk_max < chunk_min:
        raise ValueError(
            f"chuck_size_minmax must satisfy 1 <= min <= max, got {chuck_size_minmax!r}"
        )

    # RandomState exposes the same randint/rand/shuffle API as the np.random module.
    rng = np.random if seed is None else np.random.RandomState(seed)

    split_points = [0]
    while split_points[-1] < n_sample:
        # randint's upper bound is exclusive, hence the +1.
        split_points.append(split_points[-1] + rng.randint(chunk_min, chunk_max + 1))
    # The last chunk usually overshoots; clamp it to the end of the data.
    split_points[-1] = n_sample

    train_idx = []
    val_idx = []
    for start, stop in zip(split_points[:-1], split_points[1:]):
        target = train_idx if rng.rand() > val_split else val_idx
        target.extend(range(start, stop))

    rng.shuffle(train_idx)
    rng.shuffle(val_idx)

    return train_idx, val_idx


In [6]:
# Seed NumPy's global RNG so the chunked train/validation split is identical on
# every run; otherwise metrics from different runs are measured on different data.
SPLIT_SEED = 0
np.random.seed(SPLIT_SEED)
train_idx, val_idx = validation_chunk_split(len(frames_reindexed), val_split=0.2)
train_frames, train_other, train_out = frames_reindexed[train_idx], other_inputs_reindexed[train_idx], outputs[train_idx]
val_frames, val_other, val_out = frames_reindexed[val_idx], other_inputs_reindexed[val_idx], outputs[val_idx]

# Model

In [9]:
def get_model(lr=.1, other_metrics=None, image_shape=(64, 114, 3), n_other_inputs=5):
    """
    Build and compile the two-input regression model.

    A small CNN encodes the image; its flattened output is concatenated with
    a vector of auxiliary inputs and passed through two dense layers to one
    sigmoid output.

    Parameters
    ----------
    lr : learning rate handed to the Adam optimiser.
    other_metrics : optional list of extra Keras metrics, appended to 'MAE'.
    image_shape : shape of one input image (without the batch axis).
    n_other_inputs : width of the auxiliary input vector. It must equal the
        number of columns of the auxiliary array fed to the model; the default
        of 5 matches the five columns built by prepare_to_tensorflow
        (previously this was hard-coded to 2).

    Returns
    -------
    A compiled `keras.Model` taking `[images, other_inputs]`.
    """
    # Drop any graph/state left over from models built earlier in this kernel.
    tf.keras.backend.clear_session()

    # Optional image augmentation; currently disabled.
    img_aug_preprocess_layers = [
        # keras.layers.RandomTranslation(0.05, 0.05, fill_mode='reflect'),
        # keras.layers.RandomRotation(0.02, fill_mode='reflect'),
        # keras.layers.RandomZoom(0.05, fill_mode='reflect'),
        # keras.layers.RandomContrast(0.5),
        # keras.layers.RandomBrightness([-0.7, 0.7])
    ]

    cnn_layers = [
        keras.layers.Conv2D(16, 5, strides=2, activation='relu'),
        keras.layers.Conv2D(32, 5, strides=2, activation='relu'),
        keras.layers.Conv2D(64, 5, strides=2, activation='relu'),
        keras.layers.Conv2D(64, 3, activation='relu'),
        keras.layers.Conv2D(96, 3, activation='relu'),
        keras.layers.Flatten()
    ]

    cnn = keras.Sequential(
        [
            keras.layers.InputLayer(image_shape),
            keras.layers.Rescaling(1/255),  # map 0..255 pixel values to 0..1
            *img_aug_preprocess_layers,
            *cnn_layers
        ])

    other_inputs = keras.layers.InputLayer((n_other_inputs,))
    x = keras.layers.Concatenate()([cnn.outputs[0], other_inputs.output])
    x = keras.layers.Dense(64, activation='relu')(x)
    x = keras.layers.Dense(64, activation='relu')(x)
    y = keras.layers.Dense(1,  activation='sigmoid')(x)

    model = keras.Model([cnn.inputs[0], other_inputs.output], y)

    optimiser = keras.optimizers.Adam(lr)
    # None instead of a mutable [] default; a fresh list is built on each call.
    metrics = ['MAE'] + list(other_metrics or [])
    model.compile(optimizer=optimiser, loss='Huber', metrics=metrics)

    return model

# Build the model with default settings and print its layer/parameter summary.
model = get_model()
model.summary()

In [8]:
from train_tools import find_lr, tg_notify, use_tensorboard, end_epoch_notify

In [23]:
# Run find_lr (from train_tools) on a freshly built model - presumably a
# learning-rate sweep; see train_tools for what it logs.
find_lr(get_model(), [train_frames, train_other], train_out)
# To inspect the logs, run in a shell: tensorboard --logdir src/training/logs --bind_all

In [22]:
# Train with an explicit (very small) learning rate instead of get_model's default.
model = get_model(1e-6)
model.fit(
    [train_frames, train_other],
    train_out,
    epochs=100,
    validation_data=([val_frames, val_other], val_out),
    callbacks=[
        # Stop after 10 epochs without val_loss improvement and roll back to the
        # best epoch, so the model saved afterwards is not 10 epochs past its best.
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        use_tensorboard('training'),
        end_epoch_notify()
    ]
)

Epoch 1/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 11ms/step - MAE: 0.2001 - loss: 0.0425 - val_MAE: 0.1312 - val_loss: 0.0196
Epoch 2/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - MAE: 0.1331 - loss: 0.0206 - val_MAE: 0.1310 - val_loss: 0.0196
Epoch 3/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - MAE: 0.1332 - loss: 0.0209 - val_MAE: 0.1309 - val_loss: 0.0196
Epoch 4/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - MAE: 0.1319 - loss: 0.0202 - val_MAE: 0.1308 - val_loss: 0.0195
Epoch 5/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - MAE: 0.1315 - loss: 0.0207 - val_MAE: 0.1307 - val_loss: 0.0195
Epoch 6/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - MAE: 0.1322 - loss: 0.0206 - val_MAE: 0.1306 - val_loss: 0.0194
Epoch 7/100
[1m1449/1449[0m [32m━━━━━━━━━━━━━━━━━━━━[0

KeyboardInterrupt: 

In [18]:
#frames_reindexed#, other_inputs_reindexed, outputs

In [10]:
# Predict on the first `idx` rows of the full reindexed set. These rows contain
# both training and validation samples, so this is a visual check, not an evaluation.
idx = 5000
y_pred = model.predict([frames_reindexed[:idx], other_inputs_reindexed[:idx]], batch_size=64)

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


In [19]:
# Sanity check: targets and predictions side by side should be (idx, 2).
np.c_[outputs[:idx, 0], y_pred[:idx, 0]].shape

(5000, 2)

In [20]:
# Line plot of target (column 0) against prediction (column 1) for the first `idx` rows.
px.line(np.c_[outputs[:idx, 0], y_pred[:idx, 0]])

In [12]:
# Predictions multiplied by 100 - presumably to show the proportion as a percentage; confirm.
px.line(y_pred*100)

In [10]:
# Persist the trained model in the native Keras format (path is relative to the notebook's working directory).
model.save('10Jun-pi.keras')