## RNN Model
- What is the input shape going to be?

- Target categories we're going to have (6):
    - Healthy (NoInjury)
    - Knee
    - Thigh
    - Lower Leg (shin/calves)
    - Ankle + Foot
    - Hip/Pelvis + LumbarSpine + SI.Joint

- We're using LSTM instead of GRU (? .. check)

- How many hidden layers / dense layers?

- Please Check: activation, loss, optimizer?
- Fine-tune hyperparameters: epochs, learning_rate

### how can we ensure that the bq file corresponds to the right metadata row name? https://g.co/gemini/share/4af73bcc7f70

In [19]:
# 0. Imports
import numpy as np
from tensorflow.keras import Sequential, Input, layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [21]:
!pwd

/home/jupyter


In [30]:
import pandas as pd

metadata_df = pd.read_csv("stridecare/data/meta/metadata_for_model.csv")

In [43]:
metadata_df['bq_name'] = metadata_df['filename'].apply(lambda x: 'angles_' + x.split('.')[0])

In [90]:
from google.cloud import bigquery
from tensorflow.keras.preprocessing.sequence import pad_sequences

angles_list = []

for i, bq_name in enumerate(metadata_df['bq_name']):
    if i >=100:
        break

    PROJECT = "stridecare-461809"
    DATASET = "angle_csvs"
    TABLE = bq_name

    query = f"""       
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    """
    print(f'working on {bq_name}')
    client = bigquery.Client()
    query_job = client.query(query)
    result = query_job.result()
    df = result.to_dataframe()
    df = df.drop(columns=['Frame', 'L_ankle_X', 'L_ankle_Y', 'R_ankle_X', 'R_ankle_Y']).T
    df_array = np.array(df)
    df_array = pad_sequences(df_array, maxlen = 12000, padding="post", truncating='post')
    print(df_array.shape)
    angles_list.append(df_array)


working on angles_20101005T132240
(10, 12000)
working on angles_20101117T132240
(10, 12000)
working on angles_20120703T102550
(10, 12000)
working on angles_20120717T103748
(10, 12000)
working on angles_20120717T105021
(10, 12000)
working on angles_20120809T100115
(10, 12000)
working on angles_20120829T125604
(10, 12000)
working on angles_20121101T095248
(10, 12000)
working on angles_20121122T140316
(10, 12000)
working on angles_20130410T105446
(10, 12000)
working on angles_20130606T134651
(10, 12000)
working on angles_20130620T121501
(10, 12000)
working on angles_20130806T105329
(10, 12000)
working on angles_20130904T115007
(10, 12000)
working on angles_20130910T105157
(10, 12000)
working on angles_20130917T114750
(10, 12000)
working on angles_20130924T105459
(10, 12000)
working on angles_20130924T115413
(10, 12000)
working on angles_20131105T103758
(10, 12000)
working on angles_20131126T104419
(10, 12000)
working on angles_20131127T115511
(10, 12000)
working on angles_20140116T125000


In [148]:
X_angles = np.array(angles_list)

# Now, X_angles is a single NumPy array with shape (100, 10, 12000)
# (assuming 100 iterations, 10 features, 12000 maxlen)

X_angles_train = X_angles[:70]
X_angles_test = X_angles[70:]


In [157]:
X_meta = metadata_df[['age', 'Height', 'Weight', 'Gender']]
#X_meta_train = X_meta.loc[:69, :]
#X_meta_test = X_meta.loc[69:, :]

# We only processed the first 100 BQ names for angles_list,
# so we should only use the first 100 corresponding metadata rows.
X_meta_subset = X_meta.iloc[:100, :] # Get the first 100 rows of metadata

X_meta_train = X_meta_subset.iloc[:70, :] # Samples 0-69 from the subset
X_meta_test = X_meta_subset.iloc[70:, :] # Samples 70-99 from the subset

In [158]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

scaler = StandardScaler()
X_meta_train_preproc = scaler.fit_transform(X_meta_train.drop(columns='Gender'))
X_meta_test_prerpoc = scaler.transform(X_meta_test.drop(columns='Gender'))

In [159]:
ohe = OneHotEncoder(sparse_output=False, drop="if_binary")
X_meta_train_gender = ohe.fit_transform(X_meta_train[['Gender']])
X_meta_test_gender = ohe.transform(X_meta_test[['Gender']])

In [160]:
X_meta_train_preproc = np.concat((X_meta_train_preproc, X_meta_train_gender), axis = 1)
X_meta_test_preproc = np.concat((X_meta_test_prerpoc, X_meta_test_gender), axis = 1)

In [161]:
from sklearn.preprocessing import LabelEncoder
y = LabelEncoder().fit_transform(metadata_df['InjJoint'])

In [162]:
from tensorflow.keras.utils import to_categorical
y = to_categorical(y[:100])
y_train = y[:70]
y_test = y[70:]
y_test.shape


(30, 6)

In [163]:
len(X_angles_train)

70

In [164]:
#### Imports ####
import numpy as np
from tensorflow.keras import Sequential, Input, layers
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model

#### 1. Architecture ####

## Creating the sequential & meta inputs ##

    # (1.) Time-series input (coordinates)
time_input = Input(shape=(10,12000), name='time_series_input') #Defining input layer

x = layers.LSTM(256, activation='tanh', return_sequences=True)(time_input) #Stack the layers using the syntax: new_layer()(previous_layer)
x = layers.LSTM(128, activation='tanh', return_sequences=True)(x)
x = layers.LSTM(64, activation='tanh', return_sequences=False)(x)  # final temporal summary

x = layers.Dense(64, activation='relu')(x)  # flatten & process time branch

    # (2.) Metadata Input #
# Assume: shape=(n_meta_features,)
meta_input = Input(shape=(4,), name='meta_input') #do we need to add number of frames, or number of runners ?

y = layers.Dense(32, activation='relu')(meta_input)
y = layers.Dense(16, activation='relu')(y)


## Concatenating the two-inputs ##

    # (3.) Concatenate Both Branches #
combined = layers.Concatenate()([x, y])
z = layers.Dense(64, activation='relu')(combined)
z = layers.Dense(6, activation='softmax')(z)  # final classification layer (6 categories)


## Instantiating the model ##

model = Model(inputs=[time_input, meta_input], outputs=z)




#### 2. Compiling the model ####
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=['accuracy'])




#### 3. Fit ####
early_stop = EarlyStopping(monitor='val_loss',
                           patience=5,
                           # min_delta=0.001,(??)
                           restore_best_weights=True)
##checking sizes
print(f"Type of X_angles_train: {type(X_angles_train)}")
print(f"Shape of X_angles_train: {X_angles_train.shape}")
print(f"Type of X_meta_train_preproc: {type(X_meta_train_preproc)}")
print(f"Shape of X_meta_train_preproc: {X_meta_train_preproc.shape}")
print(f"Type of y_train: {type(y_train)}")
print(f"Shape of y_train: {y_train.shape}")

model.fit([X_angles_train, X_meta_train_preproc], y_train, 
          epochs=50, 
          batch_size=32, 
          validation_data = ([X_angles_test, X_meta_test_preproc], y_test),
          verbose=1,
          callbacks=[early_stop])



#### 4. Predict ####
#model.predict(??, ??)
          

# 5. Evaluation
#model.evaluate(??, ??)


Type of X_angles_train: <class 'numpy.ndarray'>
Shape of X_angles_train: (70, 10, 12000)
Type of X_meta_train_preproc: <class 'numpy.ndarray'>
Shape of X_meta_train_preproc: (70, 4)
Type of y_train: <class 'numpy.ndarray'>
Shape of y_train: (70, 6)
Epoch 1/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 870ms/step - accuracy: 0.2009 - loss: 1.7958 - val_accuracy: 0.1333 - val_loss: 1.8012
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 462ms/step - accuracy: 0.2354 - loss: 1.7565 - val_accuracy: 0.1333 - val_loss: 1.8055
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 454ms/step - accuracy: 0.2366 - loss: 1.7390 - val_accuracy: 0.2667 - val_loss: 1.8110
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 460ms/step - accuracy: 0.3472 - loss: 1.6905 - val_accuracy: 0.2333 - val_loss: 1.8527
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 444ms/step - accuracy: 0.3440 - loss:

<keras.src.callbacks.history.History at 0x7f9a80ab5090>

In [146]:
len(X_angles_train)

70