In [1]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import Adam
print(tf.__version__)

2.12.0


In [2]:
import pandas as pd
import numpy as np
import os
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [3]:
# filepath = "merged_u1-50_w001_resampled.csv"
filepath = "D:/online dataset csv files preprocessing/merged_u1-50_w001_resampled.csv"

df_original = pd.read_csv(filepath)

In [4]:
df_original

Unnamed: 0,timestamp,LAx,LAy,LAz,GYx,GYy,GYz,name
0,924313410000000,1.242813,-0.160181,-0.900440,0.194931,-0.004242,0.163818,1
1,924313420000000,1.117246,-0.187921,-1.529893,0.236458,0.076378,0.133293,1
2,924313430000000,0.912846,-0.192566,-1.570929,0.226074,0.012253,0.108246,1
3,924313440000000,0.874858,-0.227208,-1.453511,0.139336,-0.050667,0.085030,1
4,924313450000000,0.834022,-0.282976,-1.292671,-0.013374,-0.109314,0.078308,1
...,...,...,...,...,...,...,...,...
2055043,1368327120000000,-9.502644,-7.178468,4.246279,-2.834417,-0.120428,-0.987857,50
2055044,1368327130000000,-9.443804,-7.698220,3.059675,-2.638940,0.561997,-0.947715,50
2055045,1368327140000000,-8.345459,-6.805815,1.853457,-2.483606,1.225222,-0.954696,50
2055046,1368327150000000,-7.580540,-5.629017,1.559257,-2.429500,1.769765,-1.003565,50


In [5]:
df_original.isnull().sum()

timestamp    0
LAx          0
LAy          0
LAz          0
GYx          0
GYy          0
GYz          0
name         0
dtype: int64

In [6]:
df_original.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2055048 entries, 0 to 2055047
Data columns (total 8 columns):
 #   Column     Dtype  
---  ------     -----  
 0   timestamp  int64  
 1   LAx        float64
 2   LAy        float64
 3   LAz        float64
 4   GYx        float64
 5   GYy        float64
 6   GYz        float64
 7   name       int64  
dtypes: float64(6), int64(2)
memory usage: 125.4 MB


In [7]:
df_original['name'].value_counts()

18    82714
47    65984
29    60052
8     60036
6     60005
9     59986
13    59984
41    59984
39    59958
3     59958
37    55050
32    52339
12    51686
2     48001
7     47947
11    47946
34    47824
49    47614
42    41946
19    41946
40    41946
35    41945
33    41945
27    41535
30    40959
24    35980
16    35974
20    35948
28    35947
31    35947
4     35946
23    34545
38    32186
44    31414
43    30100
17    29997
48    29990
50    29988
14    29950
46    29945
10    28892
1     26790
45    26022
25    25926
5     25792
22    25598
36    25173
26    23949
15    17985
21    15774
Name: name, dtype: int64

In [8]:
df_Xtime = df_original.copy()
df_Xtime = df_Xtime.drop(['timestamp'], axis = 1).copy()
df_Xtime.shape

(2055048, 7)

In [9]:
X = df_Xtime[['LAx', 'LAy', 'LAz','GYx', 'GYy', 'GYz']].copy()
y = df_Xtime['name'].copy()

In [10]:
scaler = StandardScaler()
X_transformed = X.copy()
X_transformed = scaler.fit_transform(X_transformed)

scaled_X = pd.DataFrame(data = X_transformed, columns = ['LAx', 'LAy', 'LAz','GYx', 'GYy', 'GYz'])
scaled_X['label'] = y.values

scaled_X

Unnamed: 0,LAx,LAy,LAz,GYx,GYy,GYz,label
0,0.359745,-0.067021,-0.237446,0.105221,0.035341,0.175545,1
1,0.322625,-0.073783,-0.418039,0.133911,0.092852,0.143484,1
2,0.262201,-0.074915,-0.429812,0.126737,0.047108,0.117177,1
3,0.250972,-0.083358,-0.396125,0.066811,0.002223,0.092793,1
4,0.238900,-0.096951,-0.349979,-0.038693,-0.039614,0.085733,1
...,...,...,...,...,...,...,...
2055043,-2.816791,-1.777656,1.239172,-1.987696,-0.047543,-1.034070,50
2055044,-2.799397,-1.904341,0.898730,-1.852644,0.439278,-0.991908,50
2055045,-2.474708,-1.686826,0.552660,-1.745327,0.912402,-0.999240,50
2055046,-2.248585,-1.399994,0.468253,-1.707947,1.300863,-1.050568,50


In [11]:
import scipy.stats as stats

Fs = 100
frame_size = Fs*6
hop_size = int(Fs*1.5)

In [12]:
def get_frames(df, frame_size, hop_size):
    N_FEATURES = 6

    frames = []
    labels = []
    
    last_value = df['label'].iloc[-1]
    for j in range (1,last_value+1): 
        filtered_df = df[df['label'] == j]
        filtered_df = filtered_df.drop('label', axis = 1)
        for i in range(0, len(filtered_df) - frame_size, hop_size):
            LAx = filtered_df['LAx'].values[i: i + frame_size]
            LAy = filtered_df['LAy'].values[i: i + frame_size]
            LAz = filtered_df['LAz'].values[i: i + frame_size]
            GYx = filtered_df['GYx'].values[i: i + frame_size]
            GYy = filtered_df['GYy'].values[i: i + frame_size]
            GYz = filtered_df['GYz'].values[i: i + frame_size]
            

            label = j

            frames.append([LAx, LAy, LAz, GYx, GYy , GYz])
            labels.append(label)

    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)
    
    return frames, labels

In [13]:
X, y = get_frames(scaled_X,frame_size, hop_size)
X.shape, y.shape

((13518, 600, 6), (13518,))

In [14]:
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0, stratify = y)

X_train, X_validation, y_train, y_validation = train_test_split(X_train_val, y_train_val, test_size = 0.1, random_state = 0, stratify = y_train_val)
X_train.shape, X_test.shape, X_validation.shape

((8515, 600, 6), (4056, 600, 6), (947, 600, 6))

In [15]:
X_train[0].shape, X_test[0].shape, X_validation[0].shape

((600, 6), (600, 6), (600, 6))

In [16]:
num = df_original['name'].iloc[-1]
num 

50

In [17]:
# Subtract 1 from each label in y_train
y_train = np.array(y_train) - 1
y_test = np.array(y_test) - 1
y_validation = np.array(y_validation) - 1

In [18]:
y_train.shape, y_test.shape, y_validation.shape

((8515,), (4056,), (947,))

In [19]:
y_train_o = np.eye(50)[y_train]
y_test_o = np.eye(50)[y_test]
y_validation_o = np.eye(50)[y_validation]

In [20]:
y_train_o.shape, y_validation_o.shape, y_validation_o.shape

((8515, 50), (947, 50), (947, 50))

In [21]:
# tut
# model = Sequential()
# model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=X_train[0].shape))
# model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
# model.add(Dropout(0.5))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Flatten())
# model.add(Dense(100, activation='relu'))
# model.add(Dense(50, activation='softmax'))


In [22]:
# chatgpt
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=X_train[0].shape))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(50, activation='softmax'))

In [23]:
model.summary(line_length = 75)

Model: "sequential"
___________________________________________________________________________
 Layer (type)                    Output Shape                  Param #     
 conv1d (Conv1D)                 (None, 598, 32)               608         
                                                                           
 max_pooling1d (MaxPooling1D)    (None, 299, 32)               0           
                                                                           
 conv1d_1 (Conv1D)               (None, 297, 64)               6208        
                                                                           
 max_pooling1d_1 (MaxPooling1D)  (None, 148, 64)               0           
                                                                           
 conv1d_2 (Conv1D)               (None, 146, 128)              24704       
                                                                           
 max_pooling1d_2 (MaxPooling1D)  (None, 73, 128)               0    

In [24]:
model.compile(optimizer=Adam(learning_rate = 0.001), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics = ['accuracy'])

In [25]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'auto',
    min_delta = 0,
    patience = 2,
    verbose = 0,
    restore_best_weights = True
)

In [26]:
history = model.fit(X_train, y_train, epochs = 30,callbacks = [early_stopping], 
                    validation_data= (X_validation, y_validation), verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


In [27]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print('Test loss: {0: .4f}. Test accuracy: {1: .2f}%'.format(test_loss, test_accuracy*100))

Test loss:  0.1361. Test accuracy:  92.85%


# Get W003 and test on it while training on W001 only

In [28]:
# filepath = "merged_u1-50_w001_resampled.csv"
filepath_3 = "D:/online dataset csv files preprocessing/merged_14u_w003_resampled.csv"

df_original_3 = pd.read_csv(filepath_3)
df_original_3

Unnamed: 0,timestamp,LAx,LAy,LAz,GYx,GYy,GYz,name
0,923709910000000,1.897600,-0.948629,0.806025,-0.913564,-0.310084,-0.732483,1
1,923709920000000,1.822922,-0.512797,0.658468,-0.688972,-0.554016,-0.731674,1
2,923709930000000,1.571234,0.178020,-0.381536,-0.285202,-0.868012,-0.680359,1
3,923709940000000,1.347669,0.698209,-1.856026,0.160721,-1.098297,-0.580788,1
4,923709950000000,1.484009,0.825438,-3.034368,0.557777,-1.373177,-0.456787,1
...,...,...,...,...,...,...,...,...
522631,341510670000000,-0.620103,-0.362797,-0.729507,-0.252296,-1.060326,0.087654,46
522632,341510680000000,-0.670146,-0.440084,-0.736829,-0.247093,-1.092079,0.098557,46
522633,341510690000000,-0.770983,-0.401897,-0.680590,-0.236572,-1.106628,0.115730,46
522634,341510700000000,-0.890548,-0.338852,-0.699221,-0.220085,-1.100670,0.133514,46


In [29]:
df_original_3.isnull().sum()

timestamp    0
LAx          0
LAy          0
LAz          0
GYx          0
GYy          0
GYz          0
name         0
dtype: int64

In [30]:
df_original_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522636 entries, 0 to 522635
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   timestamp  522636 non-null  int64  
 1   LAx        522636 non-null  float64
 2   LAy        522636 non-null  float64
 3   LAz        522636 non-null  float64
 4   GYx        522636 non-null  float64
 5   GYy        522636 non-null  float64
 6   GYz        522636 non-null  float64
 7   name       522636 non-null  int64  
dtypes: float64(6), int64(2)
memory usage: 31.9 MB


In [31]:
df_original_3['name'].value_counts()

43    77605
2     47948
18    47922
23    44013
28    40959
33    36637
21    36396
46    29990
36    29528
10    28251
25    27562
24    25926
1     25325
40    24574
Name: name, dtype: int64

In [32]:
df_Xtime_3 = df_original_3.copy()
df_Xtime_3 = df_Xtime_3.drop(['timestamp'], axis = 1).copy()
df_Xtime_3.shape

(522636, 7)

In [33]:
X_3 = df_Xtime_3[['LAx', 'LAy', 'LAz','GYx', 'GYy', 'GYz']].copy()
y_3 = df_Xtime_3['name'].copy()

In [34]:
scaler = StandardScaler()
X_transformed_3 = X_3.copy()
X_transformed_3 = scaler.fit_transform(X_transformed_3)

scaled_X_3 = pd.DataFrame(data = X_transformed_3, columns = ['LAx', 'LAy', 'LAz','GYx', 'GYy', 'GYz'])
scaled_X_3['label'] = y_3.values

scaled_X_3

Unnamed: 0,LAx,LAy,LAz,GYx,GYy,GYz,label
0,0.617858,-0.278392,0.122873,-0.539603,-0.202006,-0.632854,1
1,0.594677,-0.178000,0.082692,-0.404722,-0.364584,-0.632063,1
2,0.516551,-0.018873,-0.200509,-0.162233,-0.573859,-0.581893,1
3,0.447155,0.100951,-0.602023,0.105571,-0.727341,-0.484544,1
4,0.489476,0.130257,-0.922893,0.344028,-0.910546,-0.363310,1
...,...,...,...,...,...,...,...
522631,-0.163656,-0.143448,-0.295264,-0.142471,-0.702034,0.168981,46
522632,-0.179190,-0.161251,-0.297257,-0.139346,-0.723197,0.179640,46
522633,-0.210491,-0.152454,-0.281943,-0.133028,-0.732894,0.196430,46
522634,-0.247605,-0.137932,-0.287016,-0.123126,-0.728923,0.213818,46


In [35]:
# import scipy.stats as stats

# Fs = 100
# frame_size = Fs*6 
# hop_size = Fs*6

In [36]:
X_3, y_3 = get_frames(scaled_X_3,frame_size, hop_size)
X_3.shape, y_3.shape

((3434, 600, 6), (3434,))

In [37]:
X_train_val_3, X_test_3, y_train_val_3, y_test_3 = train_test_split(X_3, y_3, test_size=0.3, random_state=0, stratify=y_3)

X_train_3, X_validation_3, y_train_3, y_validation_3 = train_test_split(X_train_val_3, y_train_val_3, test_size=0.1, random_state=0, stratify=y_train_val_3)
X_train_3.shape, X_test_3.shape, X_validation_3.shape


((2162, 600, 6), (1031, 600, 6), (241, 600, 6))

In [38]:
X_train_3[0].shape, X_test_3[0].shape, X_validation_3[0].shape

((600, 6), (600, 6), (600, 6))

In [39]:
# Subtract 1 from each label in y_train
y_train_3 = np.array(y_train_3) - 1
y_test_3 = np.array(y_test_3) - 1
y_validation_3 = np.array(y_validation_3) - 1
y_3 = np.array(y_3) -1

In [40]:
y_train_3.shape, y_test_3.shape, y_validation_3.shape

((2162,), (1031,), (241,))

In [41]:
test_loss, test_accuracy = model.evaluate(X_test_3, y_test_3)
print('Test loss: {0: .4f}. Test accuracy: {1: .2f}%'.format(test_loss, test_accuracy*100))

Test loss:  8.0820. Test accuracy:  47.24%


In [42]:
X_3.shape, y_3.shape

((3434, 600, 6), (3434,))

In [43]:
X_train_3.shape, y_train_3.shape

((2162, 600, 6), (2162,))

In [44]:
test_loss, test_accuracy = model.evaluate(X_train_3, y_train_3)
print('Test loss: {0: .4f}. Test accuracy: {1: .2f}%'.format(test_loss, test_accuracy*100))

Test loss:  7.9848. Test accuracy:  45.61%


In [45]:
test_loss, test_accuracy = model.evaluate(X_3, y_3)
print('Test loss: {0: .4f}. Test accuracy: {1: .2f}%'.format(test_loss, test_accuracy*100))

Test loss:  8.0143. Test accuracy:  46.27%
