In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_sample_weight

dataset_name = "MotionSense"
x_train = pd.read_csv(dataset_name+"_x_train.csv")
y_train = pd.read_csv(dataset_name+"_y_train.csv")    
x_test = pd.read_csv(dataset_name+"_x_test.csv")
y_test = pd.read_csv(dataset_name+"_y_test.csv")

x_train["acc_x"] = x_train["userAcceleration.x"]+x_train["gravity.x"]
x_train["acc_y"] = x_train["userAcceleration.y"]+x_train["gravity.y"]
x_train["acc_z"] = x_train["userAcceleration.z"]+x_train["gravity.z"]
x_train = x_train.drop(columns=["userAcceleration.x","userAcceleration.y","userAcceleration.z", "gravity.x", "gravity.y", "gravity.z"])
x_train.rename(columns={"rotationRate.x":"rot_x", "rotationRate.y":"rot_y", "rotationRate.z":"rot_z"}, inplace=True)
x_train.rename(columns={"attitude.roll":"roll", "attitude.pitch":"pitch", "attitude.yaw":"yaw"}, inplace=True)


x_test["acc_x"] = x_test["userAcceleration.x"]+x_test["gravity.x"]
x_test["acc_y"] = x_test["userAcceleration.y"]+x_test["gravity.y"]
x_test["acc_z"] = x_test["userAcceleration.z"]+x_test["gravity.z"]
x_test = x_test.drop(columns=["userAcceleration.x","userAcceleration.y","userAcceleration.z", "gravity.x", "gravity.y", "gravity.z"])
x_test.rename(columns={"rotationRate.x":"rot_x", "rotationRate.y":"rot_y", "rotationRate.z":"rot_z"}, inplace=True)
x_test.rename(columns={"attitude.roll":"roll", "attitude.pitch":"pitch", "attitude.yaw":"yaw"}, inplace=True)

y_train.rename(columns={"act":"activity", "id":"userid"}, inplace=True)
y_test.rename(columns={"act":"activity", "id":"userid"}, inplace=True)

x_train.shape, y_train.shape, x_test.shape, y_test.shape


train_dataset = x_train
train_dataset['activity'] = y_train['activity']
train_dataset['userid'] = y_train['userid']
train_dataset['trial'] = y_train['trial']


test_dataset = x_test
test_dataset['activity'] = y_test['activity']
test_dataset['userid'] = y_test['userid']
test_dataset['trial'] = y_test['trial']

print(train_dataset.shape, test_dataset.shape)

train_dataset = train_dataset[['acc_x', 'acc_y', 'acc_z', 'rot_x', 'rot_y', 'rot_z', 
                              'activity', 'userid', 'trial']]
test_dataset = test_dataset[['acc_x', 'acc_y', 'acc_z', 'rot_x', 'rot_y', 'rot_z',
                              'activity', 'userid','trial']]                              

features = list(train_dataset.columns[:6])
info = list(train_dataset.columns[6:])
features, info                              



print("Original Training Data Mean/Std:\n", train_dataset[features].describe().loc[['mean','std']])  
print("Original Test Data Mean/Std:\n",test_dataset[features].describe().loc[['mean','std']])  

data_train = train_dataset[features]
data_means = data_train.mean(0)
data_stds = data_train.std(0)
train_dataset[features] = (data_train - data_means)/(data_stds)

data_test = test_dataset[features]
test_dataset[features] = (data_test - data_means)/(data_stds)

print("Standardized Training Data Mean/Std:\n", train_dataset[features].describe().loc[['mean','std']])  
print("Standardized Test Data Mean/Std:\n",test_dataset[features].describe().loc[['mean','std']])  


from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
class SensorWindowGenerator(TimeseriesGenerator):
    def __init__(self, data, targets, info, length,
                 sampling_rate=1,
                 stride=1,
                 start_index=0,
                 end_index=None,
                 shuffle=False,
                 reverse=False,
                 batch_size=128):
        super().__init__(data, targets, length, sampling_rate, stride, start_index, end_index, shuffle,reverse,batch_size)
        self.info = info
    
    def __getitem__(self, index):
        if self.shuffle:
            rows = np.random.randint(
                self.start_index, self.end_index + 1, size=self.batch_size)
        else:
            i = self.start_index + self.batch_size * self.stride * index
            rows = np.arange(i, min(i + self.batch_size *
                                    self.stride, self.end_index + 1), self.stride)

        samples = np.array([self.data[row - self.length:row:self.sampling_rate]
                            for row in rows 
                            if np.all(self.info[row - self.length] == self.info[row]) ])
        targets = np.array([self.targets[row] for row in rows if np.all(self.info[row - self.length] == self.info[row])])
        
        infos = np.array([self.targets[row] for row in rows if np.all(self.info[row - self.length] == self.info[row])])
        
        if self.reverse:
            return samples[:, ::-1, ...], targets
        return samples, targets

def get_xyi_single(train_data, test_data, features, label, info):
    x_train = train_data[features]
    x_test  = test_data[features]
    y_train = train_data[label]
    y_test  = test_data[label]
    i_train = train_data[info]
    i_test  = test_data[info]
    return x_train.values, y_train.values, x_test.values, y_test.values, i_train.values, i_test.values

def get_generators(train_data, test_data, features, label, info, length, train_stride, test_stride, batch_size, sampling_rate):
    x_train, y_train, x_test, y_test, i_train, i_test = get_xyi_single(train_data,test_data, features,label,info)
    train_gen = SensorWindowGenerator(data = x_train,
                                      targets = y_train,
                                      info = i_train,
                                      stride = train_stride, 
                                      length = length, 
                                      batch_size = batch_size,
                                      sampling_rate=sampling_rate)
    test_gen = SensorWindowGenerator(data = x_test,
                                     targets = y_test,
                                     info = i_train,
                                     stride = test_stride, 
                                     length = length, 
                                     batch_size=batch_size,
                                     sampling_rate=sampling_rate)
    return train_gen, test_gen     
################################################################################

def get_data(train_dataset, test_dataset, sensors,acts,stride_size,w_size,
             org_smpl_rate,re_size_smpl_rate,verbose=False):
    
    if verbose:
        print("Original Data:\n", train_dataset.shape, test_dataset.shape)

    features = list(train_dataset.columns[:6])    
    info = list(train_dataset.columns[6:])

    if verbose:
        print("Standardized Training Data Mean/Std:\n", train_dataset[features].describe().loc[['mean','std']])  
        print("Standardized Test Data Mean/Std:\n",test_dataset[features].describe().loc[['mean','std']])  
    if verbose:
        print("Stride for each Activity:\n", dict(zip([act_lbls[x] for x in acts],stride_size)))

    length = w_size ## Sliding Window length (Size)
    sampling_rate = 1 ## Larger integers means lower frequency
    batch_size = int(1e12)

    X_train = np.zeros((0, int(w_size*(re_size_smpl_rate/org_smpl_rate)), len(features),1))
    y_train = np.zeros((0, len(info)))
    X_test =  np.zeros((0, int(w_size*(re_size_smpl_rate/org_smpl_rate)), len(features),1))
    y_test =  np.zeros((0, len(info)))

    for lbl in  sorted(train_dataset['activity'].unique()):
        train_stride = stride_size[int(lbl)] ## Step Size of the Sliding Window for training
        test_stride = train_stride ## Step Size of the Sliding Window for testing
        
        train_gen, test_gen = get_generators(train_dataset[train_dataset['activity']==lbl],
                                            test_dataset[test_dataset['activity']==lbl],
                                            features,
                                            ['activity', 'userid','trial'],                                                                                      
                                            ['userid','trial'],
                                            length, train_stride, test_stride, batch_size, sampling_rate)
                
        x_train_gen, y_train_gen = train_gen[0]
        x_test_gen, y_test_gen = test_gen[0]
        
        x_train_gen = np.expand_dims(x_train_gen,3)
        x_test_gen = np.expand_dims(x_test_gen,3)
        
        X_train = np.append(X_train, x_train_gen, axis=0)
        y_train = np.append(y_train, y_train_gen, axis=0)
        X_test = np.append(X_test, x_test_gen, axis=0)
        y_test = np.append(y_test, y_test_gen, axis=0)
        if verbose:
            print(act_lbls[int(lbl)])
            print(X_train.shape, x_train_gen.shape)
            print(X_test.shape, x_test_gen.shape)

    nb_act_classes = len(np.unique(y_train[:,0]))

    if verbose:        
        print("Shapes:\n", X_train.shape, y_train.shape, X_test.shape, y_test.shape)
        print("Activities Samples (Train):\n", pd.DataFrame(y_train[:,0])[0].value_counts())
        print("Activities Samples (Test):\n", pd.DataFrame(y_test[:,0])[0].value_counts())
    ###################### ONLY ACT #################
    return X_train, y_train[:,0], X_test, y_test[:,0]

act_lbls = ["STN","STU","WAL","JOG","STD","SIT"]

w_time = 2.56

org_smpl_rate = 50
re_size_smpl_rate = 50
stride_size = np.array([25,25,50,25,50,50])

w_size = int(w_time*org_smpl_rate)
ms_X_train, ms_Y_train, ms_X_test, ms_Y_test = get_data(train_dataset, test_dataset,
                                                        sensors ="ag",
                                            acts = [0,1,2,3,4,5],
                                            stride_size = stride_size,
                                            w_size = w_size, 
                                            org_smpl_rate = org_smpl_rate,
                                            re_size_smpl_rate=re_size_smpl_rate,
                                            verbose=True)
print(ms_X_train.shape, ms_Y_train.shape, "\n",
      ms_X_test.shape, ms_Y_test.shape)

nb_act_classes = len(np.unique(ms_Y_train))
ms_act_weights = class_weight.compute_class_weight('balanced',
                                                range(nb_act_classes),
                                                ms_Y_train).round(3)
ms_act_weights_dict = dict(zip(range(len(ms_act_weights)),ms_act_weights))                                                
print("Activity Weights",dict(zip(act_lbls,ms_act_weights)))

np.save("X_train.npy", ms_X_train[:,:,:,0])
np.save("X_test.npy", ms_X_test[:,:,:,0])
np.save("y_train.npy", ms_Y_train)
np.save("y_test.npy", ms_Y_test)

(1081446, 12) (331419, 12)
Original Training Data Mean/Std:
          acc_x     acc_y     acc_z     rot_x     rot_y     rot_z
mean  0.032411  0.830334 -0.086742  0.004127  0.016184  0.013612
std   0.441784  0.622959  0.506685  1.321412  1.246579  0.822903
Original Test Data Mean/Std:
          acc_x     acc_y     acc_z     rot_x     rot_y     rot_z
mean  0.057188  0.696609 -0.141355  0.009218  0.009352  0.009982
std   0.468595  0.662019  0.556168  1.195210  1.158414  0.757516


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Standardized Training Data Mean/Std:
              acc_x         acc_y         acc_z         rot_x         rot_y  \
mean -5.777648e-15  2.215782e-13 -5.838397e-15 -7.838207e-17 -6.861326e-17   
std   1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00   

             rot_z  
mean -9.545500e-17  
std   1.000000e+00  
Standardized Test Data Mean/Std:
          acc_x     acc_y     acc_z     rot_x     rot_y     rot_z
mean  0.056085 -0.214661 -0.107784  0.003853 -0.005480 -0.004411
std   1.060690  1.062701  1.097660  0.904495  0.929274  0.920542
Original Data:
 (1081446, 9) (331419, 9)
Standardized Training Data Mean/Std:
              acc_x         acc_y         acc_z         rot_x         rot_y  \
mean -5.777648e-15  2.215782e-13 -5.838397e-15 -7.838207e-17 -6.861326e-17   
std   1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00   

             rot_z  
mean -9.545500e-17  
std   1.000000e+00  
Standardized Test Data Mean/Std:
          acc_x     acc_y