In [None]:
import os
import random

random.seed(43)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import class_weight

import warnings
warnings.filterwarnings("ignore")

import logging
logging.getLogger().setLevel(logging.ERROR)

import torch

import MLP.run_training_testing_mpl as run_traintest_mlp


In [None]:
# Choose the torch backend: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    backend_name = 'cuda'  # NVIDIA GPU
elif torch.backends.mps.is_available():
    backend_name = 'mps'  # Apple-silicon GPU
else:
    backend_name = 'cpu'  # no accelerator found
device = torch.device(backend_name)
device

In [None]:
# Report how many CPUs the host exposes (informational only).
detected_cpus = os.cpu_count()
print(detected_cpus, 'CPUs available')
# The detected count is not used: the notebook always continues with one CPU.
# NOTE(review): presumably this keeps data loading single-process -- confirm.
num_cpus = 1

In [None]:
# Mini-batch size setting.
# NOTE(review): the training call at the end of this notebook passes a literal
# batch_size=1 rather than this variable -- confirm which value is intended.
batch_size = 2

## Reading dataset

In [None]:
# Empty placeholder frame that only declares column names; the next cell
# rebinds `df` to the contents of the CSV file.
placeholder_columns = [
    'user', 'activity', 'timestamp',
    'x-accel', 'y-accel', 'z-accel',
    'device_type', 'sensor_type',
]
df = pd.DataFrame(columns=placeholder_columns)

In [None]:
# Location of the raw WISDM v1.1 file and the column names to assign to it.
dataai = "../data/WISDM_ar_v1.1/"
datapath = dataai
col_names = ['user', 'activity', 'timestamp', 'x-accel', 'y-accel', 'z-accel']

# The file has no header row. ';' is declared as the comment character, so
# everything from a ';' to the end of a line is ignored by the parser.
raw_file = datapath + "WISDM_ar_v1.1_raw.txt"
df = pd.read_csv(
    raw_file,
    header=None,
    names=col_names,
    delimiter=',',
    comment=';',
    on_bad_lines='skip',  # drop unparsable lines instead of raising
)
print(df.shape)
df.head()

In [None]:
# Distinct user ids present in the data (an array of ids, despite the name).
num_usrs = pd.unique(df['user'])
print(num_usrs.shape)

In [None]:
# Activity names, sorted so that class_labels[i] is the name of encoded class i.
# LabelEncoder (used further down to build 'activity_encoded') numbers classes
# in sorted order, whereas Series.unique() returns them in order of first
# appearance; without the sort the names and the integer codes disagree.
# Assumes the 'activity' column holds only strings (no missing values).
class_labels = np.sort(df['activity'].unique())
num_classes = len(class_labels)
print(class_labels)

In [None]:
# Samples per user. Printed explicitly: a notebook cell only auto-displays its
# last expression, so a bare expression earlier in the cell is silently lost.
print(df["user"].value_counts())

# First few samples of user 1 while jogging, ordered by timestamp.
user1_jogging = df.loc[(df['user'] == 1) & (df['activity'] == 'Jogging')]
tmpdf = user1_jogging.sort_values('timestamp').head()
tmpdf


In [None]:
# def standardize_and_encodig(dfold):
#     scalar = StandardScaler()
#     act_col=dfold['activity']
#     usr_col=dfold['user']
#     df_no_act = dfold.drop(columns=['activity', 'user'])
#     df_scaled = scalar.fit_transform(df_no_act)
#     df_scaled = pd.DataFrame(df_scaled, columns=df_no_act.columns)
#     df_scaled['activity'] = act_col
#     df_scaled['user'] = usr_col
#     le = LabelEncoder()
#     df_scaled['activity_encoded'] = le.fit_transform(df_scaled['activity'])
#     return df_scaled
# print(df.head())
# df = standardize_and_encodig(df)
# print(df.head())

In [None]:
def encodedf(df):
    """Add integer-encoded ``<name>_encoded`` columns for categorical columns.

    The columns 'activity', 'sensor_type' and 'device_type' are each encoded
    with their own LabelEncoder. A column that is not present in ``df`` is
    skipped instead of raising ``KeyError`` (the frame loaded from the CSV
    above only has the columns listed in ``col_names``).

    Args:
        df: DataFrame to encode. It is modified in place.

    Returns:
        The same DataFrame, with the new ``*_encoded`` columns added.
    """
    for col in ('activity', 'sensor_type', 'device_type'):
        if col in df.columns:
            # A fresh encoder per column, so no fitted state is shared.
            df[f'{col}_encoded'] = LabelEncoder().fit_transform(df[col])
    return df


df = encodedf(df)
# errors='ignore' makes the drop a no-op when the raw columns are absent.
df = df.drop(columns=['sensor_type', 'device_type'], errors='ignore')
print(df.head())

In [None]:

def plot_samples(num_samples, dataf):
    """Scatter-plot the three acceleration axes for randomly chosen users.

    Draws a grid with one row per sampled user and one column per entry of the
    notebook-level ``class_labels``. Users are drawn with ``random.sample``
    from the notebook-level ``num_usrs``.

    Args:
        num_samples: number of users (grid rows) to draw.
        dataf: DataFrame with 'user', 'activity', 'timestamp', 'x-accel',
            'y-accel' and 'z-accel' columns.
    """
    # squeeze=False keeps `ax` two-dimensional even for a single row or column,
    # so the ax[row, col] indexing below also works when num_samples == 1.
    fig, ax = plt.subplots(num_samples, len(class_labels), figsize=(8, 3.5), squeeze=False)

    for jth, usr in enumerate(random.sample(sorted(num_usrs), num_samples)):
        for idx, act in enumerate(class_labels):
            cell = ax[jth, idx]
            if jth == 0:
                cell.set_title(act)  # activity names only above the top row
            cell.set_yticklabels([])
            cell.set_xticklabels([])
            cell.sharey(ax[jth, 0])  # one common y-range per user row
            if idx == 0:
                cell.set_ylabel(f'usr-{usr}')  # user id only on the first column
            tmpdf = dataf.loc[(dataf['user'] == usr) & (dataf['activity'] == act)]
            for accel_col in ('x-accel', 'y-accel', 'z-accel'):
                cell.scatter(tmpdf['timestamp'], tmpdf[accel_col], label=accel_col, s=0.2)
    plt.tight_layout(pad=0., w_pad=0., h_pad=0)
    plt.show()


# TODO: plot_samples should be changed for this dataset before enabling the call.
# plot_samples(4, df)


In [None]:
# Model inputs: every column except the label (raw and encoded), the user id
# and the timestamp.
non_feature_cols = ['activity', 'activity_encoded', 'user', 'timestamp']
x = df.drop(columns=non_feature_cols)
# Target: the integer-encoded activity.
y = df['activity_encoded']
num_features = len(x.columns)
# Number of rows for each (encoded value, activity name) pair.
print(df.groupby(['activity_encoded', 'activity']).size())
print(y.head())
x.head()


In [None]:
# Balanced per-class weights computed from the encoded labels.
ynumpy = y.to_numpy()
present_classes = np.unique(ynumpy)
balanced_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=ynumpy,
)
# Stored as a float tensor; passed to the training call as `weights`.
class_weights = torch.tensor(balanced_weights, dtype=torch.float)

print(present_classes, class_weights)
print(class_weights.sum(axis=0))

## Training and Testing MLP model

In [None]:
# Run the MLP training routine with every setting passed explicitly.
# NOTE(review): batch_size is the literal 1 here, not the notebook-level
# `batch_size` variable -- confirm which value is intended.
run_traintest_mlp.run_mlp_training(
    x,
    y,
    class_labels,
    device,
    num_features,
    num_classes,
    test_size=0.4,
    val_size=0.2,
    batch_size=1,
    num_cpus=num_cpus,
    lr=0.0001,
    num_epochs=10,
    patience=4,
    modeltype="mlp2",
    num_hidden_lyr=4,
    hidden_lyr_size=num_classes + 1,
    verbose=True,
    weights=class_weights,
)