# In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn import preprocessing
from scipy import stats
from sklearn import preprocessing
import sklearn 
import keras.backend
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics


# Read files
def read_data(file_name, date='2021-04-14'):
    """Load one sensor CSV and resample it onto a regular 10 ms grid.

    Lines starting with ';' in the file are skipped.  Only the first value of
    the ``time`` column is used: it is combined with ``date`` to form the
    start timestamp, and every following row is assumed to be 10.86 ms after
    the previous one.

    Parameters
    ----------
    file_name : str or path-like
        CSV file with a ``time`` column holding a time-of-day string.
    date : str, optional
        Calendar date prepended to the first ``time`` value.  Defaults to the
        date that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by timestamps spaced 10 ms apart (bin edges shifted by
        4 ms), with empty bins filled by linear interpolation.
    """
    df = pd.read_csv(file_name, comment=';')
    start = pd.to_datetime(date + ' ' + df["time"].iloc[0])
    # 10.86 ms sampling period, written as a Timedelta so it does not depend
    # on the deprecated 'S' frequency alias.
    df["time"] = pd.date_range(start, periods=df.shape[0],
                               freq=pd.Timedelta(microseconds=10860))
    df.set_index('time', inplace=True)
    # Truncate timestamps to millisecond resolution before binning.
    df.index = df.index.values.astype('M8[ms]')
    # `offset="4ms"` replaces the removed `base=4` keyword; "10ms" replaces
    # the deprecated "10L" alias.
    df = df.resample("10ms", offset="4ms").first().interpolate()
    df = df.rename(columns={"index": "time"})
    return df
def read_esense(file):
    """Load a header-less eSense CSV and resample it onto a 10 ms grid.

    The file is read as nine columns: epoch-millisecond ``time``, ``device``,
    three ``acc*`` columns, three ``acc1*`` columns and ``label``.  Timestamps
    are shifted forward by two hours.

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The six numeric sensor columns, indexed by timestamps spaced 10 ms
        apart (bin edges shifted by 4 ms), with empty bins filled by linear
        interpolation.
    """
    df = pd.read_csv(file, header=None)
    df.columns = ['time', 'device', 'accx', 'accy', 'accz',
                  'acc1x', 'acc1y', 'acc1z', 'label']
    df['time'] = pd.to_datetime(df['time'], unit='ms') + pd.Timedelta(hours=2)
    df.index = df['time']
    # Drop the non-signal columns *before* interpolating: interpolating
    # string columns is deprecated in recent pandas and was never meaningful.
    df = df.drop(columns=["time", "device", "label"])
    # `offset="4ms"` replaces the removed `base=4` keyword; "10ms" replaces
    # the deprecated "10L" alias.
    df = df.resample("10ms", offset="4ms").first().interpolate()
    return df


# Ankle sensor, subject 2: load each recording, then stack them
# (shooting, layup, dribbling, movements).
dribble_sub2_ankle = read_data('dribbling_sbj2_ankle.csv')
layup_sub2_ankle = read_data('layup_sbj2_ankle.csv')
movements_sub2_ankle_1 = read_data('movements_sbj2_ankle_1.csv')
shooting_sub2_ankle_1 = read_data('shooting_sbj2_ankle_1.csv')
df_sub2_ankle = pd.concat(
    [
        shooting_sub2_ankle_1,
        layup_sub2_ankle,
        dribble_sub2_ankle,
        movements_sub2_ankle_1,
    ]
)


# Ankle sensor, subject 1: no dribbling recording is loaded for this subject.
layup_sub1_ankle = read_data('layup_sbj1_ankle.csv')
movements_sub1_ankle_1 = read_data('movements_sbj1_ankle_1.csv')
shooting_sub1_ankle = read_data('shooting_sbj1_ankle.csv')
df_sub1_ankle = pd.concat(
    [
        shooting_sub1_ankle,
        layup_sub1_ankle,
        movements_sub1_ankle_1,
    ]
)


# eSense earable recordings, one file per subject.
df_esense_sub2 = read_esense('eSense_sbj2.csv')
df_esense_sub1 = read_esense('eSense_sbj1.csv')


# Wrist sensor, subject 2: load each recording, then stack them
# (shooting, layup, dribbling, movements).
dribble_sub2_wrist = read_data('dribbling_sbj2_wrist.csv')
layup_sub2_wrist = read_data('layup_sbj2_wrist.csv')
movements_sub2_wrist = read_data('movements_sbj2_wrist.csv')
shooting_sub2_wrist = read_data('shooting_sbj2_wrist.csv')
df_sub2_wrist = pd.concat(
    [
        shooting_sub2_wrist,
        layup_sub2_wrist,
        dribble_sub2_wrist,
        movements_sub2_wrist,
    ]
)


# Wrist sensor, subject 1: no dribbling recording is loaded for this subject.
layup_sub1_wrist = read_data('layup_sbj1_wrist.csv')
movements_sub1_wrist = read_data('movements_sbj1_wrist.csv')
shooting_sub1_wrist = read_data('shooting_sbj1_wrist.csv')
df_sub1_wrist = pd.concat(
    [
        shooting_sub1_wrist,
        layup_sub1_wrist,
        movements_sub1_wrist,
    ]
)


# Column names after the two index-joins.  The ankle and wrist frames share
# column names, so the second merge tags them with pandas' default suffixes:
# `_x` for the left (ankle) side and `_y` for the right (wrist) side.
_SENSOR_COLUMN_NAMES = {
    'acc_x_x': 'ankle_x',
    'acc_y_x': 'ankle_y',
    'acc_z_x': 'ankle_z',
    'accx': 'esense_x',
    'accy': 'esense_y',
    'accz': 'esense_z',
    'acc1x': 'gyro_x',
    'acc1y': 'gyro_y',
    'acc1z': 'gyro_z',
    'acc_x_y': 'wrist_x',
    'acc_y_y': 'wrist_y',
    'acc_z_y': 'wrist_z',
}

# Subject 2: join eSense, ankle and wrist on their shared timestamps.
df_sub2 = df_esense_sub2.merge(
    df_sub2_ankle, left_index=True, right_index=True
).merge(
    df_sub2_wrist, left_index=True, right_index=True
)
df_sub2 = df_sub2.rename(columns=_SENSOR_COLUMN_NAMES)


# Subject 1: same join and renaming.
df_sub1 = df_esense_sub1.merge(
    df_sub1_ankle, left_index=True, right_index=True
).merge(
    df_sub1_wrist, left_index=True, right_index=True
)
df_sub1 = df_sub1.rename(columns=_SENSOR_COLUMN_NAMES)


# Labelling: z-score every sensor channel per subject, then tag each row with
# the activity whose time-of-day interval contains it ('Null' otherwise).
column = ['esense_x','esense_y','esense_z','gyro_x','gyro_y','gyro_z','ankle_x','ankle_y','ankle_z','wrist_x','wrist_y','wrist_z']


def _standardise(values):
    """Return `values` with each column shifted to mean 0 and scaled by its std."""
    return (values - values.mean()) / values.std()


def _rows_between(frame, start, end):
    """Boolean mask of the rows of `frame` whose index time lies in [start, end]."""
    selected = frame.between_time(start, end).index
    return frame.index.isin(selected)


df_sub2[column] = _standardise(df_sub2[column])
df_sub1[column] = _standardise(df_sub1[column])

# Interval order matches the activity order in `subject_1`; np.select takes
# the first matching condition.
_intervals_sub1 = [
    ('18:33:28', '18:35:44'),
    ('18:23:24', '18:25:21'),
    ('18:45:18', '18:47:10'),
    ('18:13:32', '18:15:40'),
    ('18:42:49', '18:45:17'),
]
time1 = [_rows_between(df_sub1, begin, finish) for begin, finish in _intervals_sub1]
subject_1 = ['dribbling', 'layup', 'running', 'shooting', 'walking']
df_sub1['Label'] = np.select(time1, subject_1, 'Null')
df_sub1 = df_sub1[column + ['Label']]

# Same procedure for subject 2 with that subject's own intervals.
_intervals_sub2 = [
    ('19:27:02', '19:29:55'),
    ('19:17:43', '19:20:07'),
    ('19:39:46', '19:41:50'),
    ('19:00:20', '19:02:03'),
    ('19:37:19', '19:39:46'),
]
time2 = [_rows_between(df_sub2, begin, finish) for begin, finish in _intervals_sub2]
subject_2 = ['dribbling', 'layup', 'running', 'shooting', 'walking']
df_sub2['Label'] = np.select(time2, subject_2, 'Null')
df_sub2 = df_sub2[column + ['Label']]

# Stack both subjects and discard the timestamp index.
df = pd.concat([df_sub1, df_sub2]).reset_index(drop=True)


# Label encoding: replace the activity names in df["Label"] with integer codes.
le = preprocessing.LabelEncoder()
label = le.fit(df["Label"]).transform(df["Label"])
df["Label"] = label


# Separate the target column (y) from the sensor channels (x).
y_data = df['Label']
x_data = df.drop(columns='Label')


# Sliding window technique
def sliding_window(dataset,window_length,overlap_ratio):
    """Cut `dataset` into fixed-length, optionally overlapping windows.

    Parameters
    ----------
    dataset : pandas.DataFrame or pandas.Series
        Rows are sliced positionally with ``.iloc``.
    window_length : int
        Number of consecutive rows per window.
    overlap_ratio : float or None
        Overlap between consecutive windows in percent of `window_length`.
        ``None`` means no overlap.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, window_length, *dataset.shape[1:])``.
        It has zero windows when the dataset is too short.

    Raises
    ------
    ValueError
        If the overlap is not smaller than the window; the start position
        would then never advance.
    """
    # `None` used to leave overlap_elements undefined (NameError).
    if overlap_ratio is None:
        overlap_elements = 0
    else:
        overlap_elements = int((overlap_ratio / 100) * window_length)
    if overlap_elements >= window_length:
        # Previously this only printed a message and then looped forever.
        raise ValueError("Overlapping elements are more")
    step = window_length - overlap_elements

    # NOTE(review): the strict `<` skips a final window that would end exactly
    # on the last row.  Kept as-is so the number of windows is unchanged for
    # existing callers - confirm whether `<=` was intended.
    last_start = dataset.shape[0] - window_length
    windows = [
        np.asarray(dataset.iloc[start:start + window_length])
        for start in range(0, max(last_start, 0), step)
    ]

    if not windows:
        # Previously the result was unbound here (UnboundLocalError).
        return np.empty((0, window_length) + tuple(dataset.shape[1:]))
    # All windows have the same shape, so one stack after the loop replaces
    # the per-iteration conversion and its ragged-array fallback.
    return np.stack(windows)


# X_train, y_train, X_test, y_test
# The windows are split into 6 equal folds; one of the 6 folds is used for testing each time.
# Windows left over after dividing evenly by 6 (the trailing remainder) are excluded.
# array_data_x holds all windows of x_data, i.e. the sensor channels without the label column.
# array_data_y holds all windows of y_data, i.e. the label column.
# X_train is the training part of array_data_x (first five folds).
# X_test is the test part of array_data_x (last fold).
# y_train is the training part of array_data_y (first five folds).
# y_test is the test part of array_data_y (last fold).
# NOTE: X_train/X_test/y_train/y_test built here are reassigned per fold by the
# cross-validation loop further down (see `cv.split`).
window_length = 100
# Windows of 100 rows with 50 % overlap, for features and labels alike.
s_x = sliding_window(x_data,window_length,50)
s_y = sliding_window(y_data,window_length,50)
# NOTE(review): `lists` is not used anywhere in the visible code.
lists = []
b_x = np.asarray(s_x, dtype=np.float32)
b_y = np.asarray(s_y, dtype=np.float32)
t_x = torch.from_numpy(b_x)
t_y = torch.from_numpy(b_y)
# Number of windows that do not fit into 6 equally sized folds.
length_x = int(len(t_x)%6)
length_y = int(len(t_y)%6)
# Drop that remainder from the end so the window count is a multiple of 6.
data_x = t_x[0:len(s_x)-length_x]
data_y = t_y[0:len(s_y)-length_y]
# Split into 6 consecutive folds and stack them along a new leading axis.
final_data_x = np.array_split(data_x,6)
final_data_y = np.array_split(data_y,6)
array_data_x = np.array([np.array(xi) for xi in final_data_x])
array_data_y = np.array([np.array(yi) for yi in final_data_y])
X_train = array_data_x[0:5]
X_test = array_data_x[5:]
# One label per window: the most frequent label among the window's rows.
# From here on s_y holds those per-window labels instead of the raw windows.
s_y = torch.tensor(stats.mode(s_y,axis=1).mode)
y_train = np.vstack(array_data_y[0:5])
y_train = torch.tensor(stats.mode(y_train,axis=1).mode)
y_test = np.vstack(array_data_y[5:])
y_test = torch.tensor(stats.mode(y_test,axis=1).mode)
# Merge the fold axis back into the window axis: (folds * windows_per_fold, dim2, dim3).
# Presumably dim2 is the window length and dim3 the channel count - TODO confirm.
array_data_x = torch.from_numpy(array_data_x)
array_data_x = array_data_x.reshape(array_data_x.shape[0]*array_data_x.shape[1],array_data_x.shape[2],array_data_x.shape[3])
array_data_y = np.vstack(array_data_y)
array_data_y = torch.tensor(stats.mode(array_data_y,axis=1).mode)
# Flatten the per-window labels to 1-D.
array_data_y = array_data_y.reshape(array_data_y.shape[0],)
s_y = s_y.reshape(s_y.shape[0],)


# Network with 2 conv1D layers and 3 fully connected layers.
class SimpleCNN(nn.Module):
    """1-D CNN classifier: two conv blocks followed by three linear layers.

    Each conv block is Conv1d(kernel 5) -> ReLU -> MaxPool1d(kernel 2,
    stride 1) -> BatchNorm1d.  A block shortens the time axis by 5, so the
    flattened feature size is ``64 * (window_length - 10)``; that is 5760 for
    the default window of 100 samples.

    Parameters
    ----------
    in_channels : int, optional
        Number of input channels (default 12).
    num_classes : int, optional
        Number of output logits (default 6).
    window_length : int, optional
        Length of the time axis of the input (default 100).

    Raises
    ------
    ValueError
        If `window_length` is too short to survive both conv blocks.
    """

    def __init__(self, in_channels=12, num_classes=6, window_length=100):
        super().__init__()
        out_1 = 32
        out_2 = 64
        # conv (k=5) removes 4 steps and pool (k=2, stride=1) removes 1, twice.
        flat_features = out_2 * (window_length - 10)
        if flat_features <= 0:
            raise ValueError("window_length must be greater than 10")
        self.conv1 = nn.Conv1d(in_channels, out_1, kernel_size=5)
        self.maxpool1 = nn.MaxPool1d(kernel_size=2, stride=1)
        self.bn1 = nn.BatchNorm1d(out_1)
        self.conv2 = nn.Conv1d(out_1, out_2, kernel_size=5)
        self.maxpool2 = nn.MaxPool1d(kernel_size=2, stride=1)
        self.bn2 = nn.BatchNorm1d(out_2)
        self.fc1 = nn.Linear(flat_features, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        `x` must be laid out as (batch, in_channels, window_length).
        """
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = self.bn1(x)
        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = self.bn2(x)
        # Flatten per sample; the batch size used to be hard-coded to 543.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Return the logits as they are: re-wrapping them in torch.FloatTensor
        # is a CPU-only constructor and does not suit CUDA tensors.
        return self.fc3(x)


# Pick the first CUDA device when one is available, otherwise the CPU, and
# place the network and the loss on it.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = SimpleCNN().to(device)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)


# Shuffled 6-fold cross validation over the windows: every fold serves once as
# the test set while the network is trained on the other five.
n_epochs = 100   # training epochs per fold
n_batches = 5    # mini-batches per epoch
cv = KFold(n_splits=6, random_state=5, shuffle=True)
j = 0            # total number of optimisation steps taken so far
test_loss_list = []
train_loss_list = []
train_correct_list = []
test_correct_list = []
for train_index, test_index in cv.split(array_data_x):
    X_train, X_test = array_data_x[train_index], array_data_x[test_index]
    y_train, y_test = array_data_y[train_index], array_data_y[test_index]

    # Windows are stored as (n, time, channels) but Conv1d expects
    # (n, channels, time).  The axes have to be swapped with permute();
    # reshape() would keep the memory order and scramble time and channels.
    X_train = X_train.permute(0, 2, 1).to(device)
    X_test = X_test.permute(0, 2, 1).to(device)
    # CrossEntropyLoss needs integer class indices on the same device as the
    # logits, so the labels stay torch tensors (no numpy round trip).
    y_train = y_train.long().to(device)
    y_test = y_test.long().to(device)

    # Exactly `n_batches` aligned (inputs, targets) pairs of near-equal size.
    train_loader = [
        (inputs, targets)
        for inputs, targets in zip(torch.tensor_split(X_train, n_batches),
                                   torch.tensor_split(y_train, n_batches))
        if len(inputs) > 0
    ]

    # Start every fold from a freshly initialised model and optimiser;
    # otherwise the test windows of later folds were already trained on.
    # (The previous Keras clear_session() call did not touch the PyTorch model.)
    net = SimpleCNN().to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    test_loss = 0.0
    test_correct = 0.0
    train_loss = 0.0
    train_correct = 0.0

    # total_loss / correct: mean train loss and train accuracy (%) of one epoch
    # total_test_loss / total_test_correct: test loss and test accuracy (%)
    # of one epoch
    for epoch in range(n_epochs):
        total_loss = 0.0
        correct = 0.0
        net.train()
        for i, (batch, targets) in enumerate(train_loader):
            preds = net(batch)
            loss = criterion(preds, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # argmax of the logits equals argmax of their softmax.
            preds_tags = preds.argmax(dim=1)
            correct += 100.0 * (preds_tags == targets).float().mean().item()
            j = j + 1

        net.eval()
        with torch.no_grad():
            test_pred = net(X_test)
            total_test_loss = criterion(test_pred, y_test).item()
            # Test accuracy comes from the test predictions, not from the
            # last training batch.
            test_tags = test_pred.argmax(dim=1)
            total_test_correct = 100.0 * (test_tags == y_test).float().mean().item()

        total_loss = total_loss / len(train_loader)
        correct = correct / len(train_loader)
        test_loss += total_test_loss
        test_correct += total_test_correct
        train_correct += correct
        train_loss += total_loss

    # Average loss and accuracy over all epochs of this fold.
    test_loss = test_loss / n_epochs
    train_correct = train_correct / n_epochs
    train_loss = train_loss / n_epochs
    test_correct = test_correct / n_epochs

    # Record the per-fold averages.
    test_loss_list.append(test_loss)
    test_correct_list.append(test_correct)
    train_correct_list.append(train_correct)
    train_loss_list.append(train_loss)

    # Running mean of the train metrics over the folds finished so far.
    full_loss = np.mean(train_loss_list)
    full_correct = np.mean(train_correct_list)