In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
from sklearn.metrics import confusion_matrix
import scipy.stats as stats
from sklearn import metrics
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive so the dataset CSV read further down is reachable.
# NOTE(review): google.colab is presumably only available inside a Colab runtime —
# this cell will not run in a plain Jupyter kernel.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the merged dataset from the mounted Drive folder. Later cells read its
# 'user_id', 'label', 'x', 'y' and 'z' columns.
data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Activity Recognition/Datasets/merged_processed_data.csv')
data

In [None]:
# Number of rows recorded for each user_id.
data['user_id'].value_counts()

In [None]:
# Number of rows for each label value; shows how unevenly the classes are represented.
data['label'].value_counts()

In [None]:
# One independent copy per label (1.0 .. 12.0), each truncated to a fixed
# number of leading rows so that the largest classes are capped.
one = data.loc[data['label'].eq(1.0)].iloc[:23000].copy()
two = data.loc[data['label'].eq(2.0)].iloc[:1710].copy()
three = data.loc[data['label'].eq(3.0)].iloc[:98000].copy()
four = data.loc[data['label'].eq(4.0)].iloc[:200000].copy()
five = data.loc[data['label'].eq(5.0)].iloc[:200000].copy()
six = data.loc[data['label'].eq(6.0)].iloc[:1842].copy()
seven = data.loc[data['label'].eq(7.0)].iloc[:108500].copy()
eight = data.loc[data['label'].eq(8.0)].iloc[:297].copy()
nine = data.loc[data['label'].eq(9.0)].iloc[:2510].copy()
ten = data.loc[data['label'].eq(10.0)].iloc[:2680].copy()
eleven = data.loc[data['label'].eq(11.0)].iloc[:705].copy()
twelve = data.loc[data['label'].eq(12.0)].iloc[:48000].copy()

In [None]:
# Stack the per-class subsets, in label order 1..12, into a single frame.
# pd.concat is used because DataFrame.append was deprecated and then removed
# in pandas 2.0. The original row index of every subset is kept, as append() did.
balanced_data = pd.concat([one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve])
balanced_data

**Standardization**

In [None]:
# Features are the three columns 'x', 'y', 'z'; the target is the 'label' column.
X = balanced_data[['x','y','z']]
y = balanced_data['label']

In [None]:
# Standardize each of the three feature columns; fit_transform returns a NumPy array.
# NOTE(review): the scaler is fitted on every row, before the train/test split that
# happens later in the notebook, so held-out rows influence the scaling statistics.
# Consider fitting on the training split only.
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)
scaled_X

In [None]:
from imblearn.combine import SMOTETomek

# Rebalance the classes: SMOTE over-sampling followed by Tomek-link cleaning.
# Current imbalanced-learn releases no longer accept the `ratio` keyword or the
# `fit_sample` method; `sampling_strategy` and `fit_resample` are their replacements.
# random_state pins the synthetic samples so a re-run reproduces the same data.
# NOTE(review): resampling happens before the train/test split, so synthetic rows
# derived from training rows can end up in the test split — confirm this is intended.
smt = SMOTETomek(sampling_strategy='auto', random_state=0)
X_smt, y_smt = smt.fit_resample(np.asarray(scaled_X), np.asarray(y))

In [None]:
# Wrap the resampled feature array back into a DataFrame with the original column names.
# y is rebound to the resampled labels returned by the same resampling call, so it
# lines up row-for-row with scaled_balanced_X.
scaled_balanced_X = pd.DataFrame(data=X_smt, columns=['x', 'y', 'z'])
y = y_smt
scaled_balanced_X

In [None]:
# test_data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Activity Recognition/Datasets/testing/raw_field_acc_user27.csv')
# test_data = data.sort_values(by='datetime')
# test_data

In [None]:
# EX = test_data[['x','y','z']]
# Ey = test_data['label']

In [None]:
# scalerE = StandardScaler()
# X = scalerE.fit_transform(X)

# scaled_EX = pd.DataFrame(data=EX, columns=['x', 'y', 'z'])
# scaled_EX['label'] = Ey.values

# scaled_EX

In [None]:
# Stratified 80/20 split of individual rows with a fixed seed.
# NOTE(review): train_test_split shuffles rows by default, so the windows cut from
# X_train / X_test afterwards are not contiguous in time — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(scaled_balanced_X, y, test_size = 0.2, random_state = 0, stratify = y)
# Per-row test labels as an array; the accuracy and confusion-matrix cells compare against this.
y_act = np.asarray(y_test)

**Framing**

In [None]:
# Window length and step between window starts, both counted in rows.
# With these values consecutive windows share 90 of their 120 rows.
frame_size = 120
hop_size = 30

In [None]:
def get_frames(df, lbl, frame_size, hop_size):
    """Cut the x/y/z signal into overlapping windows and label each by majority vote.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'x', 'y' and 'z'. Rows are read in positional order.
    lbl : array-like
        One label per row of ``df``, in the same positional order.
    frame_size : int
        Number of consecutive rows in each window.
    hop_size : int
        Step between the first rows of consecutive windows.

    Returns
    -------
    frames : numpy.ndarray, shape (n_frames, frame_size, 3)
        ``frames[k, t]`` is the (x, y, z) triple of row ``k * hop_size + t``.
    labels : numpy.ndarray, shape (n_frames,)
        Most frequent label inside each window; ties go to the smallest label.

    Raises
    ------
    ValueError
        If ``lbl`` does not have exactly one entry per row of ``df``.
    """
    N_FEATURES = 3

    # (n_rows, 3) array: slicing rows yields (frame_size, 3) windows directly.
    # Stacking x, y, z separately and calling reshape() would reinterpret the
    # (3, frame_size) memory layout and mix the three axes together.
    signal = df[['x', 'y', 'z']].values
    lbl = np.asarray(lbl)
    if len(lbl) != len(signal):
        raise ValueError(
            "lbl must have one entry per row of df (got %d labels for %d rows)"
            % (len(lbl), len(signal))
        )

    frames = []
    labels = []

    # "+ 1" keeps the final window that still fits completely inside the data.
    for start in range(0, len(signal) - frame_size + 1, hop_size):
        stop = start + frame_size
        frames.append(signal[start:stop])

        # Majority label via np.unique: values come back sorted, so argmax picks
        # the smallest label among ties. This does not depend on the return shape
        # of scipy.stats.mode, which differs between SciPy versions.
        values, counts = np.unique(lbl[start:stop], return_counts=True)
        labels.append(values[np.argmax(counts)])

    # The reshape only matters for the empty case, where it produces (0, frame_size, 3).
    frames = np.asarray(frames, dtype=signal.dtype).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)

    return frames, labels

In [None]:
# Window the training rows. Note that this rebinds y from per-row labels to per-frame labels.
Frame_X, y = get_frames(X_train, y_train, frame_size, hop_size)
Frame_X.shape, y.shape

In [None]:
# Window the held-out rows with the same settings; Ey holds one label per test frame.
Frame_EX, Ey = get_frames(X_test, y_test, frame_size, hop_size)
Frame_EX.shape, Ey.shape

**Feature Extraction (Mean, Median, Mode, Variance, Standard Deviation, RMS, Max, Min, Max Absolute Value)**

In [None]:
# Build one feature vector per training frame. Nine statistics are computed down
# the rows of the frame (axis 0), giving one value per column each, and are then
# flattened statistic by statistic:
# mean, median, mode, variance, std, RMS, max, min, max-abs.
X = []
for i in range(Frame_X.shape[0]):
  frame = Frame_X[i]
  mean_X = np.mean(frame,axis=0)
  median_X = np.median(frame,axis=0)
  # Explicit axis plus ravel() gives one mode per column on every SciPy version.
  # Older releases return shape (1, n_cols), newer ones (n_cols,). Indexing with
  # [0][0] collapses the newer result to a single scalar, which makes the stacked
  # array below ragged.
  mode_X = np.ravel(stats.mode(frame,axis=0)[0])
  variance_X = np.var(frame,axis=0)
  std_deviation_X = np.std(frame,axis=0)
  rms_X = np.sqrt(np.mean(np.square(frame),axis=0))
  max_X = np.max(frame,axis=0)
  min_X = np.min(frame,axis=0)
  mav_X = np.max(np.absolute(frame),axis=0) #maximum absolute value
  comb = np.array([mean_X,median_X,mode_X,variance_X,std_deviation_X,rms_X,max_X,min_X,mav_X])
  X.append(comb.reshape(-1))
X = np.asarray(X)
X.shape

In [None]:
# Build one feature vector per test frame, with the same nine statistics and the
# same layout as the training features: mean, median, mode, variance, std, RMS,
# max, min, max-abs — each computed down the rows of the frame (axis 0).
EX = []
for i in range(Frame_EX.shape[0]):
  frame = Frame_EX[i]
  mean_X = np.mean(frame,axis=0)
  median_X = np.median(frame,axis=0)
  # Explicit axis plus ravel() gives one mode per column on every SciPy version.
  # Older releases return shape (1, n_cols), newer ones (n_cols,). Indexing with
  # [0][0] collapses the newer result to a single scalar, which makes the stacked
  # array below ragged.
  mode_X = np.ravel(stats.mode(frame,axis=0)[0])
  variance_X = np.var(frame,axis=0)
  std_deviation_X = np.std(frame,axis=0)
  rms_X = np.sqrt(np.mean(np.square(frame),axis=0))
  max_X = np.max(frame,axis=0)
  min_X = np.min(frame,axis=0)
  mav_X = np.max(np.absolute(frame),axis=0) #maximum absolute value
  comb = np.array([mean_X,median_X,mode_X,variance_X,std_deviation_X,rms_X,max_X,min_X,mav_X])
  EX.append(comb.reshape(-1))
EX = np.asarray(EX)
EX.shape

In [None]:
# #TESTING
# X = []
# for i in range(Frame_X.shape[0]):
#   mode = np.asarray(stats.mode(Frame_X[i],axis=0)[0][0])
#   Mode_eq = []
#   Frame = Frame_X[i]
#   for j in range(Frame.shape[0]):
#     check = 0
#     cnt = 0
#     idx = Frame[j]
#     for k in range(idx.shape[0]):
#       if idx[k] == mode[k]:
#         check = 1
#     if check == 1 :
#       Mode_eq.append(idx)
#   Mode_eq = np.asarray(Mode_eq)
#   mean = np.mean(Mode_eq,axis=0)
#   median = np.median(Mode_eq,axis=0)
#   variance = np.var(Mode_eq,axis=0)
#   std_deviation = np.std(Mode_eq,axis=0)
#   rms = np.sqrt(np.mean(np.square(Mode_eq),axis=0))
#   # mav = np.max(np.absolute(Mode_eq),axis=0) #maximum absolute value
#   comb = np.array([mean,median,mode,variance,std_deviation,rms])
#   X.append(comb.reshape(-1))
# X = np.asarray(X)
# X.shape

**Training and Testing using KNN**

In [None]:
# Use the frame-level feature matrices as the final train/test sets.
# This rebinds X_train / X_test / y_train / y_test, which previously held the
# row-level split; y_act still holds the per-row test labels.
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 0, stratify = y)
X_train = X
y_train = y
X_test = EX
y_test = Ey
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# k-nearest-neighbours classifier that votes over the 11 closest training frames.
model = KNeighborsClassifier(n_neighbors=11)

#Train
model.fit(X_train,y_train) 

**Confusion Matrix and HeatMap**

In [None]:
#Prediction
y_pred = model.predict(X_test)

# Expand the frame-level predictions to one label per test row so they can be
# scored against y_act. Frame k starts at row k * hop_size, so its prediction is
# assigned to the hop_size rows from there on. Using hop_size instead of a
# literal 30 keeps this cell in step with the framing settings.
y_ans = np.repeat(y_pred, hop_size)[:y_act.shape[0]]

# Rows after the last hop get the final frame's prediction. This matches what
# the mode over the trailing predictions always evaluated to, without depending
# on the return shape of scipy.stats.mode.
n_missing = y_act.shape[0] - y_ans.shape[0]
if n_missing > 0:
  y_ans = np.concatenate([y_ans, np.full(n_missing, y_pred[-1], dtype=y_pred.dtype)])

# Model Accuracy
print("Accuracy:",metrics.accuracy_score(y_act, y_ans)*100,"%")

In [None]:
# Activity classes are labelled 1..12. Pinning `labels` keeps the matrix 12x12
# even when a class is missing from both y_act and y_ans, so the tick labels
# always line up with the cells.
class_label = np.arange(1, 13)
mat = confusion_matrix(y_act, y_ans, labels=class_label)
plt.figure(figsize = (9,7))
heat = sns.heatmap(mat,square=True,annot=True,fmt='d',cbar=True,cmap=plt.cm.gist_heat,
                   xticklabels=class_label,yticklabels=class_label)
# confusion_matrix(y_true, y_pred) puts true labels on the rows and predictions on
# the columns; the heatmap draws rows along the y-axis and columns along the x-axis.
heat.set_xlabel('Model Prediction')
heat.set_ylabel('Actual Value')

In [None]:
# model = KNeighborsClassifier(n_neighbors=25)

# #Train
# model.fit(X_train,y_train)

# #Prediction
# y_pred = model.predict(X_test)

# # Model Accuracy
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

**Testing with Median**

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(Median_X, y, test_size = 0.2, random_state = 0, stratify = y)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# model = KNeighborsClassifier(n_neighbors=37)

# #Train
# model.fit(X_train,y_train)

# #Prediction
# y_pred = model.predict(X_test)

# # Model Accuracy
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

**Testing with Mode**

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(Mode_X, y, test_size = 0.2, random_state = 0, stratify = y)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# model = KNeighborsClassifier(n_neighbors=21)

# #Train
# model.fit(X_train,y_train)

# #Prediction
# y_pred = model.predict(X_test)

# # Model Accuracy
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

**Testing with Variance**

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(Var_X, y, test_size = 0.2, random_state = 0, stratify = y)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# model = KNeighborsClassifier(n_neighbors=37)

# #Train
# model.fit(X_train,y_train)

# #Prediction
# y_pred = model.predict(X_test)

# # Model Accuracy
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

**Testing with Standard Deviation**

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(Std_X, y, test_size = 0.2, random_state = 0, stratify = y)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# model = KNeighborsClassifier(n_neighbors=33)

# #Train
# model.fit(X_train,y_train)

# #Prediction
# y_pred = model.predict(X_test)

# # Model Accuracy
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

**Testing with Root Mean Square**

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(RMS_X, y, test_size = 0.2, random_state = 0, stratify = y)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# model = KNeighborsClassifier(n_neighbors=19)

# #Training
# model.fit(X_train,y_train)

# #Prediction
# y_pred = model.predict(X_test)

# # Model Accuracy
# print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

In [None]:
# mx = 0.0
# pos = 0
# for i in range(100):
#   model = KNeighborsClassifier(n_neighbors=2*i+1)

#   #Train
#   model.fit(X_train,y_train)

#   #Prediction
#   y_pred = model.predict(X_test)

#   # Model Accuracy
#   # print(2*i+1,"Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")
  
#   if mx<metrics.accuracy_score(y_test, y_pred)*100:
#     mx = metrics.accuracy_score(y_test, y_pred)*100
#     pos = 2*i+1
# print(pos)

# **Using all the features together gives better accuracy than using any one of them individually. Also, it does not work better on balanced data.**
