**Nishanth Iruthayaraj**

**Importing and Reading**

In [None]:
import pandas as pd
import numpy as np

# Load the activity data set from the working directory; the first CSV column
# is used as the row index rather than as a data column.
df = pd.read_csv('df_activity.csv', index_col = 0)
#df.head()

**Encoding the Label**

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'Label' column with integer class codes.
# NOTE: this overwrites the original label values in `df`; the fitted encoder
# is kept in `label` so the codes can be mapped back to the original labels.
label = LabelEncoder()
df['Label'] = label.fit_transform(df['Label'].values)

#To check corresponding label values
#df['Label_encoded'] = LabelEncoder().fit_transform(df['Label'].values)
#df.sample(n=50)

**Without Sliding Window**

In [None]:
# Features: every column except the last one. Target: the last column.
# NOTE(review): assumes 'Label' is the last column of the CSV -- confirm.
Xs = df.iloc[:,:-1].values
ys = df.iloc[:,-1].values

**Splitting Train and Test Set**

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split without shuffling, so row order is preserved and the test set is
# the trailing part of the data.
# NOTE(review): random_state should have no effect while shuffle = False.
Xs1_train, Xs1_test, ys1_train, ys1_test = train_test_split(Xs, ys, test_size = 0.2, random_state = 1, shuffle = False)
# Displayed as the cell output: shapes of the four resulting arrays.
Xs1_train.shape, Xs1_test.shape, ys1_train.shape, ys1_test.shape

**KNN Model**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Number of neighbours that vote on each prediction (row-level model).
n = 91
#n = 7 # uncomment this and comment above for using entire data set as test
# p = 2 selects the Euclidean form of the default Minkowski metric;
# n_jobs = -1 lets the neighbour search use all available processors.
classify = KNeighborsClassifier(n_neighbors= n, p = 2, n_jobs = -1)
classify.fit(Xs1_train, ys1_train)

In [None]:
# Predict a label for every row of the held-out test split.
ys1_pred = classify.predict(Xs1_test)
#ys1_pred = classify.predict(Xs) # uncomment this and comment above for using entire data set as test

**Accuracy score and Confusion Matrix**

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted label equals the true label.
print('Accuracy score : ', accuracy_score(ys1_test, ys1_pred))
#print('Accuracy score : ', accuracy_score(ys, ys1_pred)) # uncomment this and comment above for using entire data set

In [None]:
from sklearn.metrics import confusion_matrix

# Arguments are (true labels, predicted labels); in scikit-learn's convention
# rows correspond to the true class and columns to the predicted class.
confusion_matrix(ys1_test, ys1_pred)
#confusion_matrix(ys, ys1_pred) # uncomment this and comment above for using entire data set

**With Sliding Window**

In [None]:
def sliding_window_samples(data, samples_per_window, overlap_ratio):
    """Cut ``data`` into fixed-length, optionally overlapping windows.

    Parameters
    ----------
    data : sliceable sequence (e.g. DataFrame, ndarray, list of rows)
        Sliced along its first axis as ``data[start:stop]``.
    samples_per_window : int
        Number of consecutive rows in every window; must be at least 1.
    overlap_ratio : float or None
        Overlap between consecutive windows, given as a percentage of the
        window length (50 means half of each window is shared with the next
        one). ``None`` means no overlap.

    Returns
    -------
    windows : numpy.ndarray
        Stacked windows; the first axis indexes the window, the second the
        row inside the window. Empty (zero windows) when ``data`` is shorter
        than one window.
    indices : numpy.ndarray of shape (n_windows, 2)
        ``[start, stop]`` row positions of every window (``stop`` exclusive).

    Raises
    ------
    ValueError
        If the window size is smaller than 1, or if the overlap covers the
        whole window (the window would never advance).
    """
    win_len = samples_per_window
    if win_len < 1:
        raise ValueError('Window size must be at least 1.')

    # A missing ratio means "no overlap" instead of leaving the name unbound.
    if overlap_ratio is None:
        overlapping_elements = 0
    else:
        overlapping_elements = int((overlap_ratio / 100) * win_len)

    # A step of zero or less would loop forever, so fail loudly instead of
    # only printing a message.
    if overlapping_elements >= win_len:
        raise ValueError('Number of overlapping elements exceeds window size.')
    step = win_len - overlapping_elements

    # "+ 1" keeps the last window that ends exactly on the final row.
    starts = range(0, len(data) - win_len + 1, step)
    windows = [data[start:start + win_len] for start in starts]
    indices = [[start, start + win_len] for start in starts]

    if not windows:
        # Not enough rows for a single window: return correctly shaped,
        # empty results rather than failing on unbound locals.
        trailing_shape = tuple(np.shape(data)[1:])
        return (np.empty((0, win_len) + trailing_shape),
                np.empty((0, 2), dtype=int))

    # Convert once, after all windows are collected, instead of rebuilding
    # the arrays on every loop iteration.
    result_indices = np.array(indices)
    try:
        result_windows = np.array(windows)
    except ValueError:
        # Best-effort fallback kept from the original implementation for
        # inputs numpy cannot stack directly: fill an object array window
        # by window. Requires ``data`` to expose a 2-D ``.shape``.
        result_windows = np.empty(shape=(len(windows), win_len, data.shape[1]), dtype=object)
        for i, window in enumerate(windows):
            result_windows[i] = window
    return result_windows, result_indices

In [None]:
# Number of consecutive rows per window.
window_length = 100
# NOTE(review): despite its name this value is passed as `overlap_ratio`, i.e.
# it is the overlap as a percentage of the window length (50 -> consecutive
# windows share 50 of their 100 rows), not a stride in rows.
window_skip = 50
# df_1 holds the stacked windows, df_2 the [start, stop] row positions of each
# window (df_2 is not used further in the cells shown here).
df_1, df_2 = sliding_window_samples(df, window_length, window_skip)
# Cast to a float array; indexed below as (window, row in window, column).
data = np.array(df_1, dtype = float)
data.shape

**Splitting Train and Test Set**

In [None]:
from sklearn.model_selection import train_test_split

# Columns 0-11 of every window are used as features, column 12 as the label.
# NOTE(review): hard-coded indices assume the label sits at position 12 (i.e.
# the frame has 13 columns with 'Label' last, as the non-windowed cell
# assumes) -- confirm against the CSV.
X = data[:, : ,0:12]
y = data[:, : , 12]
# Unshuffled 80/20 split over windows, so window order is preserved.
# NOTE(review): random_state should have no effect while shuffle = False.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 123, shuffle = False)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

**Dimensionality reduction for KNN Model**

In [None]:
def dim_reduction(s):
    """Collapse each window of labels to its most frequent label.

    ``s`` is an array whose axis 1 runs over the samples of a window. For
    every window the value occurring most often is returned; when several
    values are tied, the smallest of them wins.
    """
    window_axis = 1

    # Map every label to a dense integer code so occurrences can be counted
    # with bincount, then restore the input layout.
    classes, codes = np.unique(s, return_inverse=True)
    n_classes = np.max(codes) + 1
    codes = codes.reshape(s.shape)

    # votes[..., k, ...] = how often class k occurs along the window axis.
    votes = np.apply_along_axis(np.bincount, window_axis, codes, None, n_classes)

    # argmax returns the first maximum, i.e. the lowest class code on ties.
    winners = np.argmax(votes, axis=window_axis)
    return classes[winners]

In [None]:
# Flatten every window into one feature vector: the first axis (one entry per
# window) is kept and all remaining axes are merged.
X1_train = np.reshape(X_train, (X_train.shape[0], -1))
X1_test = np.reshape(X_test, (X_test.shape[0], -1))

y1_train = dim_reduction(y_train) # Taking most common value in each window
y1_test = dim_reduction(y_test)
# Alternative: take the last value in each window instead
# (index 99 assumes a window length of 100).
#y1_train = y_train[:,99]
#y1_test = y_test[:,99]


**Reshaping entire dataset for testing purpose**

In [None]:
# Same flattening and label reduction as for the train/test splits, applied to
# all windows; only used by the commented-out "entire data set" variants below.
Xd = np.reshape(X, (X.shape[0], -1))
yd = dim_reduction(y)

**KNN Model**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Number of neighbours for the windowed model.
# NOTE: `n` and `classify` are rebound here, replacing the row-level model
# fitted earlier in the notebook.
n = 51
#n = 3 # uncomment this and comment above for using entire data set as test
# p = 2 selects the Euclidean form of the default Minkowski metric;
# n_jobs = -1 lets the neighbour search use all available processors.
classify = KNeighborsClassifier(n_neighbors= n, p = 2, n_jobs = -1)
classify.fit(X1_train, y1_train)

In [None]:
# Predict one label per flattened test window.
y1_pred = classify.predict(X1_test)
#y1_pred = classify.predict(Xd) # uncomment this and comment above for using entire data set as test

**Accuracy score and Confusion Matrix**

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test windows whose predicted label equals the reduced true label.
print('Accuracy score : ', accuracy_score(y1_test, y1_pred))
#print('Accuracy score : ', accuracy_score(yd, y1_pred)) # uncomment this and comment above for using entire data set

In [None]:
from sklearn.metrics import confusion_matrix

# Arguments are (true labels, predicted labels); in scikit-learn's convention
# rows correspond to the true class and columns to the predicted class.
confusion_matrix(y1_test, y1_pred)
#confusion_matrix(yd, y1_pred) # uncomment this and comment above for using entire data set