# CLASSICAL APPROACHES 

1. Import pandas and numpy libraries.
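2. Read the dataset CSV file into a pandas DataFrame.
3. Split the dataset by subject into two separate DataFrames, and additionally build a version of Subject 2's data without the 'dribbling' label (used later for cross-subject validation).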

In [None]:
import pandas as pd
import numpy as np

# Forward slashes are used on purpose: they resolve on Windows, Linux and
# macOS alike, whereas backslashes in a normal string literal would form the
# invalid escape sequences "\s" and "\D" and only resolve on Windows.
dataframe = pd.read_csv('../sensor_data/Dataset.csv')

# Keep only the twelve sensor channels plus the subject id and activity label.
dataframe = dataframe[['E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'A1', 'A2', 'A3', 'W1', 'W2', 'W3', 'Subject', 'Label']]

# Missing values are replaced by the string 'null'.
# NOTE(review): if any sensor column contains NaN this turns that column into
# mixed str/float data, which the classifiers below cannot consume -- confirm
# that only non-numeric columns can be missing.
dataframe = dataframe.fillna('null')

# Per-subject views of the data.
df_subj1 = dataframe.loc[dataframe['Subject'] == 'Subject1']
df_subj2 = dataframe.loc[dataframe['Subject'] == 'Subject2']

# Subject 2 without the 'dribbling' activity; used for cross-subject validation.
new_df_subj2 = df_subj2.loc[df_subj2['Label'] != 'dribbling']

Descriptive Statistics (Overview of the Databases)

In [None]:
import matplotlib.pyplot as plt

# Draw the label distribution three times, each in its own figure:
# the complete dataset first, then Subject 1 only, then Subject 2 only.
for frame in (dataframe, df_subj1, df_subj2):
    frame["Label"].hist(bins=15)
    plt.show()

Define Sliding window method

In [None]:
def sliding_window_samples(data, samples_per_window, overlap_ratio):
    """Cut ``data`` into fixed-length, optionally overlapping windows.

    Windows are taken along the first axis with ``data[start:stop]``; only
    complete windows are produced, so trailing samples that do not fill a
    whole window are discarded.

    Parameters
    ----------
    data : sequence, numpy.ndarray or pandas.DataFrame
        Ordered samples. Must support ``len()`` and slicing with
        ``data[start:stop]``.
    samples_per_window : int
        Number of consecutive samples in each window. Must be positive.
    overlap_ratio : float or None
        Percentage of a window that is shared with the following window
        (e.g. ``70`` means consecutive windows share 70 % of their samples).
        ``None`` is treated as no overlap.

    Returns
    -------
    result_windows : numpy.ndarray
        The stacked windows; the first axis indexes the window, the second
        the sample inside the window. Has zero rows when ``data`` is shorter
        than one window.
    result_indices : numpy.ndarray
        Integer array of shape ``(n_windows, 2)`` holding the ``[start, stop)``
        sample positions of every window.

    Raises
    ------
    ValueError
        If ``samples_per_window`` is not positive, or if the overlap covers
        the whole window (the window would never advance).
    """
    win_len = samples_per_window
    if win_len <= 0:
        raise ValueError('samples_per_window must be a positive integer.')

    if overlap_ratio is None:
        overlapping_elements = 0
    else:
        # Multiply before dividing: (29 / 100) * 100 evaluates to 28.99...
        # in floating point and would be truncated to 28 by int().
        overlapping_elements = int(win_len * overlap_ratio / 100)

    # A step of zero (or less) would never reach the end of the data.
    if overlapping_elements >= win_len:
        raise ValueError('Number of overlapping elements exceeds window size.')

    step = win_len - overlapping_elements
    # "+ 1" keeps the last window when it ends exactly on the final sample.
    starts = range(0, len(data) - win_len + 1, step)

    windows = [data[start:start + win_len] for start in starts]
    indices = [[start, start + win_len] for start in starts]

    # reshape(-1, 2) keeps a well-formed (0, 2) array when there are no windows.
    result_indices = np.array(indices, dtype=int).reshape(-1, 2)

    if windows:
        # Every window has exactly win_len samples, so a single conversion
        # after the loop yields a regular array.
        result_windows = np.array(windows)
    else:
        # Preserve the trailing (per-sample) dimensions of the input so that
        # callers can still reshape the empty result.
        result_windows = np.empty((0, win_len) + tuple(np.shape(data)[1:]))

    return result_windows, result_indices

1. Import kNN classifier and train the model
2. Check Metrics

1. Import SVM classifier and train the model
2. Check Metrics

Per subject validation using Sliding window
1. Set test subject as required
2. Remove unnecessary Columns
3. Converting string labels into numbers using labelencoder
4. Apply the sliding window, then evaluate with 10-fold cross-validation (which splits the windows into train and test folds)
5. Reshape the train and test matrices to match with the algorithm input shape

In [None]:
def per_subject_SL(dataframe, window_size=100, overlap_ratio=70, n_splits=10):
    """Cross-validate kNN and SVM classifiers on sliding windows of one dataset.

    The "Label" and "Subject" columns are removed from the features, the
    labels are integer-encoded, and both are cut into windows with
    ``sliding_window_samples``. Each feature window is flattened into one
    row and each label window is reduced to its most frequent label. For
    both classifiers the mean accuracy and mean macro-averaged F1 score over
    the cross-validation folds are printed.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns "Label" and "Subject"; every other column
        is used as a feature.
    window_size : int, default 100
        Samples per window.
    overlap_ratio : float, default 70
        Overlap between consecutive windows, in percent.
    n_splits : int, default 10
        Number of cross-validation folds.

    Returns
    -------
    None
        Results are printed.
    """
    from scipy import stats
    from sklearn import preprocessing, svm
    from sklearn.model_selection import cross_validate
    from sklearn.neighbors import KNeighborsClassifier

    X = dataframe.drop(columns=["Label", "Subject"])

    # Convert the string labels into integer class ids.
    le = preprocessing.LabelEncoder()
    y_encoded = le.fit_transform(dataframe["Label"])

    X_windows, _ = sliding_window_samples(X, window_size, overlap_ratio)
    y_windows, _ = sliding_window_samples(y_encoded, window_size, overlap_ratio)

    # One flat feature vector per window.
    X_windows = np.reshape(X_windows, (X_windows.shape[0], -1))

    # One label per window: the most frequent label inside it. The reshape
    # makes the result 1-D whether or not scipy keeps the reduced axis.
    y_mode = stats.mode(y_windows, axis=1).mode
    y_windows = np.reshape(y_mode, (y_windows.shape[0],))

    classifiers = (
        ("kNN", KNeighborsClassifier(n_neighbors=5)),
        ("SVM", svm.SVC(kernel='rbf', C=1, gamma='auto', random_state=42)),
    )

    # NOTE(review): consecutive windows overlap, so windows that end up in
    # different folds can share raw samples -- the scores may be optimistic.
    for name, estimator in classifiers:
        print(name)
        # A single cross-validation pass scores both metrics, so each model
        # is fitted once per fold instead of twice.
        scores = cross_validate(
            estimator,
            X_windows,
            y_windows,
            cv=n_splits,
            scoring=("accuracy", "f1_macro"),
        )
        # Accuracy: fraction of windows classified correctly.
        print("Accuracy:", scores["test_accuracy"].mean())
        # F1 score: macro average (unweighted mean of the per-class F1).
        print("f1_score:", scores["test_f1_macro"].mean())

Cross subject validation using Sliding window
1. Remove data with label 'dribbling' for subject2
2. Select train and test subjects as required
3. Remove unnecessary Columns
4. Converting string labels into numbers using labelencoder
5. Apply sliding window on the train and test sets 
6. Reshape the train and test matrices to match with the algorithm input shape

In [None]:
def cross_subject_SL(train_data, test_data, window_size=100, overlap_ratio=70):
    """Train kNN and SVM on one subject's windows and test on another's.

    For both frames the "Label" and "Subject" columns are removed from the
    features, the labels are integer-encoded with one shared encoder, and
    features and labels are cut into windows with ``sliding_window_samples``.
    Each feature window is flattened into one row and each label window is
    reduced to its most frequent label. Accuracy and weighted F1 score on
    the test windows are printed for both classifiers.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training data; must contain the columns "Label" and "Subject".
    test_data : pandas.DataFrame
        Test data with the same columns as ``train_data``.
    window_size : int, default 100
        Samples per window.
    overlap_ratio : float, default 70
        Overlap between consecutive windows, in percent.

    Returns
    -------
    None
        Results are printed.
    """
    from scipy import stats
    from sklearn import metrics, preprocessing, svm
    from sklearn.neighbors import KNeighborsClassifier

    # Fit ONE encoder on the labels of both frames. Fitting a fresh encoding
    # on the test labels would assign different integers to the same
    # activity whenever the two label sets differ, making predictions and
    # ground truth incomparable.
    le = preprocessing.LabelEncoder()
    le.fit(list(train_data["Label"]) + list(test_data["Label"]))

    def _windowed(frame):
        # Turn one frame into (flattened feature windows, one label per window).
        features = frame.drop(columns=["Label", "Subject"])
        labels = le.transform(frame["Label"])

        X, _ = sliding_window_samples(features, window_size, overlap_ratio)
        y, _ = sliding_window_samples(labels, window_size, overlap_ratio)

        X = np.reshape(X, (X.shape[0], -1))
        # Most frequent label inside each window; the reshape makes the
        # result 1-D whether or not scipy keeps the reduced axis.
        y_mode = stats.mode(y, axis=1).mode
        y = np.reshape(y_mode, (y.shape[0],))
        return X, y

    X_train, y_train = _windowed(train_data)
    X_test, y_test = _windowed(test_data)

    classifiers = (
        ("kNN", KNeighborsClassifier(n_neighbors=5)),
        ("SVM", svm.SVC(kernel='rbf', C=1, gamma='auto', random_state=42)),
    )

    for name, estimator in classifiers:
        # Train on the training subject, predict the test subject.
        estimator.fit(X_train, y_train)
        y_pred = estimator.predict(X_test)

        print(name)
        # Accuracy: fraction of test windows classified correctly.
        print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
        # F1 score: per-class F1 averaged with class-support weights.
        print("f1_score:", metrics.f1_score(y_test, y_pred, average='weighted'))

In [None]:
# Within-dataset validation: the whole dataset first, then each subject alone.
per_subject_runs = (
    ("OverallDataset:", dataframe),
    ("Subject1:", df_subj1),
    ("Subject2:", df_subj2),
)
for heading, frame in per_subject_runs:
    print(heading)
    per_subject_SL(frame)

# Cross-subject validation: train on one subject and test on the other,
# always using the Subject 2 frame that has no 'dribbling' rows.
cross_subject_runs = (
    ("Subject1:Train, Subject2:Test", df_subj1, new_df_subj2),
    ("Subject2:Train, Subject1:Test", new_df_subj2, df_subj1),
)
for heading, train_frame, test_frame in cross_subject_runs:
    print(heading)
    cross_subject_SL(train_frame, test_frame)