In [73]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# plotly visualization library
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly_express as px
from plotly import tools

# dimensionality reduction libraries
from sklearn.decomposition import PCA
from umap import UMAP

#classifiers
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

## Reading files and preprocessing

In [74]:
# Locations of the raw MovementAAL files, relative to the notebook.
input_path = '../data/MovementAAL/dataset/MovementAAL_RSS_'
target_path = '../data/MovementAAL/dataset/MovementAAL_target.csv'
group_path = '../data/MovementAAL/groups/MovementAAL_DatasetGroup.csv'

N_SEQUENCES = 314          # sequence files are numbered 1 .. 314
SAMPLES_PER_TIME_UNIT = 8  # consecutive samples are 1/8 apart on the `time` axis
                           # (presumably an 8 Hz sampling rate -- TODO confirm)

### -------------------------------------------------------- loading labels
# One class label and one environment id per sequence. Rows are paired with the
# sequence files by position: row k belongs to file number k + 1.
target_df = pd.read_csv(target_path)[' class_label']
group_df = pd.read_csv(group_path)[' dataset_ID']

# zip() below stops at the shortest input, so a short label file would silently
# drop sequences; fail loudly instead.
if min(len(target_df), len(group_df)) < N_SEQUENCES:
    raise ValueError(
        'Expected at least {} label rows, got {} targets and {} groups'
        .format(N_SEQUENCES, len(target_df), len(group_df))
    )

### -------------------------------------------------------- loading files
file_dfs = []
file_lens = []

for seq_id, target, group in zip(range(1, N_SEQUENCES + 1), target_df, group_df):
    file_df = pd.read_csv(input_path + str(seq_id) + '.csv', header=0).reset_index(drop=True)
    n_samples = file_df.shape[0]

    # Annotate each sequence while it is still a separate frame; this avoids
    # re-filtering the full concatenated frame once per sequence afterwards.
    file_df['_id'] = seq_id
    file_df['target'] = target
    file_df['group'] = group
    file_df['time'] = np.arange(n_samples) / SAMPLES_PER_TIME_UNIT

    file_dfs.append(file_df)
    file_lens.append(n_samples)

df = pd.concat(file_dfs, ignore_index=True)

# adding target label and group label
df['target_label'] = np.where(df['target'] == 1, 'Movement', 'Non-Movement')

group_names = {1: 'environment_1', 2: 'environment_2', 3: 'environment_3'}
df['group_label'] = np.select(
    condlist=[df['group'] == group_id for group_id in group_names],
    choicelist=list(group_names.values()),
    # An explicit string default is required: np.select's implicit default is the
    # integer 0, which recent NumPy versions refuse to combine with string choices.
    default='unknown',
)

# rearranging the columns (the raw headers carry a leading '#' / space)
df = df.rename(columns={'#RSS_anchor1':'RSS_anchor1', ' RSS_anchor2':'RSS_anchor2', ' RSS_anchor3':'RSS_anchor3', ' RSS_anchor4':'RSS_anchor4'})

df = df[['_id', 'time', 'RSS_anchor1', 'RSS_anchor2', 'RSS_anchor3', 'RSS_anchor4',
        'target', 'target_label', 'group', 'group_label']]

# saving the preprocessed file
df.to_csv('../data/indoor_movement.csv', index = False)

## Visualization

In [75]:
# Restrict the data to the sequences with _id 25..29 and save that subset.
subset_ids = np.arange(25, 30)
df_red = df[df['_id'].isin(subset_ids)]
df_red.to_csv('../data/indoor_movement_red.csv', index = False)

# signal visualization: one interactive line chart per RSS anchor column
anchor_columns = ['RSS_anchor' + str(anchor) for anchor in range(1, 5)]

for anchor_col in anchor_columns:
    signal_fig = px.line(
        df_red,
        x="time",
        y=anchor_col,
        color="target_label",
        line_group="_id",
        line_dash='_id',
        hover_name="target",
        line_shape="linear",
        facet_row='target_label',
        animation_frame='group_label',
        color_discrete_sequence=px.colors.qualitative.Alphabet,
        width=950,
        height=550,
    )
    iplot(signal_fig)


In [76]:
# histogram and density plot: one figure per RSS anchor, split by target label
hist_palette = px.colors.qualitative.Plotly[-7:-5]

for anchor in range(1, 5):
    rss_col = 'RSS_anchor%d' % anchor
    # NOTE(review): `y` is the same column as `x`; depending on the plotly version
    # the bars may show summed values rather than counts -- confirm this is intended.
    hist_fig = px.histogram(
        df,
        x=rss_col,
        y=rss_col,
        color='target_label',
        facet_col='target_label',
        barmode='group',
        nbins=70,
        height=400,
        width=990,
        color_discrete_sequence=hist_palette,
    )
    iplot(hist_fig)

In [None]:
# dimensionality reduction
input_cols = df[['RSS_anchor1', 'RSS_anchor2', 'RSS_anchor3', 'RSS_anchor4']]
target_col = df[['target_label']]
target_group = df[['group_label']]


def _embed_and_sample(reducer, axis_names):
    """Fit `reducer` on the RSS columns, attach the labels, and draw 1000 rows.

    The 2-D coordinates are put in a frame with columns `axis_names`, joined
    column-wise with the target and group labels, and a fixed-seed random
    sample of 1000 rows is returned.
    """
    coords = pd.DataFrame(reducer.fit_transform(input_cols), columns = axis_names)
    labelled = pd.concat([coords, target_col, target_group], axis=1, sort = False)
    return labelled.sample(n=1000, random_state=42)


pca = PCA(n_components=2, random_state=42)
umap = UMAP(n_components=2, random_state=42)

pca_res = _embed_and_sample(pca, ['pc1', 'pc2'])
umap_res = _embed_and_sample(umap, ['umap_x', 'umap_y'])

In [None]:
# Scatter of the sampled PCA coordinates, one facet column per group label.
pca_scatter_options = dict(
    x="pc1",
    y="pc2",
    color="target_label",
    hover_name="target_label",
    facet_col='group_label',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    size_max=5,
    width=990,
    height=500,
)
px.scatter(pca_res, **pca_scatter_options)


In [None]:
# Scatter of the sampled UMAP coordinates, animated over the group label.
umap_scatter_options = dict(
    x="umap_x",
    y="umap_y",
    color="target_label",
    hover_name="target_label",
    animation_frame='group_label',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=990,
    height=550,
)
px.scatter(umap_res, **umap_scatter_options)

### scikit-learn Classification Comparison

In [None]:
# Features are the four RSS anchor readings; the label is the raw `target` column.
feature_columns = ['RSS_anchor1', 'RSS_anchor2', 'RSS_anchor3', 'RSS_anchor4']
X = df[feature_columns].values
y = df['target'].values

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=.20, random_state=42)

# Fit the scaler on the training split only and apply the same mean / variance
# to the test split. Fitting a second scaler on the test data would put the two
# splits on different scales and leak test-set statistics into the evaluation.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# baseline classification using a support vector classifier
clf = SVC(gamma = 'auto')
clf.fit(X_train, y_train)

# mean accuracy on the held-out split (displayed as the cell output)
clf.score(X_test, y_test)


In [None]:
# fig = tools.make_subplots(rows=10, cols=1,
#                          print_grid=False)

# h = .02  # step size in the mesh

# def matplotlib_to_plotly(cmap, pl_entries):
#     h = 1.0/(pl_entries-1)
#     pl_colorscale = []
    
#     for k in range(pl_entries):
#         C = map(np.uint8, np.array(cmap(k*h)[:3])*255)
#         C = list(map(float,C))
#         pl_colorscale.append([k*h, 'rgb'+str((C[0], C[1], C[2]))])
        
#     return pl_colorscale


# names = ["Nearest Neighbors", "Linear SVM", 
#          "RBF SVM", "Gaussian Process","Decision Tree", 
#          "Random Forest", "Neural Net", "AdaBoost",
#          "Naive Bayes", "Gradient Boosting"]

# classifiers = [
#     KNeighborsClassifier(3),
#     SVC(kernel="linear", C=0.025),
#     SVC(gamma=2, C=1),
#     GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
#     DecisionTreeClassifier(max_depth=5),
#     RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
#     MLPClassifier(alpha=1),
#     AdaBoostClassifier(),
#     GaussianNB(),
#     GradientBoostingClassifier()]

# X, y = X[:,0:2], y

# datasets = [(X,y)
#             ]

# i = 1
# j = 1
# # iterate over datasets
# for ds_cnt, ds in enumerate(datasets):
#     # preprocess dataset, split into training and test part
#     X, y = ds
#     X = StandardScaler().fit_transform(X)
#     X_train, X_test, y_train, y_test = \
#         train_test_split(X, y, test_size=.2, random_state=42)

#     x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
#     y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
#                          np.arange(y_min, y_max, h))

#     # just plot the dataset first
#     cm = plt.cm.RdBu
#     cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    
#     # Plot the training points
#     training_points = go.Scatter(x=X_train[:, 0],y=X_train[:, 1],showlegend=False,
#                                  mode='markers', marker=dict(color='red'))
#     # and testing points
#     testing_points = go.Scatter(x=X_test[:, 0], y=X_test[:, 1],showlegend=False,
#                                 mode='markers', marker=dict(color='blue'))

#     fig.append_trace(training_points, 1, j)
#     fig.append_trace(testing_points, 1, j)
   
   
    
#     # iterate over classifiers
#     i=2
#     for name, clf in zip(names, classifiers):
#         clf.fit(X_train, y_train)
#         score = clf.score(X_test, y_test)

#         # Plot the decision boundary. For that, we will assign a color to each
#         # point in the mesh [x_min, x_max]x[y_min, y_max].
#         if hasattr(clf, "decision_function"):
#             Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
#         else:
#             Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

#         # Put the result into a color plot
#         Z = Z.reshape(xx.shape)
        
#         trace = go.Contour(y=xx[0],z=Z,x=xx[0],
#                                 line=dict(width=0),
#                                 contours=dict( coloring='heatmap'),
#                                 colorscale= matplotlib_to_plotly(cm,300),
#                                 opacity = 0.7, showscale=False)

#         # Plot also the training points
        
#         training_points = go.Scatter(x=X_train[:, 0],y=X_train[:, 1],showlegend=False,
#                                  mode='markers', marker=dict(color='red'))
#         # and testing points
       
#         testing_points1 = go.Scatter(x=X_test[:, 0], y=X_test[:, 1],showlegend=False,
#                                 mode='markers', marker=dict(color='blue'))
        
       
#         fig.append_trace(training_points, i, j)
#         fig.append_trace(testing_points, i, j)
#         fig.append_trace(trace, i, j)
        
#         i=i+1
#     j+=1 
    
# for i in map(str, range(1,34)):
#     x='xaxis'+i
#     y='yaxis'+i
#     fig['layout'][y].update(showticklabels=False, ticks='',
#                            showgrid=False, zeroline=False)
#     fig['layout'][x].update(showticklabels=False, ticks='',
#                            showgrid=False, zeroline=False)
# k=0

# for x in map(str, range(1,32,3)):
#     y='yaxis'+x
#     fig['layout'][y].update(title=names[k])
#     k=k+1


# fig['layout'].update(height=2000)
# iplot(fig)