In [None]:
# Download and unzip database
# !wget https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip
# !unzip "UCI HAR Dataset.zip"
# !wget https://github.com/sjriek/AIS7/blob/main/HAR.zip 
!wget https://github.com/sjriek/AIS7/raw/main/HAR.zip
!unzip HAR.zip



In [None]:
# function to plot confusion matrix
def plot_confusion_matrix(cm, lables):
    """Draw a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : 2-D array
        Confusion matrix. Must support ``.shape``, ``.max()`` and ``cm[i, j]``.
        Rows are labelled "True label" and columns "Predicted label".
    lables : sequence
        Tick labels applied to both axes, one per class. The parameter name is
        misspelled but kept as-is so existing keyword callers keep working.

    Returns
    -------
    None
        The figure is created through pyplot and left open; the caller decides
        when to show it.
    """
    fig, ax = plt.subplots(figsize=(15, 5))
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    fig.colorbar(im, ax=ax)
    ax.set(
        xticks=np.arange(cm.shape[1]),
        yticks=np.arange(cm.shape[0]),
        xticklabels=lables,
        yticklabels=lables,
        ylabel='True label',
        xlabel='Predicted label',
    )
    # Rotate the x tick labels on this Axes explicitly instead of relying on
    # pyplot's notion of the "current" axes.
    ax.tick_params(axis='x', labelrotation=90)
    # The old `grid(b=False)` spelling fails on current Matplotlib, where the
    # keyword was renamed to `visible`; the positional form works on old and
    # new releases alike.
    ax.grid(False)
    # Cells darker than half the maximum get white text so the count stays readable.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(
                j, i, int(cm[i, j]),
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black",
            )
    fig.tight_layout()

In [None]:
# load dataset
from numpy import dstack
from pandas import read_csv

# load a single file as a numpy array
def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a NumPy array.

    Parameters
    ----------
    filepath : str or path-like
        Location of the file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    numpy.ndarray
        The parsed table (``DataFrame.values``), one row per line of the file.
    """
    # `sep=r'\s+'` is the supported replacement for the deprecated
    # `delim_whitespace=True`; it splits on any run of whitespace.
    dataframe = read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values
 
# load a list of files, such as x, y, z data for a given variable
def load_group(filenames, prefix=''):
    """Load several files and stack them along a new third axis.

    Each name in ``filenames`` is appended to ``prefix`` and read with
    ``load_file``; the resulting arrays are combined with ``numpy.dstack`` so
    that the file index becomes the last dimension.
    """
    arrays = [load_file(prefix + fname) for fname in filenames]
    return dstack(arrays)
 
# load a dataset group, such as train or test
def load_dataset(group, prefix=''):
    """Load the nine inertial-signal files and the labels for one split.

    Parameters
    ----------
    group : str
        Split name used in both directory and file names (called with
        'train' and 'test' in this notebook).
    prefix : str
        Path prepended to ``group``; expected to end with a separator.

    Returns
    -------
    (X, y)
        ``X`` is the stacked signal array from ``load_group``; ``y`` is the
        array read from ``<prefix><group>/y_<group>.txt``.
    """
    signals_dir = prefix + group + '/Inertial Signals/'
    # Order matters: total acceleration, body acceleration, body gyroscope,
    # each as x, y, z. This fixes the channel order of the last axis of X.
    signal_kinds = ('total_acc', 'body_acc', 'body_gyro')
    filenames = [
        '{}_{}_{}.txt'.format(kind, axis, group)
        for kind in signal_kinds
        for axis in ('x', 'y', 'z')
    ]
    X = load_group(filenames, signals_dir)
    y = load_file(prefix + group + '/y_' + group + '.txt')
    return X, y
 
# Read the inertial-signal files for the training split and report the shapes
X_train, y_train = load_dataset(group='train', prefix='UCI HAR Dataset/')
print(X_train.shape, y_train.shape)
# Same for the test split
X_test, y_test = load_dataset(group='test', prefix='UCI HAR Dataset/')
print(X_test.shape, y_test.shape)

In [None]:
# load dataset
from numpy import dstack
from pandas import read_csv

# Read the tabular train/test files from the working directory.
df_train = read_csv("./train.csv")
df_test = read_csv("./test.csv")

# Features are every column except the subject id and the target;
# the target is the 'Activity' column.
# NOTE: this rebinds X_train / y_train / X_test / y_test, replacing the arrays
# produced by load_dataset earlier in the notebook.
X_train = df_train.drop(columns=['subject', 'Activity'])
y_train = df_train['Activity']
X_test = df_test.drop(columns=['subject', 'Activity'])
y_test = df_test['Activity']

In [None]:
import numpy as np
import sklearn
from sklearn import model_selection
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.ensemble import RandomForestClassifier

# Fit a 3-nearest-neighbours classifier and report its accuracy on the test split.
# Alternative kept for reference:
# model = LogisticRegression(penalty='l2', C=10,solver='lbfgs',class_weight='balanced', max_iter=10000,random_state = 0)
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
score = model.score(X_test, y_test)
print(score)

In [None]:
from sklearn.metrics import confusion_matrix
predict = model.predict(X_test)
# Fix the class order once and share it between the matrix and the plot.
# By default confusion_matrix orders rows/columns by the sorted labels that
# occur in y_test or predict. Labelling the ticks with np.unique(predict)
# alone would omit any class that was never predicted and leave the tick
# labels out of step with the matrix.
class_labels = np.unique(np.concatenate([np.ravel(y_test), np.ravel(predict)]))
confusion = confusion_matrix(y_test, predict, labels=class_labels)
print("==== Confusion Matrix ===")
print(confusion)
print('\n')


plot_confusion_matrix(confusion, class_labels)
#using matplotlib
plt.matshow(confusion)
plt.show()


# def plotMatrix(data):
#   fig, ax = plt.subplots()
#   # Using matshow here just because it sets the ticks up nicely. imshow is faster.
#   ax.matshow(data, cmap='viridis')
#   for (i, j), z in np.ndenumerate(data):
#      ax.text(j, i, '{:0.1f}'.format(z), ha='center', va='center')
#   plt.show()

# plotMatrix(confusion)

#using seaborn
# from sklearn import metrics
# cnf_matrix = metrics.confusion_matrix(y_test, predict)
# p = sns.heatmap(pd.DataFrame(cnf_matrix), annot=True)



#using pandas
# Styler.set_precision is gone from current pandas; format(precision=...) is
# the replacement for setting the displayed number of decimals.
pd.DataFrame(confusion).style.background_gradient(cmap='viridis').format(precision=4)



In [None]:
# Flatten every sample into a single feature vector.
# np.asarray(...).reshape(n, -1) copes with both forms X_train / X_test take in
# this notebook: the 3-D stacked signal array from load_dataset, and the 2-D
# DataFrame read from train.csv / test.csv. The DataFrame has a two-entry
# .shape and no .reshape, so the earlier three-way unpacking failed on it.
X2d_train = np.asarray(X_train).reshape(len(X_train), -1)
X2d_test = np.asarray(X_test).reshape(len(X_test), -1)

print(X2d_train.shape, y_train.shape)

print(X2d_test.shape, y_test.shape)


In [None]:
import numpy as np
import sklearn
from sklearn import model_selection
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.ensemble import RandomForestClassifier

# Fit an L2-regularised logistic regression on the flattened features and
# report its accuracy on the flattened test split.
# Alternative kept for reference:
# model = KNeighborsClassifier(3)
model = LogisticRegression(
    penalty='l2',
    C=10,
    solver='lbfgs',
    class_weight='balanced',
    max_iter=10000,
    random_state=0,
).fit(X2d_train, y_train)
score = model.score(X2d_test, y_test)
print(score)
# models = []

# models.append(('SVM', SVC(gamma='auto')))
# # models.append(('RFC', RandomForestClassifier(max_depth=5, n_estimators=40)))

# results = []
# names = []
# scoring = 'accuracy'


# for name, model in models:
#   kfold = model_selection.KFold(n_splits=10)
#   cv_results = model_selection.cross_val_score(model, X_train, y_train)
#   print(cv_results)
#   results.append(cv_results)
#   names.append(name)
#   msg = "%s Algorithm: Accuracy %f (%f)" % (name, cv_results.mean(), cv_results.std())
#   print(msg)

In [None]:
from sklearn.metrics import confusion_matrix
# Predict on the flattened test set and tabulate true vs. predicted classes.
predict = model.predict(X2d_test)
confusion = confusion_matrix(y_test, predict)
# Single print call; emits the header, the matrix and the trailing blank lines
# exactly as three separate prints would.
print("==== Confusion Matrix ===", confusion, '\n', sep='\n')

#using matplotlib
# plt.matshow(confusion)
# plt.show()


# def plotMatrix(data):
#   fig, ax = plt.subplots()
#   # Using matshow here just because it sets the ticks up nicely. imshow is faster.
#   ax.matshow(data, cmap='viridis')
#   for (i, j), z in np.ndenumerate(data):
#      ax.text(j, i, '{:0.1f}'.format(z), ha='center', va='center')
#   plt.show()

# plotMatrix(confusion)

#using seaborn
# from sklearn import metrics
# cnf_matrix = metrics.confusion_matrix(y_test, predict)
# p = sns.heatmap(pd.DataFrame(cnf_matrix), annot=True)



#using pandas
# Use `confusion` from the cell above: `cnf_matrix` is only assigned in the
# commented-out seaborn snippet, so referencing it raised NameError.
# Styler.set_precision is also gone from current pandas; format(precision=...)
# is the replacement.
pd.DataFrame(confusion).style.background_gradient(cmap='viridis').format(precision=4)

In [None]:
# Display the flattened test features as the cell output (inspection only;
# nothing is assigned here).
X2d_test