# Import Library

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline

# import SVC classifier
from sklearn.svm import SVC

# import metrics used to evaluate the classifier
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV

# Load the Training DataSet

In [None]:
# Read the training CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df_train = pd.read_csv("../input/human-activity-recognition-with-smartphones/train.csv")

# EDA

In [None]:
# Quick look at the first rows to see the column names and value ranges.
df_train.head()

In [None]:
# Last rows of the frame, as a check that the file was read to the end.
df_train.tail()

In [None]:
# (number of rows, number of columns) of the training frame.
df_train.shape

## Check for missing values in the dataset

In [None]:
# True if at least one cell anywhere in the frame is null, False otherwise.
df_train.isnull().values.any()

## Class Distribution

In [None]:
# Distinct values of the "Activity" column (the column later used as the label).
df_train["Activity"].unique()

In [None]:
# Max/min of the 'tBodyAcc-mean()-X' column. skipna=False means a NaN in the
# column would propagate to the result instead of being ignored.
body_acc_x = df_train['tBodyAcc-mean()-X']
tbodymax = body_acc_x.max(skipna=False)
tbodymin = body_acc_x.min(skipna=False)
print("The range of values for tbodyAccmean()-X are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Max/min of the 'angle(X,gravityMean)' column. skipna=False means a NaN in
# the column would propagate to the result instead of being ignored.
tgravmax = df_train['angle(X,gravityMean)'].max(skipna=False)
tgravmin = df_train['angle(X,gravityMean)'].min(skipna=False)
# The printed label now names the column that is actually measured; it
# previously referred to "tgravityAcc-mean()-X", a different feature.
print("The range of values for angle(X,gravityMean) are: ")
print("Max value: ", tgravmax)
print("Min value: ", tgravmin)

In [None]:
# Box plot of 'tBodyAccMag-mean()' per activity (outliers hidden), with two
# dashed horizontal guide lines at y=-0.7 and y=-0.05.
fig, ax = plt.subplots(figsize=(7, 7))
sns.boxplot(
    data=df_train,
    x='Activity',
    y='tBodyAccMag-mean()',
    showfliers=False,
    saturation=1,
    ax=ax,
)
ax.set_ylabel('Acceleration Magnitude mean')
ax.axhline(y=-0.7, xmin=0.1, xmax=0.9, dashes=(5, 5), c='g')
ax.axhline(y=-0.05, xmin=0.4, dashes=(5, 5), c='m')
plt.xticks(rotation=90)
plt.show()

* Observation:
If tBodyAccMag-mean() (tAccMean) is < -0.8 then the Activity is either Standing, Sitting or Laying.
If tAccMean is > -0.6 then the Activity is either Walking, WalkingDownstairs or WalkingUpstairs.
If tAccMean is > 0.0 then the Activity is WalkingDownstairs.
We can classify 75% of the Activity labels this way, with some errors.

In [None]:
# Box plot of 'angle(X,gravityMean)' per activity, with a dashed horizontal
# guide line at y=0.08.
ax = sns.boxplot(data=df_train, x='Activity', y='angle(X,gravityMean)')
ax.axhline(y=0.08, xmin=0.1, xmax=0.9, c='m', dashes=(5, 3))
ax.set_title('Angle between X-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation=40)
plt.show()

* Observation: If angle(X,gravityMean) > 0 then the Activity is Laying.
We can classify all datapoints belonging to the Laying activity with just a single if/else statement.

In [None]:
# Box plot of 'angle(Y,gravityMean)' per activity (outliers hidden), with a
# dashed horizontal guide line at y=-0.22.
ax = sns.boxplot(
    data=df_train,
    x='Activity',
    y='angle(Y,gravityMean)',
    showfliers=False,
)
ax.set_title('Angle between Y-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation=40)
ax.axhline(y=-0.22, xmin=0.1, xmax=0.8, dashes=(5, 3), c='m')
plt.show()

* Observation: If angle(Y,gravityMean) < -0.2 then the Activity is Laying.

In [None]:
# Rows labelled 'WALKING', and the max/min of 'tBodyAccMag-mean()' among them.
walkingactivity = df_train.loc[df_train['Activity'] == 'WALKING']
walking_mag = walkingactivity['tBodyAccMag-mean()']
tbodymax = walking_mag.max(skipna=False)
tbodymin = walking_mag.min(skipna=False)
print("The range of values for tBodyAccMag-mean() of 'WALKING' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Rows labelled 'WALKING_UPSTAIRS', and the max/min of 'tBodyAccMag-mean()'
# among them.
walking_upstairsactivity = df_train.loc[df_train['Activity'] == 'WALKING_UPSTAIRS']
upstairs_mag = walking_upstairsactivity['tBodyAccMag-mean()']
tbodymax = upstairs_mag.max(skipna=False)
tbodymin = upstairs_mag.min(skipna=False)
print("The range of values for tBodyAccMag-mean() of 'WALKING_UPSTAIRS' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Rows labelled 'WALKING_DOWNSTAIRS', and the max/min of 'tBodyAccMag-mean()'
# among them.
walking_downstairsactivity = df_train.loc[df_train['Activity'] == 'WALKING_DOWNSTAIRS']
downstairs_mag = walking_downstairsactivity['tBodyAccMag-mean()']
tbodymax = downstairs_mag.max(skipna=False)
tbodymin = downstairs_mag.min(skipna=False)
print("The range of values for tBodyAccMag-mean() of 'WALKING_DOWNSTAIRS' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Rows labelled 'STANDING', and the max/min of 'angle(X,gravityMean)' among
# them. The tbodymax/tbodymin names are kept from the earlier cells even
# though they hold angle values here.
standingactivity = df_train.loc[df_train['Activity'] == 'STANDING']
standing_angle_x = standingactivity['angle(X,gravityMean)']
tbodymax = standing_angle_x.max(skipna=False)
tbodymin = standing_angle_x.min(skipna=False)
print("The range of values for angle(X,gravityMean) of 'STANDING' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Max/min of 'angle(Y,gravityMean)' among the rows held in standingactivity.
standing_angle_y = standingactivity['angle(Y,gravityMean)']
tbodymax = standing_angle_y.max(skipna=False)
tbodymin = standing_angle_y.min(skipna=False)
print("The range of values for angle(Y,gravityMean) of 'STANDING' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Rows labelled 'SITTING', and the max/min of 'angle(X,gravityMean)' among
# them.
sittingactivity = df_train.loc[df_train['Activity'] == 'SITTING']
sitting_angle_x = sittingactivity['angle(X,gravityMean)']
tbodymax = sitting_angle_x.max(skipna=False)
tbodymin = sitting_angle_x.min(skipna=False)
print("The range of values for angle(X,gravityMean) of 'Sitting' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# Max/min of 'angle(Y,gravityMean)' among the rows held in sittingactivity.
sitting_angle_y = sittingactivity['angle(Y,gravityMean)']
tbodymax = sitting_angle_y.max(skipna=False)
tbodymin = sitting_angle_y.min(skipna=False)
print("The range of values for angle(Y,gravityMean) of 'Sitting' activity are: ")
for label, value in (("Max value: ", tbodymax), ("Min value: ", tbodymin)):
    print(label, value)

In [None]:
# One-column frequency table: number of rows per Activity value.
pd.crosstab(index = df_train["Activity"],columns="count")

## Visualize the Class Distribution

In [None]:
# Bar chart of the number of rows per activity.
plt.figure(figsize=(10,5))
ax = sns.countplot(x="Activity", data=df_train)
# Only rotate the tick labels. Extra keyword arguments to plt.xticks are
# applied to the labels as Text properties, so the former
# `x=df_train['Activity']` tried to set each label's x position to a whole
# Series instead of supplying data.
plt.xticks(rotation='vertical')
plt.show()

In [None]:
# Distinct values of the "subject" column.
df_train["subject"].unique()

In [None]:
# Feature frame: every column except the label and the subject column.
X = df_train.drop(columns=['Activity', 'subject'])
# Labels as a plain object-dtype NumPy array.
Y = df_train['Activity'].to_numpy(dtype=object)

X.shape, Y.shape

In [None]:
# First rows of the feature frame (label and subject columns removed).
X.head()

In [None]:
# Spot-check one raw label value, taken from the Activity column.
Y[1]

In [None]:
# Summary of the feature frame (index range, dtypes, memory usage).
X.info()

In [None]:
# Count the numeric feature columns in the training set.
# Uses the public select_dtypes API rather than the private
# DataFrame._get_numeric_data(), which may change without notice.
num_cols = X.select_dtypes(include='number').columns
print("Number of numeric features:",num_cols.size)

## Transforming non numerical labels into numerical labels

In [None]:
from sklearn import preprocessing
# LabelEncoder maps each distinct label to an integer index; it is only
# instantiated here and is fitted in a later cell.
encoder = preprocessing.LabelEncoder()

In [None]:
# Learn the label -> integer mapping from Y and encode Y in a single step.
y = encoder.fit_transform(Y)
y.shape

In [None]:
# Encoded (integer) label at position 1, for comparison with the raw Y[1].
y[1]

In [None]:
# Original labels seen by the encoder; a label's position in this array is
# its encoded integer.
encoder.classes_

In [None]:
# Original label that corresponds to encoded value 2.
encoder.classes_[2]

## Feature Scaling

In [None]:
# Create the scaler used to standardise the features; it is only
# instantiated here and is fitted in a later cell.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [None]:
# Fit the scaler on every row of X and replace X with the scaled values.
# NOTE(review): X is rebound here from the feature DataFrame to the object
# returned by fit_transform, so cells above that call DataFrame methods on X
# (e.g. X.head()) cannot be re-run after this cell without rebuilding X.
# NOTE(review): the scaler is fitted on all rows, including those that a later
# cell holds out for validation.
X = scaler.fit_transform(X)
# Row at position 1 of the scaled feature array.
X[1]

# Split X and y into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split

# Split the *unscaled* features, then fit the scaler on the training fold only.
# The X built by the earlier cell was standardised with statistics computed
# over every row, so splitting it would let validation rows influence the
# scaling of the training data (data leakage) and bias the validation score.
# The row count, test_size and random_state are unchanged, so the same rows
# land in each fold as before.
X_raw = df_train.drop(['Activity', 'subject'], axis=1)
X_train_raw, X_valid_raw, y_train, y_valid = train_test_split(
    X_raw, y, test_size=0.2, random_state=99
)

# `scaler` is refitted here, so any later scaler.transform(...) call uses the
# same training-fold statistics the model is trained with.
X_train = scaler.fit_transform(X_train_raw)
X_valid = scaler.transform(X_valid_raw)
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

# Train the Model

In [None]:
# Instantiate the support-vector classifier with scikit-learn's default
# hyperparameters (no arguments are passed).
svc = SVC() 

In [None]:
# Fit the classifier on the training split only; X_valid/y_valid are not
# passed to this call.
svc.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out validation split (X_valid), which was
# carved out of the training file rather than taken from a separate test set.
y_pred = svc.predict(X_valid)

In [None]:
# Accuracy of the default-hyperparameter model on the validation split,
# printed to four decimal places.
valid_accuracy = accuracy_score(y_valid, y_pred)
print(f'Model accuracy score with default hyperparameters: {valid_accuracy:0.4f}')