In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input
# directory, walking the tree top-down.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

In [None]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [None]:
# Read the train and test CSV files of the dataset into two DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/human-activity-recognition-with-smartphones/train.csv',
        '../input/human-activity-recognition-with-smartphones/test.csv',
    )
)

In [None]:
# For each split, report its (rows, columns) shape and whether any cell is null.
for shape_message, null_message, frame in (
    ("Training Data :{}", "null values in train_data :{}", train),
    ("Testing Data :{}", "null values in test_data :{}", test),
):
    print(shape_message.format(frame.shape))
    print(null_message.format(frame.isnull().values.any()))


In [None]:
# 'Activity' is the label to predict; it and the 'subject' column are both
# excluded from the feature matrices.
non_feature_columns = ['Activity', 'subject']
x_train, y_train = train.drop(columns=non_feature_columns), train['Activity']
x_test, y_test = test.drop(columns=non_feature_columns), test['Activity']

In [None]:
# Pie chart of how the training samples are distributed over the activities.
#
# value_counts() returns counts ordered by descending frequency, so pairing it
# with an alphabetically sorted label list attaches counts to the wrong
# activity names. Sorting the counts by index and taking the labels from that
# same Series keeps every slice aligned with its label.
activity_counts = y_train.value_counts().sort_index()
count_of_each_activity = activity_counts.to_numpy()
activities = activity_counts.index.tolist()
plt.rcParams.update({'figure.figsize':[30,30],'font.size':80})
plt.pie(count_of_each_activity,labels=activities,autopct = '%0.2f')

In [None]:
# Bucket the feature columns by the sensor named in the column label:
# any name containing 'Acc' counts as accelerometer, otherwise any name
# containing 'Gyro' counts as gyroscope, and everything else is 'Others'.
column_names = [str(column) for column in x_train.columns]
acc = sum('Acc' in name for name in column_names)
gyro = sum('Acc' not in name and 'Gyro' in name for name in column_names)
oth = len(column_names) - acc - gyro

# Bar chart of the number of features in each bucket.
plt.rcParams.update({'figure.figsize':[10,10],'font.size':16})
plt.bar(['Accelerometer','Gyroscope','Others'],[acc,gyro,oth],color=('r','g','b'))

In [None]:
# Keep only the rows labelled 'STANDING' and renumber them from 0.
is_standing = train['Activity'] == 'STANDING'
standing_activity = train.loc[is_standing].reset_index(drop=True)

In [None]:
# Build a 'Time' column that counts 1, 2, 3, ... along each run of consecutive
# rows belonging to the same subject, restarting at 1 whenever the subject
# differs from the previous row.
#
# This is computed without a Python-level loop: a new run starts wherever the
# subject differs from the row above, the cumulative sum of those start flags
# gives every row a run number, and cumcount() numbers the rows inside a run.
subject_ids = standing_activity['subject']
run_number = subject_ids.ne(subject_ids.shift()).cumsum()
# Kept as a float array, matching the dtype of the original np.zeros buffer.
time_series = (subject_ids.groupby(run_number).cumcount() + 1).to_numpy(dtype=float)

time_series_df = pd.DataFrame({ 'Time': time_series })
# standing_activity has a fresh 0..n-1 index, so the column-wise concat lines
# the new 'Time' values up row for row.
standing_activity_df = pd.concat([standing_activity, time_series_df], axis = 1)

In [None]:
# Line plot of 'angle(X,gravityMean)' against 'Time' while standing, one line
# per subject, each in its own colour from the rainbow colormap.
subjects = standing_activity_df['subject'].unique()
colors = cm.rainbow(np.linspace(0, 1, len(subjects)))

# Figure-wide settings are applied once instead of on every iteration.
plt.rcParams.update({'figure.figsize': [40, 30], 'font.size': 24})

# zip() pairs each subject with its colour, which avoids a manual counter
# (the original counter was named `id` and shadowed the builtin).
for subject, subject_color in zip(subjects, colors):
    # Filter once per subject and reuse the result for both axes.
    subject_rows = standing_activity_df[standing_activity_df['subject'] == subject]
    plt.plot(subject_rows['Time'],
             subject_rows['angle(X,gravityMean)'],
             c = subject_color,
             label = 'Subject ' + str(subject),
             linewidth = 4)

plt.xlabel('Time')
plt.ylabel('Angle')
plt.title('Angle between X and mean Gravity v/s Time for various subjects')
plt.legend(prop = {'size': 24})

In [None]:
# One float slot per evaluated classifier (four in total), zero until that
# classifier's percentage accuracy is stored at its index.
accuracy_scores = np.zeros(shape=(4,), dtype=np.float64)

In [None]:


# Fit each classifier on the training features, predict the test set, and
# store its accuracy as a percentage in accuracy_scores. Slot order:
# 0 = SVC, 1 = logistic regression, 2 = k-nearest neighbours, 3 = random forest.
# All estimators use their default hyper-parameters, except for the seed below.
classifiers = [
    ('Support Vector Classifier', SVC()),
    ('Logistic Regression', LogisticRegression()),
    ('K Nearest Neighbors Classifier', KNeighborsClassifier()),
    # A random forest is randomised; a fixed seed makes the reported accuracy
    # reproducible from run to run.
    ('Random Forest Classifier', RandomForestClassifier(random_state=42)),
]

for slot, (classifier_name, estimator) in enumerate(classifiers):
    clf = estimator.fit(x_train, y_train)
    prediction = clf.predict(x_test)
    accuracy_scores[slot] = accuracy_score(y_test, prediction)*100
    print('{} accuracy: {}%'.format(classifier_name, accuracy_scores[slot]))

In [None]:
# Bar chart comparing the accuracy stored in accuracy_scores for the four
# classifiers; the labels are listed in the same order as the score slots.
labels = ['Support Vector Classifier', 'Logistic Regression', 'K Nearest Neighbors', 'Random Forest']  # fixed typo: was 'Logsitic'
# One rainbow colour per bar.
colors = cm.rainbow(np.linspace(0, 1, len(labels)))
plt.bar(labels,accuracy_scores,color = colors)
plt.xlabel('Classifiers')
plt.ylabel('Accuracy')
plt.title('Accuracy of various algorithms')