# Human Activity Recognition

In [None]:
! gdown https://drive.google.com/uc?id=1ypa5iZ1dLDO-zGRO_yDXrJnMvjRGUG4v   -q

In [1]:
import numpy as np
import pandas as pd
import myutils as my

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
human = pd.read_csv('./HumanActivityRecognition/train.csv')
human.shape

(7352, 563)

### 데이터 준비

- 결측치(df.isna()), 중복치(df.duplicated()) 제거
- X, y 분리
- y값 유니크값 : 레이블 몇개짜리인지
- y값 인코딩

- 정규화 (표준화)
- GridSearchCV : 베스트 모델 착기
- 테스트 파일 읽어서 score 계산하기

In [4]:
human.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


In [5]:
human.shape

(7352, 563)

In [10]:
X_train, X_test, y_train, y_test = my.get_human()   #다중분류

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(5881, 562) (1471, 562)
(5881,) (1471,)


In [11]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)     #스케일링 된 X_train : numpy타입으로 변경 - head() X
y_train = y_train.values  

In [12]:
from sklearn.svm import SVC

clf = SVC(kernel='linear', random_state=2022)    #linear 사용
clf.fit(X_train, y_train)

In [14]:
clf.score(X_train, y_train)

0.9942186702941677

In [15]:
y_pred = clf.predict(X_test)

In [16]:
my.print_score(y_test, y_pred)

accuraccy: 0.9830047586675731
precision: 0.9841959030359355
recall: 0.984108324625566


In [17]:
type(X_train), type(y_train)

(pandas.core.frame.DataFrame, numpy.ndarray)

In [41]:
X_test = scaler.transform(X_test)
y_test = y_test.values

In [42]:
clf.score(X_test, y_test)

0.9983050847457627

In [18]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

names = ['linear', 'linearSVC', 'poly', 'rbf', 'knn-5', 'knn-9']
models = [
    SVC(kernel='linear', C=1),
    LinearSVC(C=1, max_iter=1000),
    SVC(kernel='poly', degree=3),
    SVC(kernel='rbf', C=1, gamma=0.7),
    KNeighborsClassifier(n_neighbors=5),
    KNeighborsClassifier(n_neighbors=9)
]

scores = []                            #리스트형으로 변환

for name, model in zip(names, models):
    model.fit(X_train, y_train)
    s = model.score(X_train, y_train)
    print(name, s)
    scores.append(s)

linear 0.9942186702941677
linearSVC 0.994898826730148
poly 0.9542594796803264
rbf 1.0
knn-5 0.9872470668253699
knn-9 0.9760244856316953


In [19]:
param_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

params = [
    {
          'C' : param_range,
          'gamma' : param_range,
          'kernel':['rbf']
    },
    {
        'C' : param_range,
        'kernel':['linear']
    },
    {
        'C' : param_range,
        'degree':[2, 3],
        'kernel':['poly']
    }
          ]

In [20]:
from sklearn.model_selection import GridSearchCV

clf = SVC(random_state=2022)

gs = GridSearchCV(estimator=clf,
                  param_grid=params,
                  scoring='accuracy',
                  cv=3,
                  n_jobs=-1,
                  verbose=3
                 )
gs.fit(X_train, y_train)

Fitting 3 folds for each of 70 candidates, totalling 210 fits


In [21]:
gs.best_estimator_

In [22]:
gs.best_params_

{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}

In [23]:
from sklearn.model_selection import GridSearchCV

clf = SVC(random_state=2022)

gs = GridSearchCV(estimator=clf,
                  param_grid=params,
                  scoring='accuracy',
                  cv=3,
                  n_jobs=-1,
                  verbose=3
                 )
gs.fit(X_test, y_test)

Fitting 3 folds for each of 70 candidates, totalling 210 fits


In [48]:
humantest = pd.read_csv('./HumanActivityRecognition/test.csv')
humantest.shape

(2947, 563)

In [49]:
import numpy as np
import pandas as pd
import myutils as my

In [50]:
X_train, X_test, y_train, y_test = my.get_humantest()   #다중분류

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(2357, 562) (590, 562)
(2357,) (590,)


In [51]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)     #스케일링 된 X_train : numpy타입으로 변경 - head() X
y_train = y_train.values 

In [52]:
X_test = scaler.transform(X_test)
y_test = y_test.values

In [53]:
from sklearn.svm import SVC

clf = SVC(kernel='linear', random_state=2022)    #linear 사용
clf.fit(X_train, y_train)

In [54]:
clf.score(X_train, y_train)

0.9995757318625371

In [55]:
clf.score(X_test, y_test)

0.9271186440677966

In [31]:
y_pred = clf.predict(X_test)

In [32]:
my.print_score(y_test, y_pred)

accuraccy: 0.9711864406779661
precision: 0.9725050276520865
recall: 0.9724919093851133


In [56]:
param_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

params = [
    {
          'C' : param_range,
          'gamma' : param_range,
          'kernel':['rbf']
    },
    {
        'C' : param_range,
        'kernel':['linear']
    },
    {
        'C' : param_range,
        'degree':[2, 3],
        'kernel':['poly']
    }
          ]

In [57]:
from sklearn.model_selection import GridSearchCV

clf = SVC(random_state=2022)

gs = GridSearchCV(estimator=clf,
                  param_grid=params,
                  scoring='accuracy',
                  cv=3,
                  n_jobs=-1,
                  verbose=3
                 )
gs.fit(X_train, y_train)

Fitting 3 folds for each of 70 candidates, totalling 210 fits


In [58]:
gs.best_estimator_

In [36]:
from sklearn.model_selection import GridSearchCV

clf = SVC(random_state=2022)

gs = GridSearchCV(estimator=clf,
                  param_grid=params,
                  scoring='accuracy',
                  cv=3,
                  n_jobs=-1,
                  verbose=3
                 )
gs.fit(X_test, y_test)

Fitting 3 folds for each of 70 candidates, totalling 210 fits


In [39]:
from sklearn.svm import SVC

clf = SVC(kernel='linear', random_state=2022)    #linear 사용
clf.fit(X_test, y_test)