In [130]:
import numpy as np
import pandas as pd

## Data Preparation

In [131]:
data_gerakan_benar = pd.read_csv("../data/gerakan-benar.csv")
data_gerakan_benar["kelas"] = 1
data_gerakan_benar.head()

Unnamed: 0,ax1,ay1,az1,gx1,gy1,gz1,ax2,ay2,az2,gx2,gy2,gz2,ax3,ay3,az3,gx3,gy3,gz3,kelas
0,2.91,6.17,3.59,-1.67,-0.46,0.92,5.53,8.0,-1.73,0.31,0.16,-0.41,-0.22,9.47,-2.62,-0.06,0.02,0.01,1
1,2.58,8.36,2.91,0.18,0.07,-0.21,5.77,7.74,-1.23,-0.14,-0.04,-0.07,-0.28,9.57,-2.27,0.0,0.12,0.03,1
2,4.09,6.41,4.38,-1.3,-0.25,0.87,5.13,8.39,-1.98,0.41,0.09,-0.29,-0.09,9.46,-2.35,-0.03,0.03,0.02,1
3,3.58,7.54,3.47,0.51,0.13,-0.46,5.27,8.04,-1.34,-0.23,-0.09,0.0,-0.18,9.62,-1.83,0.04,0.12,-0.02,1
4,4.02,6.64,4.09,-1.16,-0.05,0.92,5.04,8.26,-1.68,0.27,0.12,-0.38,0.18,9.5,-1.97,-0.02,-0.06,0.03,1


In [132]:
data_gerakan_salah = pd.read_csv("../data/gerakan-salah.csv")
data_gerakan_salah["kelas"] = 0
data_gerakan_salah.head()

Unnamed: 0,ax1,ay1,az1,gx1,gy1,gz1,ax2,ay2,az2,gx2,gy2,gz2,ax3,ay3,az3,gx3,gy3,gz3,kelas
0,3.3,-6.13,5.3,-0.14,-0.1,0.07,-0.65,5.4,1.94,0.01,0.16,0.0,-0.02,9.65,-1.23,0.03,0.08,0.03,0
1,5.87,-2.27,6.23,-2.15,0.25,1.27,0.07,7.81,4.61,0.66,0.9,-0.36,-0.73,9.64,-1.35,0.12,0.16,0.09,0
2,3.25,0.26,4.64,0.41,-0.5,-0.15,0.22,7.4,5.04,-0.3,-0.11,-0.08,0.23,9.55,-1.55,0.14,-0.05,0.03,0
3,4.26,-5.44,6.57,2.04,-0.11,-0.96,-1.51,7.9,5.08,-0.18,-0.31,0.4,0.03,9.57,-2.13,-0.14,-0.14,0.08,0
4,3.66,-10.31,4.25,0.03,-0.27,-0.14,-1.82,7.2,4.15,0.12,-0.06,0.46,0.46,9.48,-2.38,0.09,0.09,-0.08,0


In [133]:
data = pd.concat([data_gerakan_benar, data_gerakan_salah], ignore_index=True)
data.head()

Unnamed: 0,ax1,ay1,az1,gx1,gy1,gz1,ax2,ay2,az2,gx2,gy2,gz2,ax3,ay3,az3,gx3,gy3,gz3,kelas
0,2.91,6.17,3.59,-1.67,-0.46,0.92,5.53,8.0,-1.73,0.31,0.16,-0.41,-0.22,9.47,-2.62,-0.06,0.02,0.01,1
1,2.58,8.36,2.91,0.18,0.07,-0.21,5.77,7.74,-1.23,-0.14,-0.04,-0.07,-0.28,9.57,-2.27,0.0,0.12,0.03,1
2,4.09,6.41,4.38,-1.3,-0.25,0.87,5.13,8.39,-1.98,0.41,0.09,-0.29,-0.09,9.46,-2.35,-0.03,0.03,0.02,1
3,3.58,7.54,3.47,0.51,0.13,-0.46,5.27,8.04,-1.34,-0.23,-0.09,0.0,-0.18,9.62,-1.83,0.04,0.12,-0.02,1
4,4.02,6.64,4.09,-1.16,-0.05,0.92,5.04,8.26,-1.68,0.27,0.12,-0.38,0.18,9.5,-1.97,-0.02,-0.06,0.03,1


In [134]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [135]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Preprocessing

In [136]:
def covariance_matrix(x):
    """
    Menghitung matriks kovarian dari data X.

    Parameters:
    - X: np.array, matriks data dengan setiap kolom mewakili suatu fitur.

    Returns:
    - cov_mat: np.array, matriks kovarian dari data X.
    """
    n_samples, n_features = x.shape

    # Menghitung rata-rata setiap fitur
    mean_vector = np.mean(x, axis=0)

    # Menghitung deviasi dari rata-rata
    deviation_matrix = x - mean_vector

    # Menghitung matriks kovarian
    cov_mat = np.dot(deviation_matrix.T, deviation_matrix) / (n_samples - 1)

    return cov_mat

In [137]:
class MyPCA:
    def __init__(self, n_components):
        """
        Inisialisasi objek MyPCA.

        Parameter:
        - n_components (int): Jumlah komponen utama yang akan dipertahankan.
        """
        self.cum_explained_variance = None
        self.explained_variance_ratio = None
        self.components = None
        self.n_components = n_components

    def fit(self, x):
        """
        Melatih model PCA menggunakan data input.

        Parameter:
        - x (numpy.ndarray): Matriks data input.

        Returns:
        - self: Objek MyPCA yang telah dilatih.
        """
        x = x.copy()

        # Eigendecomposition dari matriks kovarian       
        cov_mat = covariance_matrix(x)
        eig_vals, eig_vecs = np.linalg.eig(cov_mat) 
        eig_vecs = eig_vecs.T

        eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[i,:]) for i in range(len(eig_vals))]
        eig_pairs.sort(key=lambda _x: _x[0], reverse=True)
        eig_vals_sorted = np.array([x[0] for x in eig_pairs])
        eig_vecs_sorted = np.array([x[1] for x in eig_pairs])

        self.components = eig_vecs_sorted[:self.n_components,:]

        # Rasio variansi yang dijelaskan
        self.explained_variance_ratio = [i/np.sum(eig_vals) for i in eig_vals_sorted[:self.n_components]]
        self.cum_explained_variance = np.cumsum(self.explained_variance_ratio)

        return self

    def transform(self, x):
        """
        Mentransformasi data input menggunakan komponen utama yang telah dilatih.

        Parameter:
        - x (numpy.ndarray): Matriks data input yang akan ditransformasi.

        Returns:
        - transformed_data (numpy.ndarray): Data yang telah ditransformasi menggunakan komponen utama.
        """
        x = x.copy()
        transformed_data = x.dot(self.components.T)
        return transformed_data

### Implement PCA

In [145]:
my_pca = MyPCA(n_components=3).fit(X_train)

In [146]:
X_train_pca = my_pca.transform(X_train)
X_train_pca

Unnamed: 0,0,1,2
154,4.770542,-8.571080,-7.802937
136,9.135974,-7.033519,-9.675613
147,8.857905,-6.582485,-9.773927
51,-5.956812,-7.918031,-3.159351
126,-5.667413,-5.190947,-6.846729
...,...,...,...
71,-5.944045,-11.577226,-3.435093
106,3.888735,-8.731489,-3.401343
14,-4.048942,-11.781622,-1.664945
92,6.353937,-9.464154,-2.767229


## Model

In [147]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_pca, y_train)

In [148]:
X_test_pca = my_pca.transform(X_test)
X_test_pca

Unnamed: 0,0,1,2
162,10.65404,-6.226513,-10.126088
42,-4.091997,-8.505171,-4.977809
90,4.186897,-8.043284,-2.821407
60,-4.063294,-10.218198,-4.435619
114,-11.081257,5.985047,-2.827483
137,-4.000521,-9.466231,-7.793036
41,-4.72162,-7.932908,-5.138642
15,-2.423595,-10.980573,-2.757944
113,-8.355043,1.607558,-4.175419
108,1.431754,-10.097854,-3.548594


In [149]:
pred = rf_model.predict(X_test_pca)
pred

array([0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0])

## Evaluasi

In [150]:
from sklearn.metrics import accuracy_score

score = accuracy_score(y_test, pred)
score

0.9714285714285714