In [1]:
import pandas as pd
import numpy as np
import math

In [24]:
# Remote location of the gestures CSV used by every section below.
url_gestures = 'https://raw.githubusercontent.com/nullpitch-dev/hj_public/master/Gestures.csv'

# Fetch the file over HTTP and parse it into a DataFrame (needs network access).
data_gestures = pd.read_csv(filepath_or_buffer=url_gestures)

In [35]:
# Working handle used by the sections below. This is a second name for the same
# DataFrame object as data_gestures (no copy is made), so an in-place change
# through either name would be visible through the other.
base = data_gestures

### [1]

In [45]:
# Flag every cell in the arctan_1 .. arctan_79 column range: 1 where the value
# is not equal to 0, otherwise 0. (A missing value, if any exists, also
# compares as "not equal to 0" and is therefore flagged as 1.)
d1 = base.loc[:, 'arctan_1':'arctan_79'].ne(0).mul(1)

# Total number of flagged cells: per-column sums, then the sum of those.
d1.sum().sum()

469041

### [2]

In [51]:
# Add ttl_dist: the row-wise sum over the distance_1 .. distance_79 column range.
# `assign` returns a new frame, so `base` itself is left unchanged.
d2 = base.assign(
    ttl_dist=lambda frame: frame.loc[:, 'distance_1':'distance_79'].sum(axis='columns')
)

In [62]:
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# One-way ANOVA: ttl_dist explained by `character`, which C(...) marks as a
# categorical factor in the formula.
formula = 'ttl_dist ~ C(character)'

# Fit the linear model on d2, then derive the ANOVA table from the fitted model.
model = ols(formula=formula, data=d2).fit()
table = anova_lm(model)

In [69]:
# Report the ANOVA result for the character factor: the F statistic floored to
# an integer, then the p-value together with the decision on H0.
f_stat = table.loc['C(character)', 'F']
p_value = table.loc['C(character)', 'PR(>F)']

# Significance threshold for the decision. 0.05 is the conventional level and
# is an assumption here -- change it if the exercise specifies another one.
alpha = 0.05

# Derive the conclusion from the p-value instead of hard-coding it, so the
# printed text cannot contradict the number printed next to it.
h0_rejected = p_value < alpha

print(f"{math.floor(f_stat)}")
if h0_rejected:
    print(f"p-value = {p_value}, so H0 is rejected.")
    print("Meaning, total distance is different by characters")
else:
    print(f"p-value = {p_value}, so H0 is not rejected.")
    print("Meaning, total distance is not different by characters")

814
p-value = 0.0, so H0 is rejected.
Meaning, total distance is different by characters


### [3]

In [71]:
# Feature matrix for PCA: every column positioned from 'arctan_1' through
# 'distance_79' (label slices in .loc include both end points).
# NOTE(review): which columns fall inside this range depends on the column
# order of the CSV -- presumably all arctan_* and distance_* features; verify.
d3 = base.loc[:, 'arctan_1':'distance_79']

In [88]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Standardise each feature column of d3 (scaler fitted on d3, then applied to d3).
norm = StandardScaler().fit(d3).transform(d3)

# 15-component PCA on the standardised features; random_state is pinned so the
# fit is repeatable.
pca = PCA(n_components=15, random_state=1234)
pca.fit(norm)

In [94]:
# Share of total variance captured by the fitted components, summed over all of them.
explain = np.sum(pca.explained_variance_ratio_)

In [109]:
# Single entry of the component matrix: row index 1, column index 2 (both zero-based).
eigen = pca.components_[1, 2]

In [110]:
# Print the summed explained-variance ratio as a percentage and the selected
# component entry, both with four decimal places.
print(f"{explain * 100:.4f}, {eigen:.4f}")

57.8805, 0.0658


### [4]

In [197]:
# Project the standardised features onto the fitted components and attach the
# split marker (XGRP) and the label (character) from `base`, matching rows by
# index. The two label columns end up as the last two columns of d4.
# NOTE(review): `norm` and `pca` were fitted on every row of d3 (TRAIN and TEST
# together) in the PCA cell, so the TEST projection below is not strictly
# out-of-sample -- confirm this is what the exercise intends.
d4 = pd.merge(
    pd.DataFrame(pca.transform(norm)),
    base[['XGRP', 'character']],
    left_index=True,
    right_index=True,
)

# Split by the XGRP marker.
d4_train = d4.loc[d4['XGRP'].eq('TRAIN')]
d4_test = d4.loc[d4['XGRP'].eq('TEST')]

In [198]:
from sklearn.neighbors import KNeighborsClassifier

# Number of neighbours = number of distinct characters in d4 (all rows).
no = d4['character'].nunique()

# Train on the TRAIN rows: every column except the last two (XGRP, character)
# is a feature; `character` is the target.
knn = KNeighborsClassifier(n_neighbors=no, algorithm='auto')
knn.fit(d4_train.iloc[:, :-2], d4_train['character'])

In [199]:
# Predict the character for each TEST row (same feature columns as in training)
# and wrap the predictions in a single-column DataFrame (column label 0).
pred = pd.DataFrame(knn.predict(d4_test.iloc[:, :-2]))

In [200]:
# Put the true labels next to the predictions. reset_index() renumbers the TEST
# rows from 0 so they line up with `pred`; the old index, which it keeps as an
# 'index' column, is dropped right after the merge.
result = pd.merge(
    d4_test['character'].reset_index(),
    pred,
    left_index=True,
    right_index=True,
).drop(columns='index')

In [201]:
# Friendlier column labels: true label -> 'fact', prediction (column 0) -> 'esti'.
result = result.rename({'character': 'fact', 0: 'esti'}, axis='columns')

In [206]:
# accu is 1 where the prediction equals the true label, otherwise 0.
result = result.assign(accu=result['fact'].eq(result['esti']).mul(1))

In [214]:
# Accuracy = fraction of TEST rows predicted correctly (mean of the 0/1 flags).
accuracy = result['accu'].mean()
print(f"{accuracy:.3f}")

0.817
