## Libraries

In [144]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import os

In [145]:
data_dir = '../Data/'
locs_dir = data_dir + 'Locations/10FPS/'

locs_files = os.listdir(locs_dir)
locs_files.remove('.DS_Store')
locs_files.sort()

In [146]:
log_reg_model = pd.read_csv(data_dir + 'ClassificationModels/LogisticRegression.csv').to_numpy()[16]
coefficients = log_reg_model[:4].astype(np.float64)
threshold = log_reg_model[4]

dataset = pd.read_csv(data_dir + 'ClassificationDatasets/10FPSDataset_v8.csv')

In [147]:
def classification_model(X, threshold):
    probs = 1 / (1 + np.exp(- np.hstack([np.ones(len(X)).reshape(-1, 1), X]) @ coefficients))
    return np.where(probs >= threshold, 1, 0)

## Self-Correcting Algorithm

```
# Algorithm in Full Form (Not Real-Time Form) & Not Allowing Combos & Not Including Edge Cases
l = 5
w = 3 * l + 2
b_lo = w // 2
b_hi = (w - 1) // 2
m_lo = l // 2
m_hi = (l - 1) // 2
l, w, (b_lo, b_hi), (m_lo, m_hi)
for i in range(b_lo,  n - b_hi):
    s = np.sum(p[(i - m_lo):((i + m_hi) + 1)])
    if (((s / l) % 1) == 0) and (p[i - m_lo] != p[i - m_lo - 1]) and (p[i + m_hi] != p[i + m_hi + 1]):
        p[(i - m_lo):((i + m_hi) + 1)] = (((np.sum(p[(i - b_lo):((i + b_hi) + 1)]) - s) / (w - l)) + 0.5) // 1
```

To smooth windows of length $l$ across the prediction vector $p \in \mathbb{R}^n$:

\begin{align}
w & = 3l + 2 \\
b_- & = \left\lfloor \frac{w}{2} \right\rfloor \\
b_+ & = \left\lfloor \frac{w - 1}{2} \right\rfloor \\
m_- & = \left\lfloor \frac{l}{2} \right\rfloor \\
m_+ & = \left\lfloor \frac{l - 1}{2} \right\rfloor \\
\\
s & = \sum_{j = i - m_-}^{i + m_+}{p_j} \\
\end{align}

\begin{align}
\text{for } & i = b_-, \dots, n - b_+ - 1: \\
& \text{if } \left(\left(\frac{s}{l} \text{ % } 1 \right) == 0\right) \text{ and } \left(p_{i - m_-} != p_{i - m_- - 1} \right) \text{ and } \left(p_{i + m_+} != p_{i + m_+ + 1} \right): \\
& \;\;\;\; v_i = \left\lfloor \frac{\left[ \sum_{j = i - b_-}^{i + b_+}{p_j} \right] - s}{w - l} + \frac{1}{2} \right\rfloor \\
& \;\;\;\; p_{i + k} = v_i, \text{ for } k = -m_-, -m_- + 1, \dots, 0, \dots, m_+ - 1, m_+ \\
\end{align} 

In [151]:
compute_window_params(2)

(2, 8, 4, 3, 1, 0)

In [148]:
def compute_window_params(l):        
    w = (3 * l) + 2
    b_lo = w // 2
    b_hi = (w - 1) // 2
    m_lo = l // 2
    m_hi = (l - 1) // 2  
    return l, w, b_lo, b_hi, m_lo, m_hi

def smooth_window(p, i, l, w, b_lo, b_hi, m_lo, m_hi): 
    s = np.sum(p[(i - m_lo):((i + m_hi) + 1)])
    if (((s / l) % 1) == 0) and (p[i - m_lo] != p[i - m_lo - 1]) and (p[i + m_hi] != p[i + m_hi + 1]):
        p[(i - m_lo):((i + m_hi) + 1)] = (((np.sum(p[(i - b_lo):((i + b_hi) + 1)]) - s) / (w - l)) + 0.5) // 1
        
def smooth_windows_of_length(p, l, incl_next=False):
    window_params = compute_window_params(l)
    b_lo, b_hi = window_params[2:4]
    for i in range(b_lo,  len(p) - b_hi):
        smooth_window(p, i, *window_params) 
        if incl_next:
            smooth_window(p, i, *compute_window_params(l + 1))

In [149]:
all_preds = []
all_orig_preds = []
for locs_file in locs_files:
    dance_no = locs_file.replace('WaggleDance_', '').replace('_Locations.csv', '')
#     print(dance_no)
    dataframe = pd.read_csv(locs_dir + locs_file)
    dance_mvmt = dataframe['dance_movement'].to_numpy()
    dance_mvmt = np.where(dance_mvmt == 2, 0, dance_mvmt)
    input_data = dataset[dataset['LocID'].str.match(dance_no + '_[0-9]*')][['MeanConsecDistance', 'MeanConsecVertAngle', 'Interaction']].to_numpy().astype(np.float64)

    p = classification_model(input_data, threshold).astype(int)
    p_orig = p.copy()
    
    smooth_windows_of_length(p, l=1)
    smooth_windows_of_length(p, l=2)
    smooth_windows_of_length(p, l=3, incl_next=True)
        
    all_preds.append(p)
    all_orig_preds.append(p_orig)
#     print('Orig: ', np.mean(p_orig == dance_mvmt[3:-3]), 'New: ', np.mean(p == dance_mvmt[3:-3]))
#     print('')

#     orig_incorrect_idcs = np.where(dance_mvmt != np.concatenate([[-1, -1, -1], p_orig, [-1, -1, -1]]))[0][3:-3]
#     new_incorrect_idcs = np.where(dance_mvmt != np.concatenate([[-1, -1, -1], p, [-1, -1, -1]]))[0][3:-3]

#     plt.figure()
#     plt.title(locs_file)
#     plt.plot(locs[:, 0], locs[:, 1], color='grey', label='Dance Path')
#     plt.plot(locs[:, 0], locs[:, 1], marker='.', linestyle='None', markersize=5, color='grey')
#     plt.plot(locs[:, 0][new_incorrect_idcs], locs[:, 1][new_incorrect_idcs], marker='.', linestyle='None', markersize=5, color='red', label='Misclassified')
#     plt.legend()
#     plt.show()
    
#     dataframe['Predictions'] = np.concatenate([[-1, -1, -1], y_pred, [-1, -1, -1]])
#     dataframe.to_csv(locs_dir + locs_file, index=False)
    
print('Original Acc:', np.mean(np.concatenate(all_orig_preds) == dataset['MovementClass'].to_numpy()))
print('New Acc:', np.mean(np.concatenate(all_preds) == dataset['MovementClass'].to_numpy()))

Original Acc: 0.8741226017781937
New Acc: 0.879425986585556


In [None]:
# Original:
# (S): 0.9341819759872704

# (S), (D): 0.9379430059308549

# (S), (D), (T): 0.9422826558657602

# (S), (D), (T), (Q): 0.9451757558223637

# (S), (D), (T), (Q), (P): 0.9457543758136844

# (S), (D), (T), (Q), (P), (S): 0.9466223058006654
# (S, D), (T, Q), (P), (S): 0.9477795457833068
# (S, D), (T, Q), (P, S): 0.9477795457833068