In [2]:
from stanford_cs229.modeling.softmax import SoftmaxRegression
from stanford_cs229.modeling.k_means import KMeans
from stanford_cs229.modeling.perceptron import train_perceptron, predict_perceptron
from stanford_cs229.utils.util import load_our_data, plot, preprocess_rolling_avg, cmap
from stanford_cs229.utils.csv_plotter_classifier import format_data

import numpy as np
import matplotlib.pyplot as plt
from importlib import reload
reload(plt)
%matplotlib notebook

# Number of classes (clusters) to learn
K = 5
# Running figure number; each plotting cell uses it and then increments it
plt_counter = 0
# Seed for the row shuffle, so the fixed-size slices taken by later cells
# ([:3000], [:1000]) contain the same samples on every run
SHUFFLE_SEED = 0

# Load the recording and scale it by its global maximum
raw_data = load_our_data('stanford_cs229/data/ydeskx_overhand_weight_4-60Hz_extended2.csv')
raw_data = raw_data / raw_data.max()
# Preprocessed features used for clustering (a rolling average, going by the
# helper's name), rescaled by their own maximum
X_tr = preprocess_rolling_avg(raw_data)
X_tr = X_tr / X_tr.max()

# Apply one shared permutation to both arrays so their rows stay aligned.
# A dedicated RandomState makes the shuffle reproducible without touching
# NumPy's global random state.
shuffler = np.random.RandomState(SHUFFLE_SEED).permutation(raw_data.shape[0])
X_tr = X_tr[shuffler]
raw_data = raw_data[shuffler]


# Run K-means to get labels
# NOTE(review): random=True presumably randomizes the centroid initialization;
# that source of randomness is not seeded here -- confirm in KMeans.assign.
print("** Running K-means to label raw data **")
k_means = KMeans(verbose=True)
Y_tr_k_means = k_means.assign(X_tr, K, random=True)

** Running K-means to label raw data **
Iteration: 1, Loss: 346.7850511693019
Iteration: 2, Loss: 234.55698705933602
Iteration: 3, Loss: 186.5110245824319
Iteration: 4, Loss: 175.34999406816507
Iteration: 5, Loss: 173.76960375813982
Iteration: 6, Loss: 173.4601177101113
Iteration: 7, Loss: 173.38612910661297
Iteration: 8, Loss: 173.35719254821905
Iteration: 9, Loss: 173.3447852046985
Iteration: 10, Loss: 173.34231681178778
Iteration: 11, Loss: 173.34054809461878
Iteration: 12, Loss: 173.33971688118734
Iteration: 13, Loss: 173.3386489244193
Iteration: 14, Loss: 173.33775581569543
Iteration: 15, Loss: 173.33721276381
Iteration: 16, Loss: 173.33686934339113
Iteration: 17, Loss: 173.33682934809
Iteration: 18, Loss: 173.33675023402495
Iteration: 19, Loss: 173.33675023402495
Time: 0:00:14.154865


In [3]:
# Figure: raw data (columns 0 and 1), colored by the K-means cluster labels
fig1, ax1 = plt.subplots(num=plt_counter)
plt_counter += 1
ax1.scatter(raw_data[:, 0], raw_data[:, 1], c=cmap(Y_tr_k_means))
ax1.set_xlabel('X acc.')
ax1.set_ylabel('Y acc.')

# Figure: preprocessed data (columns 0 and 1), same coloring
fig2, ax2 = plt.subplots(num=plt_counter)
plt_counter += 1
ax2.scatter(X_tr[:, 0], X_tr[:, 1], c=cmap(Y_tr_k_means))
ax2.set_xlabel('X acc.')
ax2.set_ylabel('Y acc.')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Y acc.')

In [4]:
# Optionally write the two labeling figures to disk; flip the flag to enable.
# NOTE(review): fig1 is the raw data colored by K-means labels, so the file
# name 'manual_labeling.png' may be stale -- confirm.
save_new_plots = False

if save_new_plots:
  for figure_to_save, file_name in ((fig1, 'manual_labeling.png'),
                                    (fig2, 'k_means_labeling.png')):
    # Make the figure current first: plt.savefig writes the current figure
    plt.figure(figure_to_save.number)
    plt.savefig(file_name, dpi=300)

In [5]:
# First-stage classifier: fit a K-class softmax regression to the K-means labels
softmax = SoftmaxRegression(lr=0.01, max_iter=1000, verbose=False)

print('** Training softmax classifier **')
softmax.train(X_tr, Y_tr_k_means, K)

print('** Making predictions **')
Y_pred = softmax.predict(X_tr)

# Fraction of samples whose prediction agrees with its K-means label.
# This is measured on the same data the model was trained on.
label_matches = (Y_pred == Y_tr_k_means)
accuracy = np.sum(label_matches) / Y_tr_k_means.size
print(accuracy)

** Training softmax classifier **
Training Time: 0:00:00.794425
** Making predictions **
0.5066666666666667


In [6]:
# Figure: preprocessed data, colored by the first-stage softmax predictions
fig3, ax3 = plt.subplots(num=plt_counter)
plt_counter += 1
ax3.scatter(X_tr[:, 0], X_tr[:, 1], c=cmap(Y_pred))
ax3.set_xlabel('X acc.')
ax3.set_ylabel('Y acc.')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Y acc.')

In [7]:
from stanford_cs229.modeling.perceptron import train_perceptron, predict_perceptron

def get_neutral_class(true_labels=None, predicted_labels=None):
  """Return the smallest label that occurs in the true labels but is never predicted.

  The notebook treats that never-predicted class as the "neutral" class.

  Args:
    true_labels: array-like of reference labels. Defaults to the notebook
      global ``Y_tr_k_means``.
    predicted_labels: array-like of predicted labels. Defaults to the
      notebook global ``Y_pred``.

  Returns:
    The smallest label found in ``true_labels`` that is absent from
    ``predicted_labels``.

  Raises:
    ValueError: if every label in ``true_labels`` was predicted at least
      once, so no neutral class can be identified.
  """
  if true_labels is None:
    true_labels = Y_tr_k_means
  if predicted_labels is None:
    predicted_labels = Y_pred

  # setdiff1d returns the sorted unique labels present only in true_labels
  never_predicted = np.setdiff1d(true_labels, predicted_labels)
  if never_predicted.size == 0:
    # Fail loudly: returning None here would silently corrupt the
    # np.where-based relabeling that consumes this value.
    raise ValueError('No neutral class: every true label was predicted at least once')
  return never_predicted[0]

# The class that the first-stage softmax never predicted is treated as "neutral"
neutral_class = get_neutral_class()
# Binary perceptron targets: 0 where K-means assigned the neutral class, 1 otherwise.
# This has to be computed before the relabeling below rewrites Y_tr_k_means.
Y_tr_perceptron = np.where(Y_tr_k_means==neutral_class, 0, 1)

# Free up label 0 in the predictions: samples predicted as class 0 take over the
# neutral class's id. No reverse mapping is needed because neutral_class never
# occurs in Y_pred (that is how it was chosen).
Y_pred = np.where(Y_pred==0, neutral_class, Y_pred)

# Make neutral class have 0 label by swapping labels with whatever class has the 0 label.
# Both index sets are captured before either assignment so the second write does
# not undo the first. Note that this modifies Y_tr_k_means in place.
k_means_zeros = np.argwhere(Y_tr_k_means == 0)
k_means_neutral_class = np.argwhere(Y_tr_k_means == neutral_class)
Y_tr_k_means[k_means_zeros] = neutral_class
Y_tr_k_means[k_means_neutral_class] = 0

# Passed to both train_perceptron and predict_perceptron; its meaning is defined
# in the perceptron module (not shown here).
radius = 0.05

# Train on the first 3000 samples using raw-data columns from index 2 onward.
# NOTE(review): the features are multiplied by 10; the reason is not evident here.
print('** Training perceptron **')
state = train_perceptron(raw_data[:3000, 2:] * 10, Y_tr_perceptron[:3000], radius=radius)
# Predict one sample at a time on the same 3000 samples used for training
print('** Predicting perceptron **')
predictions = np.array([predict_perceptron(state, raw_data[i, 2:] * 10, radius=radius) for i in range(3000)])

** Training perceptron **
Completed 0 iterations
Completed 100 iterations
Completed 200 iterations
Completed 300 iterations
Completed 400 iterations
Completed 500 iterations
Completed 600 iterations
Completed 700 iterations
Completed 800 iterations
Completed 900 iterations
Completed 1000 iterations
Completed 1100 iterations
Completed 1200 iterations
Completed 1300 iterations
Completed 1400 iterations
Completed 1500 iterations
Completed 1600 iterations
Completed 1700 iterations
Completed 1800 iterations
Completed 1900 iterations
Completed 2000 iterations
Completed 2100 iterations
Completed 2200 iterations
Completed 2300 iterations
Completed 2400 iterations
Completed 2500 iterations
Completed 2600 iterations
Completed 2700 iterations
Completed 2800 iterations
Completed 2900 iterations
** Predicting perceptron **


In [8]:
# Figure: preprocessed data, colored by the binary perceptron targets
# (0 = neutral class, 1 = any other class). These are training labels,
# not model predictions.
fig5, ax5 = plt.subplots(num=plt_counter)
plt_counter += 1
ax5.scatter(X_tr[:, 0], X_tr[:, 1], c=cmap(Y_tr_perceptron))
ax5.set_xlabel('X acc.')
ax5.set_ylabel('Y acc.')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Y acc.')

In [9]:
# Debug output: show the container type of the perceptron predictions
print(type(predictions))
# Figure: first 3000 preprocessed samples, colored by the perceptron's predictions
fig6, ax6 = plt.subplots(num=plt_counter)
plt_counter += 1
ax6.scatter(X_tr[:3000, 0], X_tr[:3000, 1], c=cmap(predictions))
ax6.set_xlabel('X acc.')
ax6.set_ylabel('Y acc.')

# Export the first 1000 samples as CSV rows of [binary target, features...]
y = Y_tr_perceptron[:1000, np.newaxis]
x = X_tr[:1000]

np.savetxt('x_train.csv', np.concatenate((y, x), axis=1), delimiter=',')

<class 'numpy.ndarray'>


<IPython.core.display.Javascript object>

In [10]:
# Second-stage classifier: a (K-1)-class softmax trained only on the samples
# whose binary perceptron target is non-zero (i.e. not the neutral class).
softmax_2 = SoftmaxRegression(lr=0.005, max_iter=5000, verbose=False)

NUM_DATA = 3000
# Row indices, within the first NUM_DATA samples, with a non-zero binary target
nonzero_indices = np.nonzero(Y_tr_perceptron[:NUM_DATA])
# Features: raw-data columns from index 2 onward.
# NOTE(review): this used to be built by hstack-ing those columns with
# raw_data[:NUM_DATA, 2:2], a zero-width slice that contributed nothing. If
# columns 0-1 were meant to be appended, use raw_data[:NUM_DATA, :2] instead.
softmax_tr_x = raw_data[:NUM_DATA, 2:].copy()
# NOTE(review): the training targets are the first-stage softmax predictions
# (Y_pred), not the K-means labels -- confirm this is intended.
stage_one_labels = Y_pred[:NUM_DATA][nonzero_indices]
# Shift labels down by one; presumably they lie in 1..K-1 here because label 0
# was remapped earlier, giving the 0..K-2 range a (K-1)-class model expects.
softmax_tr_y = stage_one_labels - 1
print('** Training softmax classifier **')
softmax_2.train(softmax_tr_x[nonzero_indices], softmax_tr_y, K-1)

print('** Making predictions **')
# Predict for all NUM_DATA samples and undo the label shift
Y_pred_2 = softmax_2.predict(softmax_tr_x) + 1

# Agreement with the first-stage labels, measured on the training subset only
accuracy_2 = np.sum(Y_pred_2[nonzero_indices] == stage_one_labels) / stage_one_labels.size
print(accuracy_2)

** Training softmax classifier **
Training Time: 0:00:01.109415
** Making predictions **
0.6942307692307692


In [11]:
# Figure: the non-neutral samples among the first NUM_DATA, colored by the
# second-stage softmax predictions
fig4, ax4 = plt.subplots(num=plt_counter)
plt_counter += 1
non_neutral_points = X_tr[:NUM_DATA][nonzero_indices]
ax4.scatter(non_neutral_points[:, 0], non_neutral_points[:, 1], c=cmap(Y_pred_2[nonzero_indices]))
ax4.set_xlabel('X acc.')
ax4.set_ylabel('Y acc.')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Y acc.')

In [12]:
# Figure: first 3000 samples colored by the combined two-stage prediction,
# i.e. the element-wise product of the second-stage label and the perceptron
# output (presumably 0/1, so samples the perceptron rejects get label 0)
fig4, ax4 = plt.subplots(num=plt_counter)
plt_counter += 1
ax4.scatter(X_tr[:3000, 0], X_tr[:3000, 1], c=cmap(Y_pred_2 * predictions))
ax4.set_xlabel('X acc.')
ax4.set_ylabel('Y acc.')

# Figure: the same samples colored by the K-means labels, for comparison
fig4, ax4 = plt.subplots(num=plt_counter)
plt_counter += 1
ax4.scatter(X_tr[:3000, 0], X_tr[:3000, 1], c=cmap(Y_tr_k_means[:3000]))
ax4.set_xlabel('X acc.')
ax4.set_ylabel('Y acc.')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Y acc.')

In [13]:
# Share of the first 3000 samples that belongs to each K-means class
reference_labels = Y_tr_k_means[:3000]
for c in np.unique(reference_labels):
  percent_makup = np.sum(reference_labels == c) / reference_labels.size
  print('Percent makup of total data for class {}: {}'.format(c, percent_makup))

# Share of the misclassified samples contributed by each class. This is a
# breakdown of the errors by true class, not a per-class accuracy.
# A boolean mask picks out the true labels of the misclassified samples directly.
labels_for_incorrect_predictions = reference_labels[reference_labels != Y_pred_2 * predictions]
if labels_for_incorrect_predictions.size == 0:
  # Nothing was misclassified; skip the breakdown instead of dividing 0 by 0
  print('No incorrect predictions')
else:
  for c in np.unique(reference_labels):
    percent_makup = np.sum(labels_for_incorrect_predictions == c) / labels_for_incorrect_predictions.size
    print('Percent makup of incorrect predictions for class {}: {}'.format(c, percent_makup))

Percent makup of total data for class 0: 0.48
Percent makup of total data for class 1: 0.111
Percent makup of total data for class 2: 0.12233333333333334
Percent makup of total data for class 3: 0.09166666666666666
Percent makup of total data for class 4: 0.195
Percent makup of incorrect predictions for class 0: 0.10606060606060606
Percent makup of incorrect predictions for class 1: 0.19090909090909092
Percent makup of incorrect predictions for class 2: 0.15606060606060607
Percent makup of incorrect predictions for class 3: 0.22121212121212122
Percent makup of incorrect predictions for class 4: 0.32575757575757575


In [14]:
# Overall accuracy of the combined two-stage prediction against the K-means
# labels on the first 3000 samples
combined_matches = (Y_pred_2 * predictions == Y_tr_k_means[:3000])
accuracy_3 = np.sum(combined_matches) / Y_tr_k_means[:3000].size
print(accuracy_3)

0.78
