**HOMEWORK 2**: Motion classification system for a humanoid robot

saru07g@uw.edu 

**Goal**: Consider a humanoid robot, OptimuS-VD, which has built-in sensors that record
the movements of 38 of its joints. Build a projection of the
recordings onto a lower dimension than the number of coordinates, visualize the movements, and then, based
on that projection, design an algorithm that can recognize which movement OptimuS-VD is performing in
real time.

The data contains 5 recorded samples of each of the 3 movements that
OptimuS-VD knows how to perform: walking, jumping, and running. Each sample is recorded for 1.4 secs (100
timesteps) and saved as a 114×100 matrix (38 joints × 3 xyz coordinates, by 100 timesteps).

*Task 1*: Compile all the train samples into a matrix 𝑋𝑡𝑟𝑎𝑖𝑛. Use PCA to investigate the dimensionality of
𝑋𝑡𝑟𝑎𝑖𝑛 and plot the first 5 PCA modes in xyz space.

In [43]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Directory that holds the training recordings (one .npy file per sample).
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\train\\"

# Read the five recordings of each movement, in order, and join them
# column-wise into a single training matrix.
recordings = [
    np.load(folder + f"{movement}_{i}" + ".npy")
    for movement in ["walking", "running", "jumping"]
    for i in range(1, 6)
]
Xtrain = np.hstack(recordings)

#print(Xtrain.shape)

# Fit a full PCA. The matrix is transposed because sklearn's PCA treats
# rows as observations and columns as features.
pca = PCA().fit(Xtrain.T)

# Keep the five leading principal directions (first five rows of components_).
pca_modes = pca.components_[:5]

#plt.figure(figsize=(10, 6))
#for i, mode in enumerate(pca_modes):
    #plt.plot(mode, label=f"Mode {i+1}")

#plt.xlabel('Timesteps', fontsize=15)
#plt.ylabel('Amplitude', fontsize=15)
#plt.title('First 5 PCA Modes', fontsize=15)
#plt.legend()
#plt.show()

#xs = pca_modes[0,:38]
#ys = pca_modes[0,38:76] 
#zs = pca_modes[0,76:]

#fig = plt.figure()
#ax = fig.add_subplot(111, projection='3d')
#ax.plot(xs, ys, zs)

*Task 2*: Investigate how many PCA modes you need to keep in order to approximate 𝑋𝑡𝑟𝑎𝑖𝑛 up to 70%, 80%, 90%, and 95% in the Frobenius norm (i.e., energy). Plot the cumulative energy to justify your results.

In [41]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Directory that holds the training recordings.
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\train\\"

# Gather every recording (five per movement) and concatenate them
# column-wise into the training matrix.
recordings = [
    np.load(folder + f"{movement}_{i}" + ".npy")
    for movement in ["walking", "running", "jumping"]
    for i in range(1, 6)
]
Xtrain = np.hstack(recordings)

# Full PCA on the transposed matrix (rows are observations for sklearn).
pca = PCA().fit(Xtrain.T)

# Energy carried by each mode: squared singular value over the total.
ds = pca.singular_values_
squared_ds = np.power(ds, 2)
E = squared_ds / np.sum(squared_ds)
cumulative_energy = np.cumsum(E)

# For each target fraction, find the smallest number of modes whose
# cumulative energy reaches it (argmax gives the first True index, 0-based).
desired_percentages = [0.70, 0.80, 0.90, 0.95]
modes_needed = [
    np.argmax(cumulative_energy >= target) + 1
    for target in desired_percentages
]

#for desired_percentage, k in zip(desired_percentages, modes_needed):
    #print(f"For {desired_percentage * 100:.0f}% energy, {k} PCA modes")

# Plot
#plt.plot(np.arange(1, 21), cumulative_energy[:20], marker='o')
#plt.xlabel('Number of Components (k)', fontsize=15)
#plt.ylabel('Cumulative Energy', fontsize=15)
#plt.title('Cumulative Energy vs (k)', fontsize=15)
#plt.grid(True)
#plt.show()

*Task 3*: Truncate the PCA modes to 2 and 3 modes and plot the projected 𝑋𝑡𝑟𝑎𝑖𝑛 in truncated PCA space
as low dimensional 2D (PC1, PC2 coordinates) and 3D (PC1, PC2, PC3 coordinates) trajectories. Use
colors for different movements and discuss the visualization and your findings.

In [40]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA

# Directory that holds the training recordings.
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\train\\"
movement_names = ["walking", "running", "jumping"]

# Read five recordings per movement and concatenate them column-wise.
recordings = [
    np.load(folder + f"{movement}_{i}" + ".npy")
    for movement in movement_names
    for i in range(1, 6)
]
Xtrain = np.hstack(recordings)

# Full PCA on the transposed matrix (rows are observations for sklearn).
pca = PCA().fit(Xtrain.T)

# Leading two and three principal directions.
pca_modes_2d = pca.components_[:2]
pca_modes_3d = pca.components_[:3]

# Project the training data once, then keep the first two / three
# coordinates for the 2D and 3D trajectories.
projected = pca.transform(Xtrain.T)
proj_2d = projected[:, :2]
proj_3d = projected[:, :3]

# Plot 2D PCA space
#plt.figure(figsize=(8, 6))
#for i, movement in enumerate(movement_names):
    #start_idx = i * 5 * 100
    #end_idx = (i + 1) * 5 * 100
    #plt.plot(proj_2d[start_idx:end_idx, 0], proj_2d[start_idx:end_idx, 1], label=movement)

#plt.title('Trajectories in 2D PCA Space',  fontsize=15)
#plt.xlabel('PC1',  fontsize=15)
#plt.ylabel('PC2',  fontsize=15)
#plt.legend()
#plt.grid(True)
#plt.show()

# Plot 3D PCA space
#fig = plt.figure(figsize=(10, 8))
#ax = fig.add_subplot(111, projection='3d')
#for i, movement in enumerate(movement_names):
    #start_idx = i * 5 * 100
    #end_idx = (i + 1) * 5 * 100
    #ax.plot(np.insert(proj_3d[start_idx:end_idx, 0], 0, 0),
            #np.insert(proj_3d[start_idx:end_idx, 1], 0, 0),
            #np.insert(proj_3d[start_idx:end_idx, 2], 0, 0),
            #label=movement)

#ax.set_title('Trajectories in 3D PCA Space',  fontsize=15)
#ax.set_xlabel('PC1',  fontsize=15)
#ax.set_ylabel('PC2',  fontsize=15)
#ax.set_zlabel('PC3',  fontsize=15)
#ax.legend()
#plt.show()

*Task 4*: In order to classify each sample by its type of movement, establish the following ground truth. Create a
vector of **ground truth labels** with an integer per class, e.g., 0 (walking), 1 (jumping), 2 (running), and
assign the appropriate label to each sample in 𝑋𝑡𝑟𝑎𝑖𝑛. Then, for each movement, compute its centroid
(mean) in 𝑘-modes PCA space.

In [39]:
import numpy as np
from sklearn.decomposition import PCA

# Load training data
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\train\\"
movement_names = ["walking", "running", "jumping"]
num_samples_per_movement = 5

# Build the training matrix and the ground-truth labels from scratch.
# The matrix is assembled from a fresh list so that an Xtrain left over
# from a previously executed cell can never be extended by accident,
# which would leave the data and the labels with different lengths.
samples = []
labels = []  # Ground truth labels: the index of the movement in movement_names
for i, movement in enumerate(movement_names):
    for j in range(1, num_samples_per_movement + 1):
        current_vals = np.load(folder + f"{movement}_{j}" + ".npy")
        samples.append(current_vals)
        # One label per column of the recording, so labels line up with
        # the rows of Xtrain.T that PCA sees below.
        labels.extend([i] * current_vals.shape[1])

Xtrain = np.hstack(samples)

# Assign ground truth labels
labels = np.array(labels)
#print(labels.shape)

# k=2
pca_2d = PCA(n_components=2)
X_pca_2d = pca_2d.fit_transform(Xtrain.T)

# k=3
pca_3d = PCA(n_components=3)
X_pca_3d = pca_3d.fit_transform(Xtrain.T)

# Centroid of each movement = mean of its projected points.
# centroids k=2
centroids_2d = [
    X_pca_2d[labels == i].mean(axis=0) for i in range(len(movement_names))
]

# centroids k=3
centroids_3d = [
    X_pca_3d[labels == i].mean(axis=0) for i in range(len(movement_names))
]

#print("Centroids for k=2:")
#for i, centroid in enumerate(centroids_2d):
    #print(f"Centroid of {movement_names[i]}: {centroid}")

#print("\nCentroids for k=3:")
#for i, centroid in enumerate(centroids_3d):
    #print(f"Centroid of {movement_names[i]}: {centroid}")

*Task 5*: Having the ground truth, perform the following training. Create another vector of **trained labels**. To
assign these labels, for each sample in 𝑋𝑡𝑟𝑎𝑖𝑛 compute the **distance** between the projected point in
𝑘-modes PCA space and each of the centroids. The minimal distance will determine to which class
the sample belongs. Assign the label of the class of the centroid with minimal distance in the trained
labels vector. Compute the trained labels for various 𝑘 values of 𝑘-PCA truncation and report the
accuracy of the trained classifier (the percentage of samples for which the ground truth and the trained
labels match). You can use *accuracy_score* function in *sklearn* for this purpose. Discuss your results
in terms of optimal 𝑘 for the classifier accuracy.

In [38]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

# Load training data
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\train\\"
movement_names = ["walking", "running", "jumping"]

# Load and stack samples, recording one ground-truth label per column.
samples = []
gt_labels = []  # Ground truth labels: the index of the movement in movement_names
for i, movement in enumerate(movement_names):
    for j in range(1, 6):
        current_vals = np.load(folder + f"{movement}_{j}" + ".npy")
        samples.append(current_vals)
        gt_labels.extend([i] * current_vals.shape[1])

Xtrain = np.hstack(samples)

# Array form of the labels, built once so it can be used for boolean
# masking inside the loop over k.
gt_label_array = np.asarray(gt_labels)

k_values = range(2, 15)
accuracies = []

for k in k_values:
    pca = PCA(n_components=k)
    X_pca = pca.fit_transform(Xtrain.T)

    # centroids: mean projected point of each movement, using this cell's
    # own ground-truth labels (not a `labels` variable from another cell).
    centroids = [
        X_pca[gt_label_array == i].mean(axis=0)
        for i in range(len(movement_names))
    ]

    # trained labels: index of the nearest centroid (Euclidean distance).
    # distances[s, c] is the distance from sample s to centroid c.
    distances = np.linalg.norm(
        X_pca[:, np.newaxis, :] - np.asarray(centroids)[np.newaxis, :, :],
        axis=2,
    )
    trained_labels = np.argmin(distances, axis=1)

    # accuracy on the training data itself
    accuracy = accuracy_score(gt_label_array, trained_labels)
    accuracies.append(accuracy)

    #print(f"For k={k}, accuracy: {accuracy}")


*Task 6*: To test how the classifier performs on recognizing new samples, load the given test
samples and assign the ground truth label to each test sample. By projecting onto 𝑘-PCA space and
computing the distance to the centroids, **predict the test labels**. Report the accuracy of the classifier
on the test samples. Discuss and compare it with the training accuracy. Try various 𝑘 values.

In [37]:
# Imports are repeated here so the cell runs on its own, like the other
# cells in this notebook.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score


def _nearest_centroid(points, centroids):
    """Return, for each row of `points`, the index of the closest centroid.

    `points` is a 2-D array with one projected sample per row and
    `centroids` a 2-D array with one centroid per row; closeness is the
    Euclidean distance.
    """
    distances = np.linalg.norm(
        points[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2
    )
    return np.argmin(distances, axis=1)


movement_names = ["walking", "running", "jumping"]

# Load test data
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\test\\"

# Load and stack test samples (one "<movement>_1t" file per movement),
# recording one ground-truth label per column.
test_samples = []
test_gt_labels = []
for i, movement in enumerate(movement_names):
    current_vals = np.load(folder + f"{movement}_1t" + ".npy")
    test_samples.append(current_vals)
    test_gt_labels.extend([i] * current_vals.shape[1])
test_vals = np.hstack(test_samples)

# Load training data
folder = "C:\\Users\\Sarayu G\\582\\hw2data\\train\\"

# Load and stack samples, recording one ground-truth label per column.
samples = []
gt_labels = []  # Ground truth labels: the index of the movement in movement_names
for i, movement in enumerate(movement_names):
    for j in range(1, 6):
        current_vals = np.load(folder + f"{movement}_{j}" + ".npy")
        samples.append(current_vals)
        gt_labels.extend([i] * current_vals.shape[1])
Xtrain = np.hstack(samples)

# Array form of the training labels, built once for boolean masking.
gt_label_array = np.asarray(gt_labels)

# PCA for various k values
k_values = range(2, 15)
accuracies = []  # training accuracy for each k
test_accuracies = []  # test accuracy for each k

for k in k_values:
    # Fit PCA on the training data only, then project the test data with
    # the same fitted transform.
    pca = PCA(n_components=k)
    X_pca = pca.fit_transform(Xtrain.T)
    test_X_pca = pca.transform(test_vals.T)

    # find centroids for training data
    centroids = np.array([
        X_pca[gt_label_array == i].mean(axis=0)
        for i in range(len(movement_names))
    ])

    # labels for training data and test data: nearest training centroid
    trained_labels = _nearest_centroid(X_pca, centroids)
    test_labels = _nearest_centroid(test_X_pca, centroids)

    # Score against this cell's own ground truth (gt_labels), not a
    # `labels` variable left over from another cell.
    accuracy = accuracy_score(gt_label_array, trained_labels)
    accuracies.append(accuracy)
    test_accuracy = accuracy_score(test_gt_labels, test_labels)
    test_accuracies.append(test_accuracy)

    #print(f"For k={k}, Test accuracy: {test_accuracy}")

#plt.figure(figsize=(10, 6))
#plt.plot(k_values, accuracies, label='Train Accuracy', marker='o')
#plt.plot(k_values, test_accuracies, label='Test Accuracy', marker='o')
#plt.title('Accuracy vs Number of PCA Components (k)', fontsize=15)
#plt.xlabel('Number of PCA Components (k)', fontsize=15)
#plt.ylabel('Accuracy', fontsize=15)
#plt.grid(True)
#plt.xticks(k_values)
#plt.legend()
#plt.show()