<h1>Data processing</h1>
We delete inputs that are not used and combine inputs that do the same thing. We also convert the j-stick and c-stick to polar coordinates.

First, load the libraries.

In [None]:
import os as os
import numpy as np
import pandas as pd

<h2>Load dataframe</h2>
We load the data we extracted from the replays in the data_extraction notebook. We also make a list of the buttons as they appear in the data.

In [None]:
# Location of the pickled DataFrame read by this notebook.
data_file_path = '../data/Sheik_vs_Fox_1024_frames.pkl'
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files from a trusted source.
df = pd.read_pickle(data_file_path)
# Reference only (not executed): presumably the column order of each 'input_data' array — TODO confirm against the extraction notebook.
# button_labels = ['DPAD_LEFT', 'DPAD_RIGHT', 'DPAD_DOWN', 'DPAD_UP', 'Z', 'R', 'L', 'A', 'B', 'X', 'Y', 'START','J_X','J_Y','C_X','C_Y','T_L','T_R']

# Print the first few rows to make sure we have the data we expect.
df.head()

<h2>Delete unused inputs</h2>
We delete the left, right, up, and down d-pad inputs as well as the start input. The code below also removes two further button columns (indices 6 and 7).

In [None]:
# Column indices (into each 'input_data' array) that are dropped.
# NOTE(review): the notebook text only mentions the four d-pad directions and START
# (five inputs), but seven indices are listed. If the columns follow the order
# DPAD_LEFT, DPAD_RIGHT, DPAD_DOWN, DPAD_UP, Z, R, L, A, B, X, Y, START, ...
# then indices 6 and 7 drop L and A while keeping R, which the next cell would
# label 'A'. Confirm the column order against the extraction notebook; if that
# order is right, this should probably be [0, 1, 2, 3, 5, 6, 11].
columns_to_remove = [0, 1, 2, 3, 6, 7, 11]      # TODO: Implement an enumeration

# Build the 'relevant_input_data' column: every array with the unwanted columns deleted
df['relevant_input_data'] = [np.delete(inputs, columns_to_remove, axis=1) for inputs in df['input_data']]

# Compare shapes on the first row. Positional access (.iloc) works for any index
# and for a single-row frame, unlike a label lookup.
print('original input shape', df['input_data'].iloc[0].shape)
print('relevant input shape', df['relevant_input_data'].iloc[0].shape)
df.head()


<h2>Split input data into columns</h2>

In [None]:
# Names given to the columns of 'relevant_input_data', in column order
new_column_labels = ['Z', 'A', 'B', 'X', 'Y', 'J_X', 'J_Y', 'C_X', 'C_Y', 'T_L', 'T_R']

# Give every label its own DataFrame column holding that column of each array.
# The position is bound as a default argument so each lambda keeps its own index.
for column_position, label in enumerate(new_column_labels):
    df[label] = df['relevant_input_data'].apply(lambda arr, pos=column_position: arr[:, pos])

<h2> Create new columns</h2>
X and Y do the same thing, as do the two triggers, so for each pair we make a new column that is the element-wise maximum of the two inputs. We also convert the j-stick and c-stick to polar coordinates.

In [None]:
def _pairwise(func, first, second):
    """Return a row function that applies `func` to the arrays stored in two columns."""
    return lambda row: func(row[first], row[second])


def _radius(x, y):
    """Element-wise sqrt(x^2 + y^2)."""
    return np.sqrt(x ** 2 + y ** 2)


# 'max_X_Y': element-wise maximum of the 'X' and 'Y' arrays
df['max_X_Y'] = df.apply(_pairwise(np.maximum, 'X', 'Y'), axis=1)

# 'max_L_R': element-wise maximum of the 'T_L' and 'T_R' arrays
df['max_L_R'] = df.apply(_pairwise(np.maximum, 'T_L', 'T_R'), axis=1)

# j-stick in polar form: angle is arctan2(J_Y, J_X), radius is sqrt(J_X^2 + J_Y^2)
df['J_theta'] = df.apply(_pairwise(np.arctan2, 'J_Y', 'J_X'), axis=1)
df['J_radius'] = df.apply(_pairwise(_radius, 'J_X', 'J_Y'), axis=1)

# c-stick in polar form, computed the same way
df['C_theta'] = df.apply(_pairwise(np.arctan2, 'C_Y', 'C_X'), axis=1)
df['C_radius'] = df.apply(_pairwise(_radius, 'C_X', 'C_Y'), axis=1)

# Show which columns exist now, then the first row
print(df.columns)
df.head(1)

<h2>Create columns with reduced inputs</h2>
We create two columns: one with the sticks encoded in Cartesian coordinates and one with the sticks encoded in polar coordinates.

In [None]:
# Columns that make up the Cartesian training input, in output-column order
buttons_to_train_on_cart = ['Z', 'A', 'B', 'max_X_Y', 'max_L_R', 'J_X', 'J_Y', 'C_X', 'C_Y']

def create_training_input_cart(row):
    """Stack the Cartesian training columns of one row into a single array.

    Every name in `buttons_to_train_on_cart` is looked up in `row` and the
    results are joined along axis 1, so column k of the returned array holds
    the values of the k-th listed button.
    """
    per_button = []
    for button in buttons_to_train_on_cart:
        per_button.append(row[button])
    return np.stack(per_button, axis=1)

# Build one stacked Cartesian array per DataFrame row
df['training_inputs_cart'] = df.apply(create_training_input_cart, axis=1)

# Sanity check on the first row: one column per entry of buttons_to_train_on_cart
first_cart_input = df['training_inputs_cart'].iloc[0]
print(first_cart_input.shape)  # expected: (number of frames, 9)

In [None]:
# Columns that make up the polar training input, in output-column order
buttons_to_train_on_polar = ['Z', 'A', 'B', 'max_X_Y', 'max_L_R', 'J_theta', 'J_radius', 'C_theta', 'C_radius']

def create_training_input_polar(row):
    """Stack the polar training columns of one row into a single array.

    Every name in `buttons_to_train_on_polar` is looked up in `row` and the
    results are joined along axis 1, so column k of the returned array holds
    the values of the k-th listed button.
    """
    per_button = []
    for button in buttons_to_train_on_polar:
        per_button.append(row[button])
    return np.stack(per_button, axis=1)

# Build one stacked polar array per DataFrame row
df['training_inputs_polar'] = df.apply(create_training_input_polar, axis=1)

# Sanity check on the first row: one column per entry of buttons_to_train_on_polar
first_polar_input = df['training_inputs_polar'].iloc[0]
print(first_polar_input.shape)  # expected: (number of frames, 9)

<h2>Save Data</h2>

In [None]:
# Two-column frames: the 'is_sheik' label next to one encoding of the inputs
cart_columns = ['is_sheik', 'training_inputs_cart']
polar_columns = ['is_sheik', 'training_inputs_polar']
df_train_cart = df[cart_columns]
df_train_polar = df[polar_columns]

# Pickle export is currently disabled:
# df_train_cart.to_pickle('../data/sheik_v_fox_15_seconds_cartesian.pkl')
# df_train_polar.to_pickle('../data/sheik_v_fox_15_seconds_polar.pkl')

<h2> Save as NumPy </h2>

In [None]:
def _stack_channels_first(samples):
    """Stack per-row (frames, channels) arrays into one (rows, channels, frames) array.

    The last two axes are exchanged with a transpose, so each output channel
    holds the complete series of a single input. Reshaping to the same target
    shape would only reinterpret the underlying buffer and mix values from
    different inputs within each channel.
    """
    stacked = np.stack(list(samples), axis=0)       # (rows, frames, channels)
    return np.transpose(stacked, (0, 2, 1))         # (rows, channels, frames)


print(df.shape[0])
print(df['training_inputs_polar'].iloc[0].shape[0])

# Each array is built from its own column. The Cartesian array must not be built
# from 'training_inputs_polar', or the two saved files would hold identical data.
X_cart = _stack_channels_first(df['training_inputs_cart'])
X_polar = _stack_channels_first(df['training_inputs_polar'])

y = np.array(df['is_sheik'])        # Labels, one per row
print(y.shape)

# Save as binary .npy files.
# NOTE(review): 'labes' in the label file name looks like a typo for 'labels'; it is
# kept unchanged because other code may load the file by this exact name.
np.save('../data/training_inputs_cart_numpy_binary_1024.npy', X_cart)
np.save('../data/training_inputs_polar_numpy_binary_1024.npy', X_polar)
np.save('../data/labes_is_sheik_numpy_binary_1024.npy', y)