# LSTM

In [None]:
from pandas import DataFrame
from sklearn.impute import SimpleImputer
from numpy import nan, arange, array
import seaborn as sns
from matplotlib import pyplot as plt
import tensorflow as tf
from numpy import ndarray, mean, float32
from pandas import Series
from sklearn.preprocessing import LabelBinarizer
from typing import Tuple

from src.labels import iterate_valid_labels, find_valid_segments, get_labels_from_video
from src.common.helpers import read_dataframe

In [None]:
def get_landmark_df_path(video_path: str) -> str:
    """Derive the landmark-DataFrame pickle path that belongs to a video path.

    Every "/videos/" in the path becomes "/df/videos/" and every ".mp4" becomes
    ".pkl"; a path containing neither is returned unchanged.
    """
    in_df_dir = "/df/videos/".join(video_path.split("/videos/"))
    return ".pkl".join(in_df_dir.split(".mp4"))

In [None]:
# Pick one climb video and derive the two paths that belong to it.
video_path = "data/videos/Route9Climb1.mp4"
# presumably the path of the label file for this video — verify in src.labels
label_path = get_labels_from_video(video_path)
# Pickled landmark DataFrame: same path with "/df/videos/" and a ".pkl" suffix.
hpe_path = get_landmark_df_path(video_path)

# `valids` is iterated below as pairs whose items [0] and [1] bound a row slice of `df`.
valids = find_valid_segments(label_path)
df = read_dataframe(hpe_path)

In [None]:
# One sub-frame of `df` per valid range; items [0] and [1] of each entry of
# `valids` are the start and stop of the row slice.
valid_segments = [df[slice(bounds[0], bounds[1])] for bounds in valids]

print(f"Valid segments: {len(valid_segments)}")

In [None]:
# TODO: replace with iteration over all valid segments.
# For now only the first valid segment is used by the cells below; this raises
# IndexError when no valid segment was found.
temp = valid_segments[0]

In [None]:
def impute_features(features: DataFrame) -> DataFrame:
    """Return a copy of *features* in which every NaN is replaced by 0.

    The 'frame_num' column is kept out of the imputation and re-attached as the
    last column. The result keeps the row index of *features*.

    Args:
        features: frame with a 'frame_num' column plus the columns to impute.
            It is not modified.

    Returns:
        A new DataFrame with the imputed columns followed by 'frame_num'.

    Raises:
        KeyError: if *features* has no 'frame_num' column.
    """
    output = features.copy()
    frame_num = output.pop('frame_num')
    imp = SimpleImputer(missing_values=nan, strategy='constant', fill_value=0,
                        keep_empty_features=True)
    # fit_transform returns a bare array, so the frame has to be rebuilt. It is
    # rebuilt on the original index: with a default 0..n-1 index, a segment
    # sliced from the middle of a recording would no longer share labels with
    # frame_num, and the assignment below (which aligns on the index) would
    # fill 'frame_num' with NaN.
    output = DataFrame(imp.fit_transform(output), columns=output.columns,
                       index=output.index)
    output['frame_num'] = frame_num
    return output

def normalize_features(train_df: DataFrame, val_df: DataFrame, test_df: DataFrame):
    """Placeholder for normalizing the train, validation and test frames.

    Not implemented yet: the arguments are ignored and None is returned.
    """
    return None

In [None]:
#TODO: reuse from src.hpe_dnn.helpers
def binarize_labels(labels: Series) -> ndarray:
    """Encode *labels* with a LabelBinarizer fitted on all valid labels.

    The binarizer is fitted on everything iterate_valid_labels() yields, not on
    *labels* itself, so the encoding does not depend on which labels happen to
    occur in the input.
    """
    known_labels = list(iterate_valid_labels())
    return LabelBinarizer().fit(known_labels).transform(labels)

def unbinarize_labels(logits: Series):
    """Decode binarized rows back to labels and return the first decoded label.

    Uses a LabelBinarizer fitted on everything iterate_valid_labels() yields.
    Only element [0] of the decoded result is returned; any further rows in
    *logits* are decoded but discarded.
    """
    binarizer = LabelBinarizer().fit(list(iterate_valid_labels()))
    decoded = binarizer.inverse_transform(logits)
    return decoded[0]

In [None]:
class WindowGenerator():
    """Cut fixed-size (input, label) windows out of one labelled segment.

    A window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows are the inputs; the last ``label_width`` rows of the
    window carry the labels.

    Attributes:
        features: imputed feature columns (every column except 'label').
        labels: binarized labels, one column per entry of iterate_valid_labels().
        column_indices: feature column name -> positional column index.
        total_window_size: ``input_width + shift``.
        input_indices / label_indices: row positions inside a window that
            belong to the inputs / to the labels.
    """

    # Row offsets into the segment at which get_example() cuts its windows.
    _EXAMPLE_OFFSETS = (0, 100, 200)

    def __init__(self, input_width: int, label_width: int, shift: int, valid_segment: DataFrame):
        """Prepare features, labels and window geometry for one segment.

        Args:
            input_width: number of rows at the start of a window used as inputs.
            label_width: number of rows at the end of a window used as labels.
            shift: number of rows the window extends past the inputs.
            valid_segment: frame with a 'label' column plus feature columns
                (including 'frame_num', which impute_features requires). It is
                copied, not modified.
        """
        # Work on a copy so that popping 'label' leaves the caller's frame intact.
        df = valid_segment.copy()

        # Transform labels to model outputs.
        labels_str = df.pop('label')
        labels_bin = binarize_labels(labels_str)
        # NOTE(review): assumes binarize_labels() emits its columns in the order
        # iterate_valid_labels() yields the labels — confirm.
        self.labels = DataFrame(data=labels_bin, columns=list(iterate_valid_labels()))

        # Remaining columns are the input features.
        self.features = impute_features(df)

        # Map each feature column name to its position in the feature matrix.
        self.column_indices = {name: i for i, name in enumerate(self.features.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = arange(self.total_window_size)[self.labels_slice]

    def split_window(self, feature_batch, label_batch) -> Tuple[tf.Tensor, tf.Tensor]:
        """Split a batch of full windows into an (inputs, labels) pair.

        Args:
            feature_batch: features over full windows, indexed as
                (batch, time, feature) with ``total_window_size`` time steps.
            label_batch: labels over the same windows, indexed as
                (batch, time, label).

        Returns:
            Tuple[tf.Tensor, tf.Tensor]: the first ``input_width`` time steps of
            the features and the last ``label_width`` time steps of the labels.
        """
        inputs = feature_batch[:, self.input_slice, :]
        output = label_batch[:, self.labels_slice, :]

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the results are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        output.set_shape([None, self.label_width, None])

        return inputs, output

    def get_example(self):
        """Build a small example batch of windows and print its shapes.

        One window is cut at each offset in ``_EXAMPLE_OFFSETS``, the windows
        are stacked into a batch and the batch is passed through split_window().

        Returns:
            The (inputs, labels) pair returned by split_window().

        Raises:
            ValueError: if the segment has too few rows to cut a full window at
                every offset (the windows could not be stacked otherwise).
        """
        window = self.total_window_size
        required_rows = max(self._EXAMPLE_OFFSETS) + window
        available_rows = len(self.features)
        if available_rows < required_rows:
            raise ValueError(
                f'Segment has {available_rows} rows but get_example() needs at least '
                f'{required_rows} to cut windows at offsets {self._EXAMPLE_OFFSETS}')

        # Stack one slice per offset, each the length of the total window.
        example_features = tf.stack(
            [array(self.features[start:start + window]) for start in self._EXAMPLE_OFFSETS])
        example_labels = tf.stack(
            [array(self.labels[start:start + window]) for start in self._EXAMPLE_OFFSETS])

        example_inputs, example_outputs = self.split_window(example_features, example_labels)

        print('All shapes are: (batch, time, features)')
        print(f'Window shape: {example_features.shape}')
        print(f'Inputs shape: {example_inputs.shape}')
        print(f'Labels shape: {example_outputs.shape}')

        return example_inputs, example_outputs

    def plot(self, model=None, plot_col='NOSE_x', max_subplots=3):
        """Plot one feature column of the example windows with their label.

        Each example window from get_example() gets its own subplot: the chosen
        feature over the frame number, with the decoded label name written to
        the right of the last input frame.

        Args:
            model: reserved for overlaying predictions; currently unused.
            plot_col: name of the feature column to draw.
            max_subplots: upper bound on the number of windows drawn.

        Raises:
            KeyError: if plot_col or 'frame_num' is not a feature column.
            ValueError: from get_example() when the segment is too short.
        """
        inputs, labels = self.get_example()
        plt.figure(figsize=(12, 8))
        plot_col_index = self.column_indices[plot_col]
        frame_num_index = self.column_indices['frame_num']
        max_n = min(max_subplots, len(inputs))
        for n in range(max_n):
            plt.subplot(max_n, 1, n+1)
            plt.ylabel(f'{plot_col} [act]')
            feature_values = inputs[n, :, plot_col_index]
            x_axis = inputs[n, :, frame_num_index]

            plt.xlim((x_axis[0], x_axis[-1]+1))
            plt.plot(x_axis, feature_values,
                     label='Inputs', marker='.', zorder=-10)

            # Decode this window's label rows back to a label name and place it
            # just right of the last input frame, at the mean feature height.
            label_name = unbinarize_labels(array(labels[n, :, :]))
            label_x_position = x_axis[-1] + 0.2
            plt.text(label_x_position, mean(feature_values), label_name, c='#2ca02c',
                     label="Label")

            # TODO: overlay the predictions of `model` once that is supported.

            if n == 0:
                plt.legend()

        plt.xlabel('Frame number')

    def __repr__(self):
        """Summarize the window geometry (total size, input and label indices)."""
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}'])

In [None]:
# Windows of 5 input frames followed by 1 label frame, cut from the first valid segment.
w1 = WindowGenerator(
    input_width=5,
    label_width=1,
    shift=1,
    valid_segment=temp,
)
print(w1)

In [None]:
# NOTE(review): the WindowGenerator class in this notebook defines no
# make_dataset method, so this call fails with AttributeError unless the method
# is added or provided elsewhere — confirm.
ds = w1.make_dataset(w1.features)

In [None]:
# Draw the example windows for the default feature column ('NOSE_x').
w1.plot()

In [None]:
# Normalization sketch. In the training pipeline the mean and std must be
# computed from the training data only and then applied to the validation and
# test data as well.
# Only numeric columns are normalized: temp still carries the 'label' column,
# which has no mean/std and cannot take part in the arithmetic below.
numeric_columns = temp.select_dtypes(include="number").columns
train_mean = temp[numeric_columns].mean()
train_std = temp[numeric_columns].std()

# Copy first: temp is a slice of df, and writing columns into a slice is
# ambiguous in pandas. A constant column has std 0 and comes out as NaN.
temp = temp.copy()
temp[numeric_columns] = (temp[numeric_columns] - train_mean) / train_std
#temp_val = ...
#temp_test = ...


In [None]:
# Violin plot of the value distribution of every numeric column of temp.
# Non-numeric columns (such as 'label') are left out because they cannot be
# drawn on the value axis.
plt.figure(figsize=(16, 6))
numeric_features = temp.select_dtypes(include="number")
features_melted = numeric_features.melt(var_name="Column", value_name="Raw")
ax = sns.violinplot(x="Column", y="Raw", data=features_melted)
# Rotate the existing tick labels instead of overwriting them, so the labels
# always match the columns that were actually plotted.
ax.tick_params(axis="x", labelrotation=90)

In [None]:
import tensorflow as tf

from src.common.model import ClassificationModel

In [None]:
class LSTM(ClassificationModel):
    """Placeholder LSTM classifier; adds nothing to ClassificationModel yet."""