In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Slide a window of `datapoints` consecutive rows over df and flatten every
# single-label window into one wide row (gyro x/y/z, then accel x/y/z).
def generate_dataset(datapoints, n_steps, prefix, df):
	"""Build a windowed dataset from df and write it to a CSV file.

	A window of `datapoints` consecutive rows starts every `n_steps` rows.
	Windows whose rows do not all share one label are skipped, as are windows
	at the end of df that hold fewer than `datapoints` rows. Each remaining
	window becomes one output row with the columns, in order:
	gyro_x_0..gyro_x_{k}, gyro_y_*, gyro_z_*, acce_x_*, acce_y_*, acce_z_*
	(k = datapoints - 1), then longitude and latitude taken from the window's
	first row, then label.

	Labels are mapped "lift" -> 0 and "piste" -> 1, rows containing any NaN
	(including labels outside that mapping) are dropped, and the result is
	written to ./data/<prefix>/<prefix>_<datapoints>.csv without the index.
	If no window qualifies, a header-only CSV is written.

	Parameters:
		datapoints: number of consecutive rows per window.
		n_steps: row offset between the starts of consecutive windows.
		prefix: used for both the output sub-directory and the file name.
		df: DataFrame with columns x_gyro, y_gyro, z_gyro, x, y, z,
			longitude, latitude and label.

	Raises:
		KeyError: if df lacks one of the columns listed above.
		ValueError: if n_steps is 0 (raised by range()).
		OSError: if the output directory does not exist.
	"""
	# (column in df, column prefix in the output), in output order
	sensor_columns = [
		("x_gyro", "gyro_x"), ("y_gyro", "gyro_y"), ("z_gyro", "gyro_z"),
		("x", "acce_x"), ("y", "acce_y"), ("z", "acce_z"),
	]
	out_columns = [
		name + "_" + str(c)
		for (_, name) in sensor_columns
		for c in range(datapoints)
	] + ["longitude", "latitude", "label"]

	# Collect plain rows and build the frame once; concatenating inside the
	# loop copies the whole accumulated frame on every iteration.
	rows = []
	# Positional slicing, so df does not need a default RangeIndex.
	for start in range(0, len(df), n_steps):
		window = df.iloc[start:start + datapoints]

		# Every later window is at least as short, so stop here.
		if len(window) < datapoints:
			break

		if window["label"].nunique(dropna=False) != 1:
			continue

		row = []
		for (source, _) in sensor_columns:
			row.extend(window[source].tolist())
		row.append(window["longitude"].iloc[0])
		row.append(window["latitude"].iloc[0])
		row.append(window["label"].iloc[0])
		rows.append(row)

	# Passing the column list keeps the header intact even when rows is empty.
	train_df = pd.DataFrame(rows, columns=out_columns)

	# relabel
	train_df["label"] = train_df["label"].map({"lift": 0, "piste": 1})
	train_df = train_df.dropna()
	# save to file
	train_df.to_csv("./data/" + prefix + "/" + prefix + "_" + str(datapoints) + ".csv", index=False)

In [4]:
# Generate one dataset per window size and per split

# Window sizes (rows per sample) to generate
datapoints = [50, 100, 200, 300, 400, 500]
# A new window starts every n_steps rows, so consecutive windows overlap
n_steps = 10
# The ratio datapoints/n_steps is how much the data will be augmented

# Splits to process; each is read from ./data/<prefix>/labelled.csv
prefixes = ["train", "test"]

# Read each split once rather than once per window size; the frames are only
# read by generate_dataset, so they can be shared across iterations.
labelled = {p: pd.read_csv("./data/" + p + "/labelled.csv") for p in prefixes}

for d in datapoints:
	for p in prefixes:
		df = labelled[p]
		print(f"Generating dataset: prefix: {p} and datapoints: {d} ...")
		generate_dataset(d, n_steps, p, df)

Generating dataset: prefix: train and datapoints: 400 ...
Generating dataset: prefix: test and datapoints: 400 ...
Generating dataset: prefix: train and datapoints: 500 ...
Generating dataset: prefix: test and datapoints: 500 ...


In [6]:
# Generate a single windowed dataset (quaternion / pitch features) for testing
prefix = "test"
datapoints = 400
n_steps = 30
df = pd.read_csv("./data/" + prefix + "/labelled.csv")

# Per-sample signals; each contributes `datapoints` columns named <col>_<k>
columns = ['y', 'qz', 'qy', 'qx', 'qw', 'pitch']
# Scalars copied from the first row of each window, in output order
context_columns = ["longitude", "latitude", "speed", "altitude", "label"]

out_columns = [
	col + "_" + str(c)
	for col in columns
	for c in range(datapoints)
] + context_columns

# Collect plain rows and build the frame once; concatenating inside the loop
# copies the whole accumulated frame on every iteration.
rows = []
# Positional slicing, so df does not need a default RangeIndex.
for start in range(0, len(df), n_steps):
	window = df.iloc[start:start + datapoints]

	# Every later window is at least as short, so stop here.
	if len(window) < datapoints:
		break

	# Keep only windows whose rows all share one label.
	if window["label"].nunique(dropna=False) != 1:
		continue

	row = []
	for col in columns:
		row.extend(window[col].tolist())
	row.extend(window[name].iloc[0] for name in context_columns)
	rows.append(row)

# Passing the column list keeps "label" present even when no window qualified.
train_df = pd.DataFrame(rows, columns=out_columns)

# relabel, then drop rows with any missing value (including unmapped labels)
train_df["label"] = train_df["label"].map({"lift": 0, "piste": 1})
train_df = train_df.dropna()

In [5]:
# Write the windowed frame next to its source data, without the index column
output_path = f"./data/{prefix}/{prefix}_quat_final_{datapoints}.csv"
train_df.to_csv(output_path, index=False)