# ETL.

ETL stands for "extract, transform, load", and that is exactly what it does. In our case the ETL takes the raw data, which has been collected using cell phones out in the field, and transforms it in a way that makes Data Science easier. At regular intervals the ETL generates snapshots of the entire data and stores them in a folder with a timestamp.

In [None]:
# This makes sure that you have access to the cgmcore-module.
import sys
sys.path.insert(0, "..")

# ETL-Data-Generator.

Data-generators are a core concept of Keras (https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly). They allow for data-generation on the fly. This includes but is not limited to data-augmentation and data-preprocessing.

Throughout the project we make extensive use of data-generators. The ETL-Data-Generator works explicitly on the data as provided by the ETL.

# Accessing the latest dataset.

We provide means to retrieve the latest data-set-path as follows.

In [None]:
from cgmcore.etldatagenerator import get_dataset_path

# Resolve the dataset-path below the ETL root folder via the cgmcore helper
# and report which path is going to be used.
dataset_path = get_dataset_path("../../data/etl")
print("Using dataset-path {}.".format(dataset_path))

# Finding all PCD-files.

In [None]:
import glob2 as glob
import os

# Collect the paths of all PCD-files below the dataset folder. The "**"
# wildcard is meant to match nested sub-folders as well.
pcd_pattern = os.path.join(dataset_path, "**/*.pcd")
all_pcd_paths = glob.glob(pcd_pattern)
print("Found {} PCD-files.".format(len(all_pcd_paths)))

# Selecting random PCD-files.

In [None]:
import random
# Draw nine PCD-paths at random. Each draw is independent, so the same
# path may show up more than once.
random_pcd_paths = [random.choice(all_pcd_paths) for _ in range(9)]
for random_pcd_path in random_pcd_paths:
    print("Using random PCD-path {}.".format(random_pcd_path))

# Loading the pointclouds as numpy arrays.

This is one of the many helper-functions provided by cgmcore.utils.

In [None]:
from cgmcore.utils import load_pcd_as_ndarray

# Load every selected PCD-file. Iterating over the paths directly keeps this
# cell in sync with the selection, instead of repeating a hard-coded count.
pointclouds = []
for pcd_path in random_pcd_paths:
    pointcloud = load_pcd_as_ndarray(pcd_path)
    pointclouds.append(pointcloud)
    print("Loaded pointcloud with shape {}.".format(pointcloud.shape))

# Rendering a pointcloud.

In [None]:
import numpy as np

def rotate(x, y, z, theta_deg):
    """
    Rotates coordinates in the x-z-plane by an angle given in degrees.

    The y-coordinates are handed back untouched. Only arithmetic is applied
    to x and z, so scalars work as well as numpy arrays.

    Returns the tuple (rotated x, y, rotated z).
    """
    angle_rad = theta_deg*2*np.pi/360
    cosine, sine = np.cos(angle_rad), np.sin(angle_rad)
    rotated_x = x*cosine - z*sine
    rotated_z = x*sine + z*cosine
    return rotated_x, y, rotated_z

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def render_pointcloud_multiple(multiple_points, title=None):
    """
    Renders up to eight point-clouds as 3D scatter plots in a 4x2 grid.

    multiple_points: Sequence of point-clouds. Each one is indexed as
        points[:, 0], points[:, 1] and points[:, 2]. Only the first eight
        entries are rendered; fewer entries leave the remaining cells empty.
    title: Optional title that is placed above the whole figure.
    """

    rows, columns = 4, 2

    fig = plt.figure(figsize=(10, 10))
    for index, points in enumerate(multiple_points[:rows * columns]):
        # Create the 3D axes directly. An additional plt.subplot() call would
        # only leave a stray 2D axes behind each 3D plot.
        ax = fig.add_subplot(rows, columns, index + 1, projection='3d')

        x, y, z = rotate(-points[:,1], points[:,2], points[:,0], 0)
        # No cmap here: without per-point values passed via "c" a colormap
        # is ignored by scatter.
        ax.scatter(y, z, x, s=0.5, alpha=0.5)

        ax.set_xlabel("y")
        ax.set_ylabel("z")
        ax.set_zlabel("x")

    if title is not None:
        # The title describes the whole grid, so it goes on the figure rather
        # than on whichever subplot happens to be the current one.
        fig.suptitle(title)

    plt.show()
    plt.close(fig)
    
def render_pointcloud_single(points, angle_deg, title=None):
    """
    Renders a single point-cloud as a 3D scatter plot.

    points: Point-cloud that is indexed as points[:, 0], points[:, 1] and
        points[:, 2]. The second column is negated before plotting.
    angle_deg: Angle in degrees that is passed on to rotate() before plotting.
    title: Optional title of the plot.
    """

    fig = plt.figure(figsize=(3, 3))
    ax = fig.add_subplot(111, projection='3d')

    x, y, z = rotate(points[:,0], -points[:,1], points[:,2], angle_deg)
    # No cmap here: without per-point values passed via "c" a colormap
    # is ignored by scatter.
    ax.scatter(x, y, z, s=0.5, alpha=0.5)

    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")

    if title is not None:
        ax.set_title(title)

    # NOTE: The figure is shown but not closed afterwards.
    plt.show()
    
def get_rotation(pointcloud):
    """
    Estimates a rotation angle in degrees from a thin band of the point-cloud.

    Only points whose x- and y-values both lie strictly between the 5th and
    the 10th percentile of their column are considered (presumably these are
    floor points - TODO confirm). Among those, the point with the largest and
    the point with the smallest x-value are picked. The angle is
    arctan(delta_x / delta_z) between these two points.

    pointcloud: 2D array with at least three columns, read as x, y and z.

    Returns the angle in degrees. If both picked points share the same z-value
    the division follows numpy semantics (result of +-90 or nan).

    Raises ValueError if no point falls into the percentile band.
    """
    # Work on the array that was passed in, not on a notebook-level global.
    pointcloud = np.asarray(pointcloud)
    xs = pointcloud[:, 0]
    ys = pointcloud[:, 1]

    x5, x10 = np.percentile(xs, [5, 10])
    y5, y10 = np.percentile(ys, [5, 10])

    in_band = (xs > x5) & (xs < x10) & (ys > y5) & (ys < y10)
    candidates = pointcloud[in_band][:, :3]
    if len(candidates) == 0:
        raise ValueError(
            "No points between the 5th and 10th percentile of x and y.")

    # argmax/argmin return the first extreme point, and they do not depend on
    # sentinel start values that could exclude points with unusual x-ranges.
    x_max_point = tuple(candidates[np.argmax(candidates[:, 0])])
    x_min_point = tuple(candidates[np.argmin(candidates[:, 0])])

    slope = (x_max_point[0] - x_min_point[0]) / (x_max_point[2] - x_min_point[2])
    print("x_max", x_max_point)
    print("x_min", x_min_point)
    return np.degrees(np.arctan(slope))

In [None]:
from sklearn.decomposition import PCA as sklearnPCA
# Fit a PCA with three components to the first three columns of one pointcloud.
sklearn_pca = sklearnPCA(n_components=3)
sklearn_pca.fit(pointclouds[5][:,0:3])
print(sklearn_pca.components_)
print(sklearn_pca.explained_variance_)

# Angle in degrees derived from the third and the first entry of the
# first principal component.
first_component = sklearn_pca.components_[0]
print(first_component[0])
theta = np.arctan(first_component[2]/first_component[0])*360/(2 * np.pi)
print(theta)

In [None]:
#print(get_rotation(pointclouds[3]))
# Render the pointcloud that was used for the PCA fit, rotated by the
# negated PCA angle.
render_pointcloud_single(pointclouds[5], -theta)

In [None]:
# Histogram over the first column (the x-values) of one pointcloud.
plt.hist(pointclouds[8][:,0])


In [None]:
%matplotlib

In [None]:
#render_pointcloud_single(pointclouds[8], "A random pointcloud")

In [None]:
#from cgmcore.utils import render_pointcloud
# Render the loaded pointclouds as a grid of 3D scatter plots.
render_pointcloud_multiple(pointclouds, "A random pointcloud")