# ETL.

ETL stands for "extract, transform, load", and that is exactly what it does. In our case the ETL takes the raw data, which has been collected in the field using cell phones, and transforms it in a way that makes data science easier. At regular intervals the ETL generates a snapshot of the entire dataset and stores it in a folder named with a timestamp.

In [None]:
# Put the parent directory first on the import path so that the
# cgmcore module can be imported from this notebook.
import sys
sys.path.insert(0, "..")

# ETL-Data-Generator.

Data-generators are a core concept of Keras (https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly). They allow for data-generation on the fly. This includes but is not limited to data-augmentation and data-preprocessing.

Throughout the project we make extensive use of data-generators. The ETL-Data-Generator works explicitly on the data as provided by the ETL.

# Accessing the latest dataset.

We provide means to retrieve the latest data-set-path as follows.

In [None]:
from cgmcore.etldatagenerator import get_dataset_path

# Resolve the path of the latest ETL snapshot below the ETL root folder.
dataset_path = get_dataset_path("../../data/etl")
print("Using dataset-path {}.".format(dataset_path))

# Finding all PCD-files.

In [None]:
import glob2 as glob
import os

# Collect every PCD-file found anywhere below the dataset folder.
pcd_pattern = os.path.join(dataset_path, "**/*.pcd")
all_pcd_paths = glob.glob(pcd_pattern)
print("Found {} PCD-files.".format(len(all_pcd_paths)))

# Selecting random PCD-files.

In [None]:
import random
# Draw nine PCD-paths at random; the same file may be drawn more than once.
random_pcd_paths = [random.choice(all_pcd_paths) for _ in range(9)]
for random_pcd_path in random_pcd_paths:
    print("Using random PCD-path {}.".format(random_pcd_path))

# Loading the pointclouds as numpy arrays.

`load_pcd_as_ndarray` is one of the many helper functions provided by `cgmcore.utils`.

In [None]:
from cgmcore.utils import load_pcd_as_ndarray

# Load every randomly selected PCD-file as a numpy array. Iterating over the
# list itself keeps this cell in sync with however many paths were selected.
pointclouds = []
for pcd_path in random_pcd_paths:
    pointcloud = load_pcd_as_ndarray(pcd_path)
    pointclouds.append(pointcloud)
    print("Loaded pointcloud with shape {}.".format(pointcloud.shape))

# One fixed pointcloud that the analysis cells below work on.
# NOTE(review): this path points into one specific ETL snapshot folder and
# does not use dataset_path - confirm that the file exists on your machine.
pointcloud_1 = load_pcd_as_ndarray("../../data/etl/2018_07_31_10_52/MP_WHH_2519/1540363099245/pcd/pc_MP_WHH_2519_1540362844399_104_011.pcd")


# Rendering a pointcloud.

In [None]:
import numpy as np

def rotate(x, y, z, theta_deg):
    """
    Rotates coordinates about the y-axis.

    Args:
        x: x-coordinate(s), a scalar or a numpy array.
        y: y-coordinate(s); returned unchanged.
        z: z-coordinate(s), a scalar or a numpy array.
        theta_deg: rotation angle in degrees.

    Returns:
        A tuple (x_rotated, y, z_rotated).
    """
    theta = theta_deg*2*np.pi/360
    cos_theta = np.cos(theta)
    sin_theta = np.sin(theta)
    # Both results must be computed from the ORIGINAL x and z. Overwriting x
    # before computing z would feed the already rotated x into the z formula.
    x_rotated = x*cos_theta - z*sin_theta
    z_rotated = x*sin_theta + z*cos_theta
    return x_rotated, y, z_rotated

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def render_pointcloud_multiple(multiple_points, title=None):
    """
    Renders up to eight point-clouds as 3D scatter plots in a 4x2 grid.

    Args:
        multiple_points: sequence of point-clouds. Each one is indexed as
            points[:, 0], points[:, 1] and points[:, 2], so it has to be a
            2D array with at least three columns. Only the first eight
            entries are rendered.
        title: optional title for the whole figure.
    """

    num_rows, num_cols = 4, 2

    fig = plt.figure(figsize=(10, 10))
    for index, points in enumerate(multiple_points[:num_rows * num_cols]):
        # Create exactly one 3D axes per grid cell. An additional
        # plt.subplot() call would leave an empty 2D axes underneath it.
        ax = fig.add_subplot(num_rows, num_cols, index + 1, projection='3d')

        # rotate() returns a tuple of three coordinate arrays. scatter()
        # needs them as three separate positional arguments.
        xs, ys, zs = rotate(-points[:,1], points[:,2], points[:,0], 60)
        # No cmap is passed: without per-point colour values (c) a colormap
        # has no effect on the plot.
        ax.scatter(xs, ys, zs, s=0.5, alpha=0.5)

        ax.set_xlabel("y")
        ax.set_ylabel("z")
        ax.set_zlabel("x")

    if title is not None:
        # Title the figure as a whole instead of only the last subplot.
        fig.suptitle(title)

    plt.show()
    plt.close()
    
def render_pointcloud_single(points, angle=0, title=None):
    """
    Renders a single point-cloud as a 3D scatter plot.

    Args:
        points: point-cloud, indexed as points[:, 0], points[:, 1] and
            points[:, 2], so it has to be a 2D array with at least three
            columns.
        angle: rotation angle in degrees that is handed to rotate().
            Defaults to 0 (no rotation).
        title: optional plot title.
    """

    fig = plt.figure(figsize=(3, 3))
    ax = fig.add_subplot(111, projection='3d')

    # Column 1 is negated before plotting; columns 0 and 2 are rotated.
    x, y, z = rotate(points[:,0], -points[:,1], points[:,2], angle)
    # No cmap is passed: without per-point colour values (c) a colormap
    # has no effect on the plot.
    ax.scatter(y, z, x, s=0.5, alpha=0.5)

    ax.set_xlabel("y")
    ax.set_ylabel("z")
    ax.set_zlabel("x")

    if title is not None:
        plt.title(title)

    # The figure is deliberately not closed after showing it.
    # NOTE(review): presumably so that an interactive backend can keep the
    # plot alive - confirm.
    plt.show()

In [None]:
# Show the array dimensions of pointcloud_1.
pointcloud_1.shape

In [None]:
from random import randint
# For every point, pick a random partner point and compute the slope angle
# (in degrees) of the connecting line, using columns 0 and 2 of the cloud.
num_points = len(pointcloud_1)
slope = np.full(num_points, np.nan)
for i in range(num_points):
    j = randint(0, num_points - 1)
    delta_z = pointcloud_1[i,2] - pointcloud_1[j,2]
    delta_x = pointcloud_1[i,0] - pointcloud_1[j,0]
    # A pair with identical column-0 values (this includes i == j) has no
    # defined slope. Its entry stays NaN instead of dividing by zero.
    if delta_x != 0:
        # The slope angle is arctan(dz / dx); the division belongs inside
        # the arctan call.
        slope[i] = np.degrees(np.arctan(delta_z / delta_x))
print(slope)
print(np.nanmax(slope))
plt.figure()
# Only finite values can be binned by the histogram.
plt.hist(slope[np.isfinite(slope)])
plt.xlim([-360,360])

In [None]:

# Select candidate floor points from the ninth sampled pointcloud: all points
# whose column-0 (x) and column-1 (y) values lie strictly inside a narrow
# percentile band.
cloud = pointclouds[8]

y90, y95 = np.percentile(cloud[:,1], [90,95])
# NOTE(review): the names suggest the 5th/10th percentiles, but the 15th/20th
# percentiles are requested - confirm which of the two is intended.
y5, y10 = np.percentile(cloud[:,1], [15,20])
x5, x10 = np.percentile(cloud[:,0], [15,20])

in_band = (
    (cloud[:,0] > x5) & (cloud[:,0] < x10)
    & (cloud[:,1] > y5) & (cloud[:,1] < y10)
)
# Keep the coordinates exactly as stored in the cloud; the selected points
# must not be modified while collecting them.
floor_points = [tuple(point) for point in cloud[in_band, :3]]
if not floor_points:
    raise ValueError("No points fall inside the selected x/y percentile band.")

# Extremes of the x-coordinate among the selected points, together with
# their positions inside floor_points.
floor_xs = np.asarray(floor_points)[:,0]
x_max_idx = int(np.argmax(floor_xs))
x_min_idx = int(np.argmin(floor_xs))
x_max = floor_xs[x_max_idx]
x_min = floor_xs[x_min_idx]
print(x_max)
print(x_min)

In [None]:
# Histogram of column 0 of the ninth sampled pointcloud.
plt.hist(pointclouds[8][:,0])


In [None]:
%matplotlib notebook

In [None]:
# Render pointcloud_1 with a rotation angle of 0 degrees.
render_pointcloud_single(pointcloud_1,0, "A random pointcloud")

In [None]:
def rotate(x, y, z, theta_deg):
    """
    Rotates coordinates about the y-axis.

    Args:
        x: x-coordinate(s), a scalar or a numpy array.
        y: y-coordinate(s); handed back untouched.
        z: z-coordinate(s), a scalar or a numpy array.
        theta_deg: rotation angle in degrees.

    Returns:
        A tuple (rotated x, y, rotated z).
    """
    theta = theta_deg*2*np.pi/360
    cosine = np.cos(theta)
    sine = np.sin(theta)
    return x*cosine - z*sine, y, x*sine + z*cosine

In [None]:
%matplotlib notebook
# The second positional parameter of render_pointcloud_single is the rotation
# angle, so the title is passed by keyword.
# NOTE(review): angle 0 mirrors the earlier render call in this notebook;
# adjust it if a different view was intended.
render_pointcloud_single(pointcloud_1, 0, title="A random pointcloud")

In [None]:
#from cgmcore.utils import render_pointcloud
# Render the randomly sampled pointclouds with the grid renderer defined in
# this notebook.
render_pointcloud_multiple(pointclouds, "A random pointcloud")