In [4]:

import numpy as np
import pandas as pd
import os
import sys
# Make project-local modules importable from this notebook.
# NOTE(review): this is a hard-coded, machine-specific absolute path; consider
# deriving it from an environment variable or a path relative to the notebook.
root =  "/home/bugabuga/hack/git"
# Guard the insert so that re-running this cell does not stack duplicate
# entries at the front of sys.path.
if root not in sys.path:
    sys.path.insert(0, root)

# fix random seed for reproducibility
np.random.seed(7)


In [14]:

def get_3d_sq_vel(x):
    """Return the element-wise squared speed velX**2 + velY**2 + velZ**2.

    `x` is any object exposing `velX`, `velY` and `velZ` attributes that
    numpy can convert to arrays (e.g. a DataFrame row). The result is a plain
    Python list.
    """
    vx, vy, vz = (np.array(component) for component in (x.velX, x.velY, x.velZ))
    return (vx ** 2 + vy ** 2 + vz ** 2).tolist()


def get_3d_vel(x):
    """Return the element-wise speed (square root of the squared speed) as a list.

    The squared speed comes from `get_3d_sq_vel(x)`.
    """
    squared = np.array(get_3d_sq_vel(x))
    return (squared ** 0.5).tolist()


def total_energy_per_unit_mass(x):
    """Return 0.5 * squared speed + posZ, element-wise, as a list.

    The squared speed comes from `get_3d_sq_vel(x)`; `x.posZ` is added as-is
    (no extra scaling factor is applied to it here).
    """
    half_speed_sq = np.array(get_3d_sq_vel(x)) * 0.5
    height = np.array(x.posZ)
    return (half_speed_sq + height).tolist()


def mypolyfit_y(y, deg):
    """Fit a polynomial of degree `deg` to `y` and evaluate it on the same grid.

    The abscissa is the sample index divided by 2 (one point per element of
    `y`). Returns `(coefs, fitted)`, where `coefs` is the `np.polyfit` result
    (highest power first) and `fitted` is the polynomial evaluated at every
    abscissa point.
    """
    samples = np.array(y)
    x = np.arange(0, samples.shape[0]) / 2
    coefs = np.polyfit(x, samples, deg)

    # Walk the coefficients from the constant term upwards, so `power`
    # matches the exponent each coefficient multiplies.
    fitted = 0
    for power, coef in enumerate(coefs[::-1]):
        fitted += coef * x ** power
    return coefs, fitted


def smooth_velocity(x, deg):
    """Polynomial-fit each velocity component of `x` with degree `deg`.

    Returns two 3-tuples in (X, Y, Z) order: the fit coefficients and the
    fitted values, each as produced by `mypolyfit_y`.
    """
    fits = [mypolyfit_y(component, deg) for component in (x.velX, x.velY, x.velZ)]
    coefficients = tuple(fit[0] for fit in fits)
    fitted_values = tuple(fit[1] for fit in fits)
    return coefficients, fitted_values


def get_smoothed_columns(x):
    """Build the smoothed per-row quantities from a degree-1 velocity fit.

    Returns a Series holding, in order: the leading fit coefficient for X, Y
    and Z; the smoothed speed (list); 0.5 * smoothed squared speed + posZ
    (list); and the fitted X, Y and Z velocity arrays.
    """
    coefs, fitted = smooth_velocity(x, 1)
    coef_x, coef_y, coef_z = coefs
    fit_x, fit_y, fit_z = fitted

    speed_sq = fit_x ** 2 + fit_y ** 2 + fit_z ** 2
    speed = speed_sq ** 0.5
    energy = speed_sq * 0.5 + np.array(x.posZ)

    values = [
        coef_x[0], coef_y[0], coef_z[0],
        speed.tolist(), energy.tolist(),
        fit_x, fit_y, fit_z,
    ]
    return pd.Series(values)


def get_min_max_avg(x, columns_names):
    """Return min, max and mean of each named entry of `x`, flattened into a Series.

    For every name in `columns_names` (in order) three values are appended:
    `min(x[name])`, `max(x[name])` and `np.mean(x[name])`.
    """
    stats = []
    for name in columns_names:
        values = x[name]
        stats.extend((min(values), max(values), np.mean(values)))
    return pd.Series(stats)


def create_features(df):
    """Build the per-row feature table from the raw frame `df`.

    `df` must provide the columns read by `get_smoothed_columns` (velX, velY,
    velZ, posZ) plus a "class" column.

    Returns a DataFrame made of the min/max/mean summaries of "3d_vel",
    "tot_e", "smooth_vel_z" and "posZ" (integer-labelled columns), followed by
    the "ax", "ay", "az" and "class" columns.
    """
    smoothed_names = ["ax", "ay", "az", "3d_vel", "tot_e",
                      "smooth_vel_x", "smooth_vel_y", "smooth_vel_z"]
    summarised_names = ["3d_vel", "tot_e", "smooth_vel_z", "posZ"]

    df_smoothed = pd.DataFrame()
    df_smoothed[smoothed_names] = df.apply(get_smoothed_columns, axis=1)
    df_smoothed[["posZ", "class"]] = df[["posZ", "class"]]

    min_max_avg_features = df_smoothed.apply(
        lambda row: get_min_max_avg(row, summarised_names), axis=1)

    # Select the scalar columns with a plain list of labels. A trailing comma
    # inside the subscript would turn the key into a 1-tuple wrapping the
    # list, which is not a valid column selection.
    scalar_columns = df_smoothed[["ax", "ay", "az", "class"]]
    return pd.concat([min_max_avg_features, scalar_columns], axis=1)






In [23]:
# Load the pickled input table.
# NOTE(review): unpickling can execute arbitrary code; only load files from a
# trusted source.
df = pd.read_pickle(os.path.expanduser("~/hack/data/decreasing_data.p"))
ts = 15  # rows must have more than `ts` posX samples to be kept
print(df.shape)
# Keep only rows whose posX sequence is longer than `ts`, then cap the working
# set at the first 200 rows. The length test runs on the single column
# instead of a row-wise DataFrame.apply.
df_dropped = df[df["posX"].map(len) > ts].iloc[:200, :]
print(df_dropped.shape)

(17091, 7)
(200, 7)


In [20]:
# Build the smoothed-feature table for the filtered rows.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, and
# `df_features` is assigned again further down from df1/df2/df3.
df_features = create_features(df_dropped)

KeyboardInterrupt: 

In [24]:
def get_polyfit_coefs(y, deg=4):
    """Return the polynomial-fit coefficients of `y` as a plain list.

    The fit is delegated to `mypolyfit_y`; only its coefficient array is kept
    (the fitted values are discarded).
    """
    return mypolyfit_y(y, deg)[0].tolist()

def get_coef_features(x, columns_names):
    """Concatenate the polynomial-fit coefficients of several entries of `x`.

    For each name in `columns_names` (in order) the coefficients returned by
    `get_polyfit_coefs(x[name])` are appended; the flat result is returned as
    a Series.
    """
    flat_coefs = []
    for name in columns_names:
        flat_coefs.extend(get_polyfit_coefs(x[name]))
    return pd.Series(flat_coefs)


In [44]:
# Derive three per-row sequences from the filtered rows and place them side by
# side: speed, squared speed, and 0.5 * squared speed + posZ.
df_dropped2 = pd.concat(
    [
        df_dropped.apply(get_3d_vel, axis=1),
        df_dropped.apply(get_3d_sq_vel, axis=1),
        df_dropped.apply(total_energy_per_unit_mass, axis=1),
    ],
    axis=1,
)
# Label the columns in the same order as the pieces above.
df_dropped2.columns = ["3dvel", "3dsq_vel", "e"]

In [45]:
# Shape check of the derived frame (the recorded output is (200, 3)).
df_dropped2.shape

(200, 3)

In [46]:
# Per-row feature groups. The column list is forwarded to the row function
# through `args`, so each row is processed as func(row, column_list).
# df1: polynomial-fit coefficients of the raw position/velocity sequences.
df1 = df_dropped.apply(get_coef_features, axis=1, args=(['posZ', 'velZ', 'posX', 'velX'],))
# df2: min / max / mean summaries of selected raw sequences.
df2 = df_dropped.apply(get_min_max_avg, axis=1, args=(['posY', 'posZ', 'velZ'],))
# df3: polynomial-fit coefficients of the derived sequences in df_dropped2.
df3 = df_dropped2.apply(get_coef_features, axis=1, args=(["3dvel", "3dsq_vel", "e"],))


In [51]:
# Join the three feature groups column-wise into one table.
# NOTE(review): df1/df2/df3 are built from Series with default integer labels,
# so the combined frame presumably carries duplicate column labels — confirm,
# and consider relabelling (e.g. ignore_index=True) if columns are selected by
# name later. The "class" column is not part of this table.
df_features = pd.concat([df1, df2, df3], axis=1)

((200, 44), (200, 20), (200, 9), (200, 15))