In [2]:
import os
import glob
import pandas as pd
import numpy as np

### Find folder based on speed

In [47]:
def speed_to_folder(speed):
    """Map a speed value to the code of the folder that holds its data.

    Known speeds are matched with a small tolerance instead of exact ``==``,
    so a value that is off by floating-point rounding (for example after
    arithmetic or a text round-trip) still resolves to its folder.

    Parameters
    ----------
    speed : float
        Speed value to look up.

    Returns
    -------
    str
        Folder code such as ``"3p6"``; ``"test"`` when ``speed`` matches no
        known value (including non-numeric input).
    """
    import math  # local import: only this lookup needs it

    known_speeds = (
        (0.05232, "3p6"),  # original note: 1/0.4735 = 2.1193 (meaning unclear, TODO confirm)
        (0.06528, "4p4"),
        (0.06852, "4p6"),
        (0.07824, "5p2"),
        (0.09768, "6p4"),
        (0.10092, "6p6"),
        (0.11064, "7p2"),
        (0.12036, "7p8"),
        (0.13008, "8p4"),
        (0.16248, "10p4"),
        (0.17868, "11p4"),
    )
    for known, folder_code in known_speeds:
        try:
            # Neighbouring table entries differ by more than 1e-3, so this
            # tolerance can never make two entries ambiguous.
            matched = math.isclose(speed, known, rel_tol=1e-9, abs_tol=1e-12)
        except TypeError:
            # Non-numeric input never matched the old equality chain either.
            return "test"
        if matched:
            return folder_code
    return "test"

In [6]:
def distance(x, y, z):
    """Return the Euclidean norm of the components ``x``, ``y`` and ``z``.

    The arithmetic is element-wise, so scalars, NumPy arrays and pandas
    Series are all accepted.
    """
    sum_of_squares = x**2.0 + y**2.0 + z**2.0
    return np.sqrt(sum_of_squares)

### Merge files in the folders

In [7]:
def get_files_list(speed, base_dir="/mnt/d/sources/data/DL-PTV"):
    """Load and merge every CSV recorded for ``speed`` into one DataFrame.

    CSV files are collected from the ``<folder>-1`` and ``<folder>-2``
    directories under ``base_dir``, where ``<folder>`` is the code returned by
    ``speed_to_folder(speed)``. Each file is read as six ``;``-separated
    columns. Its first 50689 rows and the rows from position 50690 onward are
    inner-joined on ``(x, y, z)``, so the ``v*`` and ``p*`` columns end up
    side by side for every grid point.

    Parameters
    ----------
    speed : float
        Speed value identifying the data folders.
    base_dir : str, optional
        Root directory that contains the per-speed folders. Defaults to the
        location the notebook was written against.

    Returns
    -------
    pandas.DataFrame
        Columns ``x, y, z, vx, vy, vz, px, py, pz, time, distance``.
        ``p*`` is multiplied by 1000 and ``v*`` by 10, both rounded to two
        decimals; ``x, y, z`` and ``distance`` are rounded to whole numbers.

    Raises
    ------
    ValueError
        If no CSV file is found for ``speed``.
    """
    folder = speed_to_folder(speed)

    csv_files = []
    for run in ("1", "2"):
        csv_files.extend(glob.glob(os.path.join(base_dir, f"{folder}-{run}", "*.csv")))
    if not csv_files:
        # Fail here with context instead of letting pd.concat([]) raise a
        # message that says nothing about the missing files.
        raise ValueError(
            f"No CSV files found for speed {speed!r} under {base_dir!r} "
            f"(looked in {folder}-1 and {folder}-2)"
        )

    frames = []
    for csv_path in csv_files:
        raw = pd.read_csv(csv_path, sep=";")
        raw.columns = ["x", "y", "z", "vx", "vy", "vz"]
        # Position 50689 belongs to neither slice, so that row is dropped;
        # presumably it separates the two sections of the file (TODO confirm).
        frame = pd.merge(
            raw.iloc[:50689, :],
            raw.iloc[50690:, :],
            how="inner",
            on=["x", "y", "z"],
        )
        frame.columns = ["x", "y", "z", "vx", "vy", "vz", "px", "py", "pz"]
        # The time stamp is the last four characters of the file name stem.
        stem = os.path.splitext(os.path.basename(csv_path))[0]
        frame["time"] = int(stem[-4:])
        # Distance is computed from the unrounded coordinates, then rounded.
        frame["distance"] = distance(frame["x"], frame["y"], frame["z"]).round(decimals=0)
        frames.append(frame)

    df = pd.concat(frames, ignore_index=True)

    # Converting kPa to Pa for pressure columns (per the original note).
    for column in ("px", "py", "pz"):
        df[column] = (df[column] * 1000).round(decimals=2)
    # Scaling up the velocity by a factor of 10.
    for column in ("vx", "vy", "vz"):
        df[column] = (df[column] * 10).round(decimals=2)
    # Coordinates are rounded last so the distance above used full precision.
    for column in ("x", "y", "z"):
        df[column] = df[column].round(decimals=0)

    return df

In [8]:
# final_df = pd.read_pickle(f"/mnt/d/sources/data/DL-PTV/merged/Old_merged/3p6.pkl", compression="zip")

In [9]:
# Build the merged frame for one speed; `folder` is the matching folder code
# and is used later to name the output pickle.
speed = 0.05232
folder = speed_to_folder(speed)
final_df = get_files_list(speed)

In [10]:
# Preview the first rows of the merged frame.
final_df.head()

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,time,distance
0,-117.0,87.0,-33.0,0.46,0.02,-0.0,2.47,-2.39,-0.49,363,150.0
1,-113.0,87.0,-33.0,0.46,0.02,0.0,2.54,-2.25,-0.54,363,146.0
2,-109.0,87.0,-33.0,0.47,0.03,0.0,2.44,-2.0,-0.58,363,143.0
3,-105.0,87.0,-33.0,0.47,0.03,0.0,2.21,-1.68,-0.61,363,140.0
4,-101.0,87.0,-33.0,0.47,0.03,0.0,1.89,-1.36,-0.6,363,137.0


In [11]:
# (rows, columns) of the merged frame.
final_df.shape

(60823200, 11)

In [15]:
# Build and persist the merged frame for each of the remaining speeds.
# The loop uses its own names on purpose: reusing `folder` / `final_df` here
# would overwrite the values set by the single-speed cell above, and the save
# cell that follows would then write the last loop item a second time instead
# of that single-speed frame.
speeds = [0.06528, 0.06852, 0.07824, 0.09768, 0.10092, 0.11064, 0.12036, 0.13008, 0.16248, 0.17868]
for batch_speed in speeds:
    batch_folder = speed_to_folder(batch_speed)
    batch_df = get_files_list(batch_speed)
    batch_df.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/{batch_folder}.pkl", compression='zip')
    print(f"{batch_folder} is done!")
# Drop the reference to the last (large) frame so its memory can be reclaimed.
batch_df = None

4p4 is done!
4p6 is done!
5p2 is done!
6p4 is done!
6p6 is done!
7p2 is done!
7p8 is done!
8p4 is done!
10p4 is done!
11p4 is done!


In [13]:
# Persist the frame currently bound to `final_df` as a zip-compressed pickle
# named after the current `folder` code.
final_df.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/{folder}.pkl", compression='zip')

## Test

In [48]:
# Directory the zip-compressed pickles are written to and read back from.
path = "/mnt/d/sources/data/DL-PTV/Engineered/"

In [49]:
# Read the merged 3p6 frame back from its zip-compressed pickle.
df1 = pd.read_pickle(os.path.join(path, "3p6.pkl"), compression="zip")

In [55]:
# Disabled: take the first 1200 rows of df1 in index order.
# t = df1.sort_index().head(1200)
# Show only the rows whose `time` equals 300.
df1[df1["time"]==300]

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,time,distance
9681026,-117.0,87.0,-33.0,0.47,0.03,0.03,1.90,-0.53,0.57,300,150.0
9681027,-113.0,87.0,-33.0,0.47,0.04,0.03,2.00,-0.34,0.76,300,146.0
9681028,-109.0,87.0,-33.0,0.47,0.04,0.02,2.16,-0.05,0.95,300,143.0
9681029,-105.0,87.0,-33.0,0.47,0.04,0.02,2.33,0.27,1.10,300,140.0
9681030,-101.0,87.0,-33.0,0.48,0.04,0.01,2.48,0.49,1.17,300,137.0
...,...,...,...,...,...,...,...,...,...,...,...
9731707,109.0,-83.0,34.0,0.46,0.00,0.03,1.20,-0.26,1.46,300,141.0
9731708,113.0,-83.0,34.0,0.46,0.01,0.03,1.42,-0.38,1.18,300,144.0
9731709,117.0,-83.0,34.0,0.46,0.01,0.02,1.70,-0.54,0.88,300,147.0
9731710,121.0,-83.0,34.0,0.45,0.01,0.02,2.01,-0.73,0.57,300,151.0


In [8]:
# `t` is the small sample stored as test.pkl: the first 1200 rows of df1 in
# index order. It is built here so the cell runs on a fresh kernel instead of
# depending on a variable left over from an earlier session.
t = df1.sort_index().head(1200)
t.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/test.pkl", compression='zip')

In [135]:
# Keep only the rows whose `time` is below 3.
df = df1.loc[df1["time"].lt(3)]

In [136]:
# Distinct coordinate values along each axis, in order of first appearance.
x_values, y_values, z_values = (df[axis].unique() for axis in ("x", "y", "z"))

In [139]:
# Apply add_transformed_values once per axis, passing that axis' distinct values.
# NOTE(review): add_transformed_values is not defined in the cells shown here;
# it must be defined or imported before this cell can run on a fresh kernel.
df = add_transformed_values(df, x_values,"x")
df = add_transformed_values(df, y_values,"y")
df = add_transformed_values(df, z_values,"z")

In [140]:
# Preview the frame after the per-axis transformation; going by the recorded
# output it presumably now carries transformed_x/y/z columns (TODO confirm).
df.head()

Unnamed: 0,x,y,z,vx,vy,vz,time,transformed_x,transformed_y,transformed_z
0,-125.07891,86.888915,-33.059413,0.049976,0.002444,0.009331,1,-32.0,-22.0,-9.0
1,-121.116733,86.888915,-33.059413,0.049832,0.002287,0.009501,1,-31.0,-22.0,-9.0
2,-117.154557,86.888915,-33.059413,0.04963,0.002129,0.009603,1,-30.0,-22.0,-9.0
3,-113.192381,86.888915,-33.059413,0.049365,0.001991,0.009568,1,-29.0,-22.0,-9.0
4,-109.230205,86.888915,-33.059413,0.049041,0.001901,0.009327,1,-28.0,-22.0,-9.0


In [None]:
# Writes `final_df` to `<folder>.pkl` as a zip-compressed pickle. This cell has
# no execution count (In [None]) and repeats the save performed by an earlier cell.
final_df.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/{folder}.pkl", compression='zip')

In [34]:
# Load the small test sample back from its zip-compressed pickle.
test = pd.read_pickle("/mnt/d/sources/data/DL-PTV/Engineered/test.pkl", compression='zip')


In [41]:
# Split the test frame by column role, keeping the frame's own column order:
# coordinates plus time become the label array, the remaining measurements stay a frame.
labels = test.loc[:, test.columns.isin(["x", "y", "z", "time"])].to_numpy()
data = test.loc[:, test.columns.isin(["vx", "vy", "vz", "px", "py", "pz", "distance"])]

In [45]:
# First five label rows.
labels[0:5]

array([[-117.,   87.,  -33.,  363.],
       [-113.,   87.,  -33.,  363.],
       [-109.,   87.,  -33.,  363.],
       [-105.,   87.,  -33.,  363.],
       [-101.,   87.,  -33.,  363.]])

In [43]:
# Rebinds `test` from a DataFrame to a plain NumPy array. The positional
# slicing further down (test[:, ...]) relies on this, while DataFrame-only
# methods are no longer available on `test` after this cell runs.
test = test.to_numpy()

In [36]:
# Preview the first five rows. Row slicing works whether `test` is still a
# DataFrame or has already been converted to a NumPy array, whereas .head()
# exists only on the DataFrame and fails after the to_numpy() conversion.
test[:5]

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,time,distance
0,-117.0,87.0,-33.0,0.46,0.02,-0.0,2.47,-2.39,-0.49,363,150.0
1,-113.0,87.0,-33.0,0.46,0.02,0.0,2.54,-2.25,-0.54,363,146.0
2,-109.0,87.0,-33.0,0.47,0.03,0.0,2.44,-2.0,-0.58,363,143.0
3,-105.0,87.0,-33.0,0.47,0.03,0.0,2.21,-1.68,-0.61,363,140.0
4,-101.0,87.0,-33.0,0.47,0.03,0.0,1.89,-1.36,-0.6,363,137.0


In [30]:
# Split the array column-wise: the first three columns (x, y, z) are the
# labels, everything after them is the data.
# Plain slicing keeps `labels` two-dimensional (n_rows, 3); passing the slice
# through np.concatenate would join its rows into one flat 1-D array.
labels = test[:, 0:3]
data = test[:, 3:]

In [31]:
# First two entries of labels.
labels[0:2]

array([[-117.,   87.,  -33.],
       [-113.,   87.,  -33.]])

In [32]:
# First three rows of data.
data[0:3]

array([[ 4.60e-01,  2.00e-02, -0.00e+00,  2.47e+00, -2.39e+00, -4.90e-01,
         3.63e+02,  1.50e+02],
       [ 4.60e-01,  2.00e-02,  0.00e+00,  2.54e+00, -2.25e+00, -5.40e-01,
         3.63e+02,  1.46e+02],
       [ 4.70e-01,  3.00e-02,  0.00e+00,  2.44e+00, -2.00e+00, -5.80e-01,
         3.63e+02,  1.43e+02]])