In [3]:
import os
import glob
import pandas as pd
import numpy as np

### Find folder based on speed

In [57]:
def speed_to_folder(speed):
    if speed == 0.05232:
        return "3p6" # 1/0.4735 = 2.1193
    elif speed == 0.06528:
        return "4p4"
    elif speed == 0.06852:
        return "4p6"
    elif speed == 0.07824:
        return "5p2"
    elif speed == 0.09768:
        return "6p4"
    elif speed == 0.10092:
        return "6p6"
    elif speed == 0.11064:
        return "7p2"
    elif speed == 0.12036:
        return "7p8"
    elif speed == 0.13008:
        return "8p4"
    elif speed == 0.16248:
        return "10p4"
    elif speed == 0.17868:
        return "11p4"
    else:
        return "test"

In [58]:
def distance(x, y, z):
    return np.sqrt(x**2.0 + y**2.0 + z**2.0)

### Merge files in the folders

In [7]:
def get_files_list(speed):
    folder = speed_to_folder(speed)
    # path = os.getcwd()
    csv_files = glob.glob(os.path.join(f"/mnt/d/sources/data/DL-PTV/{folder}-1", "*.csv"))+glob.glob(os.path.join(f"/mnt/d/sources/data/DL-PTV/{folder}-2", "*.csv"))

    data = []
    for csv in csv_files:
        frame = pd.read_csv(csv, sep=";")
        frame.columns= ["x","y","z","vx","vy","vz"]
        frame = pd.merge(frame.iloc[:50689,:], frame.iloc[50690:,:], how="inner", on=["x", "y", "z"])
        frame.columns=["x","y","z","vx","vy","vz","px","py","pz"]
        frame['time'] = int(csv[-8:][:4])
        frame["distance"] = distance(frame["x"], frame["y"], frame["z"])
        frame["distance"] = frame["distance"].round(decimals=0)
        data.append(frame)

    df = pd.concat(data, ignore_index=True)
    df.columns=["x","y","z","vx","vy","vz","px","py","pz","time","distance"]
    # Converting kPa to Pa for presurre columns
    df["px"]=df["px"]*1000
    df["py"]=df["py"]*1000
    df["pz"]=df["pz"]*1000
    # Scaling up the velocity
    df["vx"]=df["vx"]*10
    df["vy"]=df["vy"]*10
    df["vz"]=df["vz"]*10
    # Rounding decimal place
    df["vx"] = df["vx"].round(decimals=2)
    df["vy"] = df["vy"].round(decimals=2)
    df["vz"] = df["vz"].round(decimals=2)
    df["px"] = df["px"].round(decimals=2)
    df["py"] = df["py"].round(decimals=2)
    df["pz"] = df["pz"].round(decimals=2)

    df["x"] = df["x"].round(decimals=0)
    df["y"] = df["y"].round(decimals=0)
    df["z"] = df["z"].round(decimals=0)


    return df

In [8]:
# final_df = pd.read_pickle(f"/mnt/d/sources/data/DL-PTV/merged/Old_merged/3p6.pkl", compression="zip")

In [9]:
speed = 0.05232
folder = speed_to_folder(speed)
final_df = get_files_list(speed)

In [10]:
final_df.head()

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,time,distance
0,-117.0,87.0,-33.0,0.46,0.02,-0.0,2.47,-2.39,-0.49,363,150.0
1,-113.0,87.0,-33.0,0.46,0.02,0.0,2.54,-2.25,-0.54,363,146.0
2,-109.0,87.0,-33.0,0.47,0.03,0.0,2.44,-2.0,-0.58,363,143.0
3,-105.0,87.0,-33.0,0.47,0.03,0.0,2.21,-1.68,-0.61,363,140.0
4,-101.0,87.0,-33.0,0.47,0.03,0.0,1.89,-1.36,-0.6,363,137.0


In [11]:
final_df.shape

(60823200, 11)

In [15]:
speeds=[0.06528, 0.06852, 0.07824, 0.09768, 0.10092, 0.11064, 0.12036, 0.13008, 0.16248, 0.17868]
for i in speeds:
    folder = speed_to_folder(i)
    final_df = get_files_list(i)
    final_df.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/{folder}.pkl", compression='zip')
    print(f"{folder} is done!")

4p4 is done!
4p6 is done!
5p2 is done!
6p4 is done!
6p6 is done!
7p2 is done!
7p8 is done!
8p4 is done!
10p4 is done!
11p4 is done!


In [13]:
final_df.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/{folder}.pkl", compression='zip')

## Test

In [4]:
path = "/mnt/d/sources/data/DL-PTV/Engineered/"

In [7]:
df1 = pd.read_pickle(path + "multiple_speeds.pkl", compression="zip")

In [12]:
# t = df1.sort_index().head(1200)
# df1.iloc[:500]
df1["speed"].unique()

array([ 5.232,  6.528,  6.852,  7.824,  9.768, 10.092])

In [11]:
df1[np.isclose(df1["speed"],6.5280)]

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,distance,speed
50686,-117.0,87.0,-33.0,0.56,0.04,0.02,1.68,-9.85,-2.00,150.0,6.528
50687,-113.0,87.0,-33.0,0.56,0.04,0.02,2.40,-9.85,-2.03,146.0,6.528
50688,-109.0,87.0,-33.0,0.56,0.04,0.03,2.85,-9.66,-1.83,143.0,6.528
50689,-105.0,87.0,-33.0,0.56,0.05,0.03,3.08,-9.24,-1.37,140.0,6.528
50690,-101.0,87.0,-33.0,0.56,0.05,0.02,3.21,-8.61,-0.67,137.0,6.528
...,...,...,...,...,...,...,...,...,...,...,...
101367,109.0,-83.0,34.0,0.55,-0.02,0.07,-1.12,-0.49,6.93,141.0,6.528
101368,113.0,-83.0,34.0,0.54,-0.02,0.07,-0.45,-0.72,6.32,144.0,6.528
101369,117.0,-83.0,34.0,0.54,-0.02,0.07,0.29,-0.90,5.79,147.0,6.528
101370,121.0,-83.0,34.0,0.53,-0.02,0.07,1.08,-1.08,5.34,151.0,6.528


In [35]:
path = "/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds.pkl"
speeds = [ 5.232,  6.528,  6.852,  7.824,  9.768, 10.092]

data = []
df = pd.read_pickle(path, compression="zip")
for speed in speeds:
    frame= df[np.isclose(df["speed"],speed)]
    data.append(frame.iloc[:500])
    print(str(speed)+" is done!")

df_speeds = pd.concat(data, ignore_index=True)
df_speeds.columns=["x","y","z","vx","vy","vz","px","py","pz","distance", "speed"]
df_speeds.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds_500.pkl", compression='zip')

5.232 is done!
6.528 is done!
6.852 is done!
7.824 is done!
9.768 is done!
10.092 is done!


In [36]:
df = pd.read_pickle("/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds_500.pkl", compression="zip")
df["speed"].unique()

array([ 5.232,  6.528,  6.852,  7.824,  9.768, 10.092])

In [None]:
path = "/mnt/d/sources/data/DL-PTV/Engineered/"
files = ["3p6.pkl", "4p4.pkl", "4p6.pkl", "5p2.pkl", "6p4.pkl", "6p6.pkl"]
speeds=[0.05232, 0.06528, 0.06852, 0.07824, 0.09768, 0.10092]
data = []
j=0
for i in files:
    frame = pd.read_pickle(path + i, compression="zip")
    frame["speed"] = speeds[j]*100
    frame = frame[frame["time"]==300]
    data.append(frame.iloc[:500])
    j+=1
    print(i+" is done!")

df_speeds = pd.concat(data, ignore_index=True)
df_speeds.columns=["x","y","z","vx","vy","vz","px","py","pz","time","distance", "speed"]
df_speeds.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds_500.pkl", compression='zip')

In [17]:
df_speeds["speed"] = df_speeds["speed"].round(decimals=2)

In [64]:
# df_speeds.groupby(["x","y","z"]).nunique()
import itertools
# Create a list of column names
cols = ['x', 'y', 'z']

# Generate all unique combinations of three columns
combinations = list(itertools.combinations(cols, 3))

# Print the list of combinations
print(combinations)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,vx,vy,vz,px,py,pz,distance,speed
x,y,z,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
-125.0,59.0,-33.0,6,4,6,6,6,6,1,6
-125.0,63.0,-33.0,6,4,5,6,6,6,1,6
-125.0,67.0,-33.0,6,4,4,6,6,6,1,6
-125.0,71.0,-33.0,6,2,5,6,6,6,1,6
-125.0,75.0,-33.0,6,4,4,6,6,6,1,6
...,...,...,...,...,...,...,...,...,...,...
125.0,71.0,-33.0,6,6,5,6,6,6,1,6
125.0,75.0,-33.0,6,6,5,6,6,6,1,6
125.0,79.0,-33.0,6,5,6,6,6,6,1,6
125.0,83.0,-33.0,6,5,6,6,6,6,1,6


In [50]:
a = pd.unique(df_speeds[["x","y","z"]].values.ravel())
dic = []
df_p = pd.DataFrame(columns=["x","y","z"])
for i in range(len(a)-2):

    dic.append([a[i], a[i+1], a[i+2]])
    df_p.loc[len(df_p.index)] = [a[i], a[i+1], a[i+2]]

# df_position = pd.concat(dic, ignore_index=True)
# df_position.columns=["x","y","z"]
# df_position
df_p

Unnamed: 0,x,y,z
0,-117.0,87.0,-33.0
1,87.0,-33.0,-113.0
2,-33.0,-113.0,-109.0
3,-113.0,-109.0,-105.0
4,-109.0,-105.0,-101.0
...,...,...,...
66,-121.0,79.0,75.0
67,79.0,75.0,71.0
68,75.0,71.0,67.0
69,71.0,67.0,63.0


In [60]:
df_speeds[df_speeds["speed"]==5.232].count()

x           500
y           500
z           500
vx          500
vy          500
vz          500
px          500
py          500
pz          500
distance    500
speed       500
dtype: int64

In [58]:
df_speeds

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,distance,speed
0,-117.0,87.0,-33.0,0.47,0.03,0.03,1.90,-0.53,0.57,150.0,5.232
1,-113.0,87.0,-33.0,0.47,0.04,0.03,2.00,-0.34,0.76,146.0,5.232
2,-109.0,87.0,-33.0,0.47,0.04,0.02,2.16,-0.05,0.95,143.0,5.232
3,-105.0,87.0,-33.0,0.47,0.04,0.02,2.33,0.27,1.10,140.0,5.232
4,-101.0,87.0,-33.0,0.48,0.04,0.01,2.48,0.49,1.17,137.0,5.232
...,...,...,...,...,...,...,...,...,...,...,...
2995,69.0,59.0,-33.0,0.92,0.00,0.01,8.11,4.51,5.62,97.0,10.092
2996,73.0,59.0,-33.0,0.92,0.00,-0.00,8.43,4.03,0.78,100.0,10.092
2997,77.0,59.0,-33.0,0.92,0.00,0.00,7.10,3.69,-3.44,103.0,10.092
2998,81.0,59.0,-33.0,0.92,0.00,0.02,4.09,3.58,-6.29,106.0,10.092


In [66]:
df_speeds.drop(["time"], axis=1, inplace=True)

In [67]:
df_speeds.shape

(304116, 11)

In [68]:
df_speeds.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds.pkl", compression='zip')

In [8]:
t.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/test.pkl", compression='zip')

In [135]:
df = df1.loc[df1["time"]<3]

In [136]:
x_values = df["x"].unique()
y_values = df["y"].unique()
z_values = df["z"].unique()

In [139]:
df = add_transformed_values(df, x_values,"x")
df = add_transformed_values(df, y_values,"y")
df = add_transformed_values(df, z_values,"z")

In [140]:
df.head()

Unnamed: 0,x,y,z,vx,vy,vz,time,transformed_x,transformed_y,transformed_z
0,-125.07891,86.888915,-33.059413,0.049976,0.002444,0.009331,1,-32.0,-22.0,-9.0
1,-121.116733,86.888915,-33.059413,0.049832,0.002287,0.009501,1,-31.0,-22.0,-9.0
2,-117.154557,86.888915,-33.059413,0.04963,0.002129,0.009603,1,-30.0,-22.0,-9.0
3,-113.192381,86.888915,-33.059413,0.049365,0.001991,0.009568,1,-29.0,-22.0,-9.0
4,-109.230205,86.888915,-33.059413,0.049041,0.001901,0.009327,1,-28.0,-22.0,-9.0


In [None]:
final_df.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/{folder}.pkl", compression='zip')

In [34]:
test = pd.read_pickle("/mnt/d/sources/data/DL-PTV/Engineered/test.pkl", compression='zip')


In [41]:
labels= test.drop(test.columns.difference(["x","y","z","time"]), axis=1).to_numpy()
data = test.drop(test.columns.difference(["vx","vy","vz","px","py","pz","distance"]), axis=1)

In [45]:
labels[0:5]

array([[-117.,   87.,  -33.,  363.],
       [-113.,   87.,  -33.,  363.],
       [-109.,   87.,  -33.,  363.],
       [-105.,   87.,  -33.,  363.],
       [-101.,   87.,  -33.,  363.]])

In [43]:
test = test.to_numpy()

In [36]:
test.head()

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,time,distance
0,-117.0,87.0,-33.0,0.46,0.02,-0.0,2.47,-2.39,-0.49,363,150.0
1,-113.0,87.0,-33.0,0.46,0.02,0.0,2.54,-2.25,-0.54,363,146.0
2,-109.0,87.0,-33.0,0.47,0.03,0.0,2.44,-2.0,-0.58,363,143.0
3,-105.0,87.0,-33.0,0.47,0.03,0.0,2.21,-1.68,-0.61,363,140.0
4,-101.0,87.0,-33.0,0.47,0.03,0.0,1.89,-1.36,-0.6,363,137.0


In [30]:
labels = np.concatenate((test[:, 0:3]))
data = test[:, 3:]

In [31]:
labels[0:2]

array([[-117.,   87.,  -33.],
       [-113.,   87.,  -33.]])

In [32]:
data[0:3]

array([[ 4.60e-01,  2.00e-02, -0.00e+00,  2.47e+00, -2.39e+00, -4.90e-01,
         3.63e+02,  1.50e+02],
       [ 4.60e-01,  2.00e-02,  0.00e+00,  2.54e+00, -2.25e+00, -5.40e-01,
         3.63e+02,  1.46e+02],
       [ 4.70e-01,  3.00e-02,  0.00e+00,  2.44e+00, -2.00e+00, -5.80e-01,
         3.63e+02,  1.43e+02]])

In [65]:
df = pd.read_pickle("/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds_500.pkl", compression="zip")
df_loc = pd.read_csv("/mnt/d/sources/data/DL-PTV/Engineered/loc.csv")

In [66]:
result = pd.merge(df, df_loc, how="left", on=["x", "y", "z"])

In [67]:
result.head()

Unnamed: 0,x,y,z,vx,vy,vz,px,py,pz,distance,speed,label
0,-117.0,87.0,-33.0,0.47,0.03,0.03,1.9,-0.53,0.57,150.0,5.232,3
1,-117.0,87.0,-33.0,0.47,0.03,0.03,1.9,-0.53,0.57,150.0,5.232,50691
2,-113.0,87.0,-33.0,0.47,0.04,0.03,2.0,-0.34,0.76,146.0,5.232,4
3,-113.0,87.0,-33.0,0.47,0.04,0.03,2.0,-0.34,0.76,146.0,5.232,50692
4,-109.0,87.0,-33.0,0.47,0.04,0.02,2.16,-0.05,0.95,143.0,5.232,5


In [69]:
len(result.label.unique())

1000

In [72]:
result.groupby("label").count()

Unnamed: 0_level_0,x,y,z,vx,vy,vz,px,py,pz,distance,speed
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,6,6,6,6,6,6,6,6,6,6,6
4,6,6,6,6,6,6,6,6,6,6,6
5,6,6,6,6,6,6,6,6,6,6,6
6,6,6,6,6,6,6,6,6,6,6,6
7,6,6,6,6,6,6,6,6,6,6,6
...,...,...,...,...,...,...,...,...,...,...,...
51186,6,6,6,6,6,6,6,6,6,6,6
51187,6,6,6,6,6,6,6,6,6,6,6
51188,6,6,6,6,6,6,6,6,6,6,6
51189,6,6,6,6,6,6,6,6,6,6,6


In [73]:
result.to_pickle(f"/mnt/d/sources/data/DL-PTV/Engineered/multiple_speeds_500.pkl", compression='zip')