In [1]:
import os
import sys
import h5py
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

from sklearn.base import clone
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import optuna

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import RMSprop, SGD
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping

import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')


# Print the installed TensorFlow version so the run records it.
print(tf.__version__)

2.10.0


In [2]:
def get_group_data(group):
    """Gather every entry found two levels below ``group`` into a DataFrame.

    ``group`` is a mapping-like object (the notebook passes an h5py group)
    whose keys end in ``...__YYYY-MM-DD__HH-MM-SS-fff``. Each value is itself
    a mapping from a file name to array-like data.

    Parameters
    ----------
    group : mapping
        Outer container; must support ``keys()`` and ``[]`` lookup.

    Returns
    -------
    pandas.DataFrame
        One row per inner entry with columns ``filename`` (inner key),
        ``data`` (``np.array`` of the inner value) and ``dt`` (timestamp
        parsed from the outer key).

    Raises
    ------
    ValueError
        If an outer key that has at least one child does not end in the
        expected date/time pattern.
    """
    data_list = []
    dt_list = []
    filename_list = []

    for key in group.keys():
        children = group[key]
        child_names = list(children.keys())

        # Outer keys without children contribute no rows and are never parsed.
        if not child_names:
            continue

        # The timestamp depends only on the outer key, so parse it once per
        # key instead of once per child.
        date_part, time_part = key.split('__')[-2:]
        dt = datetime.strptime(' '.join([date_part, time_part.replace('-', ':')]),
                               "%Y-%m-%d %H:%M:%S:%f")

        for key2 in child_names:
            data_list.append(np.array(children[key2]))
            dt_list.append(dt)
            filename_list.append(key2)

    df = pd.DataFrame({'filename': filename_list, 'data': data_list, 'dt': dt_list})
    return df

def plot_loss(history, num_epochs):
    """Draw training and validation loss for the first ``num_epochs`` epochs.

    ``history`` must expose a ``history`` dict containing ``'loss'`` and
    ``'val_loss'`` sequences. Curves are drawn on the current pyplot figure.
    """
    for curve in ('loss', 'val_loss'):
        plt.plot(history.history[curve][:num_epochs], label=curve)
    plt.xlabel('Epoch')
    plt.ylabel('Error [mm]')
    plt.legend()
    plt.grid(True)



In [3]:
# Collect every H5 file below the transformations directory.
# h5_files = glob.glob('scanner3DTop_Transformations/TESTDATASET_*/*.h5')
h5_files = glob.glob('scanner3DTop_Transformations/**/**/**.h5', recursive=True)

# Top-level entries a file must contain before it is used.
required_keys = {"EW", "NS", "environment_logger"}

df_list = []

for h5_file in h5_files:

    # Open read-only inside a context manager so the handle is released even
    # when a group is missing or a read fails part-way through.
    with h5py.File(h5_file, 'r') as h5:
        if not required_keys <= set(h5.keys()):
            continue

        # get_group_data copies the stored arrays into numpy, so nothing
        # below needs the open file handle.
        negative = get_group_data(group=h5["EW/individual/transformations/negative/"])
        positive = get_group_data(group=h5["EW/individual/transformations/positive/"])

    # Combine positive and negative transformations
    transformations = pd.concat([positive, negative])

    # Environment logger table stored in the same file
    env_df = pd.read_hdf(h5_file, 'environment_logger')

    # Attach each transformation to its logger row, then drop unused columns
    merged = env_df.merge(transformations, on='filename')
    merged = merged.drop(['directories', 'filename', 'dt', 'brightness'], axis=1)

    # Flatten the transformations
    merged['data'] = merged['data'].apply(lambda x: x.flatten())

    # Encode the categorical columns as floats
    merged['field'] = merged['field'].map({'north': 0.0, 'south': 1.0})
    merged['scan_direction'] = merged['scan_direction'].map({'Negative': 0.0, 'Positive': 1.0})

    df_list.append(merged.reset_index(drop=True))

# Fail with an actionable message instead of pandas' generic concat error.
if not df_list:
    raise ValueError(
        "No usable .h5 files found under 'scanner3DTop_Transformations/' "
        "(each file needs EW, NS and environment_logger entries)."
    )

df = pd.concat(df_list).dropna().reset_index(drop=True)

In [4]:
# Extract X, Y, Z transformations
def _translation_vector(transform):
    """Return the first three entries of the last column of a 4x4 transform.

    Values that are already a length-3 vector pass through unchanged, so the
    cell can be re-run without failing on ``reshape(4, 4)``.
    """
    values = np.asarray(transform)
    if values.shape == (3,):
        return values
    return values.reshape(4, 4)[:3, 3]


df['data'] = df['data'].apply(_translation_vector)

In [5]:
# Show the combined frame (rendered as a truncated preview below).
df

Unnamed: 0,time,sunDirection,airPressure,relHumidity,temperature,windDirection,precipitation,windVelocity,par,field,x_position,y_position,z_position,scan_direction,data
0,2022-05-05 23:09:24,338.213446,1010.343944,19.568468,25.102084,173.380535,0.089503,3.887448,0.000000,0.0,303.8460,0.000,1.234,1.0,"[-15.0, -15.0, 0.0]"
1,2022-05-05 23:11:12,338.707846,1010.407422,19.681387,25.071566,171.128269,0.089503,3.416852,0.000000,0.0,304.5450,22.135,1.234,0.0,"[-10.0, 15.0, 0.0]"
2,2022-05-05 23:13:01,339.213233,1010.383618,19.989624,24.931181,171.402936,0.086451,3.394879,1.465023,0.0,305.2460,0.000,1.234,1.0,"[-20.0, -15.0, 0.0]"
3,2022-05-05 23:14:50,339.707633,1010.296335,19.870602,24.949492,162.470779,0.089503,3.050630,0.000000,0.0,305.9455,22.135,1.234,0.0,"[-15.0, 10.0, 0.0]"
4,2022-05-05 23:16:38,340.311899,1010.256661,19.965209,24.827418,158.658406,0.086451,3.517563,0.000000,0.0,306.6455,0.000,1.234,1.0,"[-10.0, -10.0, 0.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4224,2023-07-21 16:27:18,272.403333,1004.527726,16.840114,44.298227,268.008667,0.110866,6.912442,1120.253937,0.0,304.5460,22.135,2.319,0.0,"[115.0, 85.0, 0.0]"
4225,2023-07-21 16:29:13,272.721946,1004.511856,17.099521,44.578997,273.732719,0.104762,4.070559,1103.162007,0.0,305.2460,3.800,2.319,1.0,"[120.0, 30.0, 0.0]"
4226,2023-07-21 16:31:07,272.919706,1004.503922,17.221595,44.548479,265.437788,0.104762,6.286203,1090.953486,0.0,305.9460,22.135,2.319,0.0,"[25.0, -15.0, 0.0]"
4227,2023-07-21 16:33:00,273.216346,1004.456313,17.334513,44.426405,269.436934,0.104762,4.409314,1073.373215,0.0,306.6455,3.800,2.319,1.0,"[390.0, 250.0, 0.0]"


In [128]:
# Take the row at position 4228 as a one-row frame and remove the columns the
# model does not use.
# NOTE(review): this row is selected from df but not removed from it, so it
# may also end up in the training split built from df — confirm.
test_ply_metadata = df.iloc[[4228]].drop(columns=["time", "par", "field"])
test_ply_metadata

Unnamed: 0,sunDirection,airPressure,relHumidity,temperature,windDirection,precipitation,windVelocity,x_position,y_position,z_position,scan_direction,data
4228,273.414106,1004.392834,17.447432,44.246345,288.586688,0.104762,4.477065,307.346,22.135,2.319,0.0,"[85.0, 50.0, 0.0]"


In [9]:
# Feature/label frame without the columns the model does not use.
df2 = df.drop(columns=['time', 'par', 'field'])
df2

Unnamed: 0,sunDirection,airPressure,relHumidity,temperature,windDirection,precipitation,windVelocity,x_position,y_position,z_position,scan_direction,data
0,338.213446,1010.343944,19.568468,25.102084,173.380535,0.089503,3.887448,303.8460,0.000,1.234,1.0,"[-15.0, -15.0, 0.0]"
1,338.707846,1010.407422,19.681387,25.071566,171.128269,0.089503,3.416852,304.5450,22.135,1.234,0.0,"[-10.0, 15.0, 0.0]"
2,339.213233,1010.383618,19.989624,24.931181,171.402936,0.086451,3.394879,305.2460,0.000,1.234,1.0,"[-20.0, -15.0, 0.0]"
3,339.707633,1010.296335,19.870602,24.949492,162.470779,0.089503,3.050630,305.9455,22.135,1.234,0.0,"[-15.0, 10.0, 0.0]"
4,340.311899,1010.256661,19.965209,24.827418,158.658406,0.086451,3.517563,306.6455,0.000,1.234,1.0,"[-10.0, -10.0, 0.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...
4224,272.403333,1004.527726,16.840114,44.298227,268.008667,0.110866,6.912442,304.5460,22.135,2.319,0.0,"[115.0, 85.0, 0.0]"
4225,272.721946,1004.511856,17.099521,44.578997,273.732719,0.104762,4.070559,305.2460,3.800,2.319,1.0,"[120.0, 30.0, 0.0]"
4226,272.919706,1004.503922,17.221595,44.548479,265.437788,0.104762,6.286203,305.9460,22.135,2.319,0.0,"[25.0, -15.0, 0.0]"
4227,273.216346,1004.456313,17.334513,44.426405,269.436934,0.104762,4.409314,306.6455,3.800,2.319,1.0,"[390.0, 250.0, 0.0]"


In [10]:
# Fresh frame with a clean 0..n-1 index.
dataset = df2.reset_index(drop=True)

# Disabled alternatives, kept for reference:
# Convert any Timestamps in the input features to a numerical representation
# dataset = dataset.apply(lambda x: x.astype(int) if np.issubdtype(x.dtype, np.datetime64) else x)
#dataset['time'] = dataset['time'].values.astype(int)

# Convert only the columns with numeric data types to float
#dataset[dataset.select_dtypes(include='number').columns] = dataset.select_dtypes(include='number').astype(float)

In [11]:
# Reproducible 80/20 split: sample the training rows, the rest is the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

In [12]:
# Separate the target column ('data') from the input features.
train_labels = train_dataset['data'].copy()
test_labels = test_dataset['data'].copy()

train_features = train_dataset.drop(columns='data')
test_features = test_dataset.drop(columns='data')

In [13]:
# Turn each label collection (one vector per row) into a single 2-D array.
train_labels = np.array(train_labels.tolist())
test_labels = np.array(test_labels.tolist())

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras import optimizers
import tensorflow.keras.backend as k

In [15]:
# (rows, feature columns) of the training inputs.
train_features.shape

(3383, 11)

In [16]:
# Learn per-feature mean/variance on the training inputs, then standardize them.
scaler = StandardScaler().fit(train_features)
scaled_train_features = scaler.transform(train_features)

In [17]:
# Report how many GPUs TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [18]:
# L1/L2 kernel-regularisation strengths shared by the three dense layers.
l1=4.5927655210613596e-08
l2=6.769705259483123e-10

# Three tanh dense layers, a single-step SimpleRNN, and a 3-unit linear output.
model_5 = keras.Sequential(
    [
        # shape must be a tuple; `(n)` without the trailing comma is just an int.
        layers.Input(shape=(train_features.shape[1],), name="in_layer"),
        layers.Dense(112, activation='tanh', kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2), name="dl1"),
        layers.Dense(112, activation='tanh', kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2), name="dl2"),
        layers.Dense(112, activation='tanh', kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2), name="dl3"),
        # SimpleRNN expects (timesteps, features); feed the dense output as one step.
        layers.Reshape((1,112), name="reshape_layer"),
        layers.SimpleRNN(112, activation='tanh', name="rnn2_layer"),
        layers.Dense(3, name="out_layer")
    ]
)

# Optimiser and training settings.
lr = 0.0017172015688603477
b1 = 0.8539642649814688
b2 = 0.9358548594051231
cv =  0.964758137195115
num_batches=32
# opt='sgd'
# Kept under its own name so the fitted StandardScaler stored in `scaler`
# is not overwritten by this descriptive string.
scaler_name='standard_scaler'
loss_func = 'huber_loss'
num_epochs=998

model_5.summary()
# Use the optimizer from the same tensorflow.keras namespace as the model.
opt = keras.optimizers.Adam(learning_rate=lr, beta_1=b1, beta_2=b2, clipvalue=cv)
#opt = SGD(learning_rate=lr) #, clipvalue=cv)
# This is a regression on three continuous outputs, so track mean absolute
# error; classification accuracy carries no meaning here.
model_5.compile(loss=loss_func, optimizer=opt, metrics=["mae"])

#early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=30)
history_5 = model_5.fit(scaled_train_features, train_labels, validation_split=0.2, verbose=1, epochs=num_epochs, batch_size=num_batches) #, callbacks=[early_stopping])

# Report the final-epoch performance on the validation split
val_loss = history_5.history['val_loss'][-1]
val_mae = history_5.history['val_mae'][-1]
print("Val loss: ", val_loss)
print("Val MAE: ", val_mae)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dl1 (Dense)                 (None, 112)               1344      
                                                                 
 dl2 (Dense)                 (None, 112)               12656     
                                                                 
 dl3 (Dense)                 (None, 112)               12656     
                                                                 
 reshape_layer (Reshape)     (None, 1, 112)            0         
                                                                 
 rnn2_layer (SimpleRNN)      (None, 112)               25200     
                                                                 
 out_layer (Dense)           (None, 3)                 339       
                                                                 
Total params: 52,195
Trainable params: 52,195
Non-traina

In [19]:
import numpy as np
import open3d as o3d

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [144]:
# load in specific point cloud that was dropped from dataset
# NOTE(review): absolute, machine-specific path — consider moving it to a
# configurable data directory.
file_name_w = "C:/Users/emily/OneDrive/Desktop/3d_transformation_prediction/2023-07-21__12-59-17-030_sorghum_west_downsampled/west_downsampled/2023-07-21__16-34-56-036/5988af8d-12e4-49b5-b929-14af2e97f158__Top-heading-west.ply"

# Fail early with a clear message rather than continuing with a cloud that
# could not be read.
if not os.path.isfile(file_name_w):
    raise FileNotFoundError(f"Point cloud file not found: {file_name_w}")

# Two independent copies: one receives the predicted transform, the other the
# expected one.
west_ply_test = o3d.io.read_point_cloud(file_name_w)
west_ply_target = o3d.io.read_point_cloud(file_name_w)

In [129]:
# Detach the expected transformation from the metadata row; what remains is
# the feature row that is fed to the network.
target_transformation = test_ply_metadata.pop('data')
test_ply_metadata.shape

(1, 11)

In [130]:
# The network was fitted on standardized features, so this row has to go
# through the same standardization before prediction. The scaler is re-fitted
# on train_features here so the cell does not rely on `scaler` still holding
# a fitted StandardScaler.
feature_scaler = StandardScaler().fit(train_features)
net_results = model_5.predict(feature_scaler.transform(test_ply_metadata))



In [131]:
# Unwrap the single stored vector from the Series and append the homogeneous
# coordinate 1.
target_transformation = [*target_transformation.tolist()[0], 1]


In [132]:
# Take the first (only) predicted vector and append the homogeneous
# coordinate 1.
net_results = [*net_results[0], 1]

In [133]:
# Convert both 4-element lists to numpy arrays.
net_results, target_transformation = (
    np.array(net_results),
    np.array(target_transformation),
)

In [135]:
# Create matrix
# Each 4-vector [x, y, z, 1] becomes the last column of a 4x4 identity matrix.
transform_matrices = []
for offsets in (net_results, target_transformation):
    matrix = np.eye(4)
    matrix[:, 3] = offsets
    transform_matrices.append(matrix)

net_results, target_transformation = transform_matrices

In [145]:
# Apply the predicted matrix to one cloud and the expected matrix to the other.
# NOTE(review): the clouds are written out afterwards without reassignment,
# so transform() appears to modify them in place; re-running this cell would
# then apply the shift a second time — confirm.
west_ply_test.transform(net_results)
west_ply_target.transform(target_transformation)

PointCloud with 393763 points.

In [146]:
# Save both transformed clouds as binary PLY files in the working directory
# so they can be compared; the displayed value is the second call's result.
o3d.io.write_point_cloud("target_results.ply", west_ply_target, write_ascii=False)
o3d.io.write_point_cloud("net_results.ply", west_ply_test, write_ascii=False)

True

In [107]:
# pcd = o3d.geometry.PointCloud()

#     # the method Vector3dVector() will convert numpy array of shape (n, 3) to Open3D format.
#     # see http://www.open3d.org/docs/release/python_api/open3d.utility.Vector3dVector.html#open3d.utility.Vector3dVector
# pcd.points = o3d.utility.Vector3dVector(net_results)

#     # http://www.open3d.org/docs/release/python_api/open3d.io.write_point_cloud.html#open3d.io.write_point_cloud
# o3d.io.write_point_cloud("net_results.ply", pcd, write_ascii=True)

True

In [None]:
# Overlay the training-loss curves of three runs.
# NOTE(review): history_1, history_2 and history_3 are not assigned in the
# cells visible in this notebook (only history_5 is) — confirm where they
# come from before running.
for run_history, run_label in (
    (history_1, 'loss-4 layers'),
    (history_2, 'loss-5 layers'),
    (history_3, 'loss-6 layers'),
):
    plt.plot(run_history.history['loss'][:num_epochs], label=run_label)

plt.xlabel('Epoch')
plt.ylabel('Error [mm]')
plt.legend()
plt.grid(True)
plt.savefig('comp_layers_plot.png', bbox_inches='tight')

In [None]:
# NOTE(review): the same file is already saved at the end of the plotting cell
# above. Run on its own, this call saves whatever the current figure is, which
# may be an empty one and would overwrite that plot — confirm and consider
# deleting this cell.
plt.savefig('comp_layers_plot.png', bbox_inches='tight')

In [None]:
# The model was trained on standardized inputs, so the test features must be
# standardized with statistics learned from the training features before
# predicting. The scaler is re-fitted here so the cell does not rely on
# `scaler` still holding a fitted StandardScaler.
feature_scaler = StandardScaler().fit(train_features)
test_predictions = model_5.predict(feature_scaler.transform(test_features))

# Quick side-by-side check of the first expected and predicted vectors.
print("first label:     ", test_labels[0])
print("first prediction:", test_predictions[0])
test_predictions.shape

In [None]:
# Standardize the test features the same way as the training inputs before
# predicting; the scaler is re-fitted here so the cell stands on its own.
feature_scaler = StandardScaler().fit(train_features)
test_predictions = model_5.predict(feature_scaler.transform(test_features))

fig, ax = plt.subplots()
ax.set_aspect('equal')
ax.scatter(test_labels, test_predictions)
# Units follow the '[mm]' used on the loss plots in this notebook — TODO
# confirm. The previous '[MPG]' labels belonged to a different dataset.
ax.set_xlabel('True Values [mm]')
ax.set_ylabel('Predictions [mm]')

# Derive the axis range from the data so no points are clipped, and draw the
# y = x reference line across it.
lims = [
    float(min(test_labels.min(), test_predictions.min())),
    float(max(test_labels.max(), test_predictions.max())),
]
ax.set_xlim(lims)
ax.set_ylim(lims)
ax.plot(lims, lims)

fig.savefig('test_predictions_original.png', bbox_inches='tight')