In [64]:
import h5py
import tensorflow as tf
import numpy as np
import math
from matplotlib import pyplot as plt
from tensorflow.keras import layers, models, Model, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Lambda

In [240]:
# HDF5 file behind the training pipeline: it is handed to hdf5_generator and
# reopened later to count the available samples.
hdf5_file = "energy_data.h5"

In [242]:
# Sample-by-sample reader used as the source of the tf.data pipeline
def hdf5_generator(hdf5_file):
    """Lazily yield ``(image, label)`` pairs from an HDF5 file.

    The file is expected to contain two datasets, ``'data'`` and ``'labels'``,
    indexed along their first axis. Each image gets a trailing axis of size 1
    appended before it is yielded (assumed to act as a single "channel" for
    the 3D convolution layers -- the per-sample layout is not visible here).

    Args:
        hdf5_file: Path of the HDF5 file to open read-only.

    Yields:
        Tuple ``(image, label)`` where ``image`` is ``data[i]`` with one extra
        trailing dimension and ``label`` is ``labels[i]``.
    """
    with h5py.File(hdf5_file, 'r') as h5:
        images, targets = h5['data'], h5['labels']

        # The file stays open for the lifetime of the generator, so only one
        # sample at a time is read.
        for idx in range(images.shape[0]):
            volume = images[idx]
            target = targets[idx]
            yield volume[..., np.newaxis], target

# Build the lazy tf.data pipeline on top of the generator. Nothing is read
# from disk here; samples are pulled only when the dataset is iterated.
dataset = tf.data.Dataset.from_generator(
    hdf5_generator,
    args=[hdf5_file],  # forwarded to hdf5_generator as its single argument
    output_signature=(
        # image: fixed-size volume with the trailing axis added by the generator
        tf.TensorSpec(shape=(20, 110, 11, 1), dtype=tf.float32),
        # label: one scalar per sample
        tf.TensorSpec(shape=(), dtype=tf.int16),
    ),
).prefetch(tf.data.AUTOTUNE)  # let tf.data overlap reading with consumption

print("Data loaded")

Data loaded


In [244]:
# Read the sample count straight from the file: the generator-backed dataset
# cannot report its own length.
with h5py.File(hdf5_file, "r") as hdf:
    dataset_size = hdf["data"].shape[0]  # first axis indexes samples
print(f"Dataset size: {dataset_size}")

Dataset size: 450000


In [246]:
# Fraction of the file used for training; the validation/test split sizes
# elsewhere in the notebook are derived from the same value.
p = 0.1

# First p * dataset_size samples, grouped into full batches of 32
# (an incomplete final batch is dropped).
train_dataset = dataset.take(int(dataset_size * p)).batch(32, drop_remainder=True)

In [248]:
# Stop training once the validation loss has not improved for 3 consecutive
# epochs, then roll the model back to the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,
    patience=3,
)

In [250]:
# HDF5 file behind test_dataset, from which the validation and test splits
# are taken.
test_hdf5_file = "angle_data.h5"

In [252]:
# Define the generator function
def test_hdf5_generator(hdf5_file):
    """Lazily yield ``(image, label)`` pairs from the given HDF5 file.

    The file must contain the datasets ``'data'`` and ``'labels'``, indexed
    along their first axis. A trailing axis of size 1 is appended to every
    image before it is yielded.

    Args:
        hdf5_file: Path of the HDF5 file to read. This is the file that is
            opened; the function no longer falls back to the module-level
            ``test_hdf5_file`` variable, so the ``args=[...]`` passed by
            ``tf.data.Dataset.from_generator`` actually selects the file.

    Yields:
        Tuple ``(image, label)`` where ``image`` is ``data[i]`` with one extra
        trailing dimension and ``label`` is ``labels[i]``.
    """
    with h5py.File(hdf5_file, 'r') as f:
        data = f['data']
        labels = f['labels']
        num_samples = data.shape[0]  # first axis indexes samples

        for i in range(num_samples):
            image = data[i]
            label = labels[i]

            # Append the single-channel axis the model input expects.
            image = np.expand_dims(image, axis=-1)

            yield image, label

# Lazy tf.data pipeline over the second HDF5 file; the validation and test
# splits are carved out of it. No data is read until it is iterated.
test_dataset = tf.data.Dataset.from_generator(
    test_hdf5_generator,
    args=[test_hdf5_file],  # forwarded to test_hdf5_generator
    output_signature=(
        # image: fixed-size volume with the trailing axis added by the generator
        tf.TensorSpec(shape=(20, 110, 11, 1), dtype=tf.float32),
        # label: one scalar per sample
        tf.TensorSpec(shape=(), dtype=tf.int16),
    ),
).prefetch(tf.data.AUTOTUNE)  # overlap reading with consumption

print("Data loaded")

Data loaded


In [254]:
# Validation split: the first 0.3 * p * dataset_size samples of test_dataset,
# in full batches of 32 (an incomplete final batch is dropped).
val_dataset = test_dataset.take(int(0.3 * dataset_size * p)).batch(32, drop_remainder=True)

In [256]:
# ---- Input: one single-channel 3D volume per sample ----
input_3d = Input(shape=(20, 110, 11, 1), name='3D_Input')

# ---- Convolutional branch: two Conv3D + MaxPooling3D stages, then flatten ----
# The first convolution uses the default padding; the second keeps the spatial size.
x = layers.Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu')(input_3d)
x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
x = layers.Conv3D(filters=32, kernel_size=(3, 3, 3), padding='same', activation='relu')(x)
x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
x = layers.Flatten()(x)

# ---- Scalar branch: sum of every value in the input volume ----
# Reducing over all non-batch axes gives shape (None,); the second Lambda
# restores a feature axis so the result is (None, 1).
summed_tensor = Lambda(lambda t: tf.reduce_sum(t, axis=[1, 2, 3, 4]))(input_3d)
summed_tensor_expanded = Lambda(lambda t: tf.expand_dims(t, axis=-1))(summed_tensor)

# ---- Merge the flattened CNN features with the scalar sum ----
combined = layers.Concatenate(axis=-1)([x, summed_tensor_expanded])

# ---- Regression head ----
fc = layers.Dense(128, activation='relu')(combined)
fc = layers.Dense(64, activation='relu')(fc)
output = layers.Dense(1, activation='linear')(fc)  # single unbounded value

# ---- Assemble, compile and show the network ----
model = Model(inputs=[input_3d], outputs=output)
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)
model.summary()

# To be tested: in theory the network should also take the total energy into account.

In [258]:
print("Training")
# Training.
# NOTE: no `shuffle` argument is passed. Keras ignores it when the input is a
# tf.data.Dataset, so `shuffle=True` gave a false impression that samples were
# being shuffled. Batches are consumed in the order the pipeline yields them;
# shuffle inside the tf.data pipeline if shuffling is required.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=20,                   # upper bound; EarlyStopping normally ends training sooner
    callbacks=[early_stopping],  # stops on stalled val_loss and restores the best weights
    verbose=1,
)
print("Training done")

Training
Epoch 1/20
[1m1406/1406[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m400s[0m 284ms/step - loss: 102.2959 - mean_absolute_error: 6.4848 - val_loss: 36.3222 - val_mean_absolute_error: 4.5678
Epoch 2/20
[1m1406/1406[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m391s[0m 278ms/step - loss: 50.9853 - mean_absolute_error: 5.4195 - val_loss: 32.6939 - val_mean_absolute_error: 4.3409
Epoch 3/20
[1m1406/1406[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m388s[0m 276ms/step - loss: 47.8792 - mean_absolute_error: 5.2534 - val_loss: 31.1057 - val_mean_absolute_error: 4.2245
Epoch 4/20
[1m1406/1406[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m377s[0m 268ms/step - loss: 45.1523 - mean_absolute_error: 5.1036 - val_loss: 30.1498 - val_mean_absolute_error: 4.1594
Epoch 5/20
[1m1406/1406[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m639s[0m 455ms/step - loss: 43.1076 - mean_absolute_error: 4.9989 - val_loss: 30.2858 - val_mean_absolute_error: 4.1723
Epoch 6/20
[1m1406/1406[0m 

In [262]:
# Build the held-out test split here so this cell also works on a fresh
# top-to-bottom run. Previously `test_ds` was only defined in a later cell,
# which raises NameError after Restart & Run All.
# The split skips the samples used for validation and takes the next
# 0.3 * p * dataset_size samples, in full batches of 32.
test_ds = test_dataset.skip(int(0.3 * dataset_size * p)).take(int(0.3 * dataset_size * p))
test_ds = test_ds.batch(32, drop_remainder=True)

# evaluate() returns the loss followed by the compiled metrics (here: MAE).
test_loss, test_mae = model.evaluate(test_ds)
print("Test MAE dla sieci konwolucyjnej:", test_mae)

[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 72ms/step - loss: 31.1737 - mean_absolute_error: 4.2007
Test MAE dla sieci konwolucyjnej: 4.1614580154418945


In [194]:
# Ask tf.data how many batches the training dataset holds.
# The recorded result is -2, which is tf.data.UNKNOWN_CARDINALITY: the length
# of a generator-backed pipeline is not known without iterating over it.
batch_count = train_dataset.cardinality()

print(f"Number of batches in the dataset: {batch_count}")


Number of batches in the dataset: -2


In [260]:
# Test split: skip the samples already used for validation, then take an
# equally sized slice (0.3 * p * dataset_size samples) in full batches of 32.
test_ds = (
    test_dataset
    .skip(int(0.3 * dataset_size * p))
    .take(int(0.3 * dataset_size * p))
    .batch(32, drop_remainder=True)
)

In [310]:
# Parameter-free baseline: predict the label as (sum of all input values) / 10.
# NOTE: this rebinds `input_3d`, `output` and `model`, so the trained CNN is no
# longer reachable through `model` after this cell runs.
input_3d = Input(shape=(20, 110, 11, 1), name='3D_Input')

# Sum over every non-batch axis -> (None,), then restore a feature axis -> (None, 1).
summed_tensor = Lambda(lambda t: tf.reduce_sum(t, axis=[1, 2, 3, 4]))(input_3d)
summed_tensor_expanded = Lambda(lambda t: tf.expand_dims(t, axis=-1))(summed_tensor)

# Fixed rescaling of the summed signal; there is nothing to train.
output = summed_tensor_expanded / 10

model = Model(inputs=[input_3d], outputs=output)

# compile() is needed only so evaluate() can report loss and MAE; the optimizer
# is never used because the model has no trainable weights.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.summary()

In [312]:
# Score whatever `model` currently refers to on the test split; the printed
# label ("klasyki" = the classical baseline) assumes the sum-based baseline
# model was the last one built.
# This overwrites test_loss / test_mae from any earlier evaluation.
test_loss, test_mae = model.evaluate(test_ds)
print("Test MAE dla klasyki:", test_mae)

[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - loss: 286.0602 - mae: 14.0955
Test MAE dla klasyki: 14.085655212402344


In [308]:
# Sanity check: compare predictions with labels on one batch of the test split.
# test_ds is already batched, so a single element is a whole batch of samples.
sample_data, sample_label = next(iter(test_ds))

# Confirm the batch layout: (batch, x, y, z, 1)
print("Sample Data Shape:", sample_data.shape)

# Run the current `model` on the batch
prediction = model.predict(sample_data)  # input is already batched; no extra batch axis is added

# Show predictions next to the ground-truth labels
print("Model Prediction:", prediction)
print("Sample Label:", sample_label)

Sample Data Shape: (32, 20, 110, 11, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Model Prediction: [[ 311.2237 ]
 [ 548.97003]
 [ 904.0277 ]
 [1420.1797 ]
 [ 248.07668]
 [1141.3812 ]
 [ 859.132  ]
 [ 281.27573]
 [ 623.7638 ]
 [ 735.47235]
 [ 718.90204]
 [1484.3306 ]
 [ 787.0299 ]
 [1465.7122 ]
 [ 904.5947 ]
 [ 326.02667]
 [1438.5317 ]
 [1666.551  ]
 [1217.3813 ]
 [1695.4116 ]
 [1448.7953 ]
 [1209.523  ]
 [ 539.1772 ]
 [ 958.6427 ]
 [ 368.6631 ]
 [1786.7576 ]
 [1915.7972 ]
 [ 892.413  ]
 [1751.1792 ]
 [ 240.78052]
 [1869.0503 ]
 [1005.32886]]
Sample Label: tf.Tensor(
[ 25  50  75 125  25 100  75  25  50  75  75 125  75 125  75  25 125 150
 100 150 125 100  50  75  25 150 150  75 150  25 150  75], shape=(32,), dtype=int16)
