In [1]:
%%capture 
!pip install -q "tqdm>=4.36.1"

import os
import datetime
from tqdm.notebook import tqdm
import warnings 
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Flatten, Dense, Dropout,
                                     MaxPooling2D, AveragePooling2D)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import mean_squared_error, max_error, mean_absolute_error, mean_squared_log_error

# Directories in Kaggle
TRAIN_DIR = '/kaggle/input/autopicar-data/training_data/training_data'
TEST_DIR = '/kaggle/input/autopicar-data/test_data/test_data/'
MODEL_DIR = '/kaggle/working/models'

# Load CSV files for training and testing data
train_df = pd.read_csv('/kaggle/input/autopicar-data/training_norm.csv')
#test_df = pd.read_csv('/kaggle/input/your-dataset/testing_targets.csv')


In [45]:
def evaluating_model(model):
    num_of_samples = len(test_df)

    # Generate predictions
    predictions = model.predict(eval_generator, steps=num_of_samples)

    # If there are no true labels, simply output or save predictions
    print("Predictions generated for the test set:\n")
    print(predictions)

    # Optionally, save the predictions to a CSV file for further analysis
    test_df['Predicted_Angle'] = predictions[:, 0]
    test_df['Predicted_Speed'] = predictions[:, 1]
    
    # Save the predictions to a CSV file
    test_df[['image_id', 'Predicted_Angle', 'Predicted_Speed']].to_csv('predictions.csv', index=False)
    print("Predictions saved to 'predictions.csv'")


In [46]:
import re

# Step 1: List all files in the training_data folder
file_list = os.listdir(TRAIN_DIR)

# Step 2: Extract image IDs from file names (assuming file names are in the format: <image_id>.png)
pattern = re.compile(r'(\d+)\.png')
image_ids = [pattern.search(filename).group(1) for filename in file_list if pattern.match(filename)]

# Step 3: Load the CSV file containing the image_id, speed, and angle data
df = train_df

# Step 4: Convert image_id to string for proper matching
df['image_id'] = df['image_id'].astype(str)

# Step 5: Filter the DataFrame to only include rows where image_id is in the image_ids list
filtered_df = df[df['image_id'].isin(image_ids)]

# Step 6: Add the full file path to the DataFrame
filtered_df['filename'] = filtered_df['image_id'].astype(str) + '.png'
#filtered_df['full_path'] = filtered_df['filename'].apply(lambda x: os.path.join(data_dir, x))

# Step 7: Display the first few rows of the filtered DataFrame
print(filtered_df.tail())

# Now, filtered_df contains only the images present in the directory with their corresponding speed and angle

      image_id   angle  speed   filename
13788    13794  0.6250    1.0  13794.png
13789    13795  0.4375    1.0  13795.png
13790    13796  0.5625    0.0  13796.png
13791    13797  0.6250    0.0  13797.png
13792    13798  0.6875    1.0  13798.png


In [47]:
# Create a test DataFrame with 1020 sequential image_ids
test_image_ids = list(range(1, 1021))  # Sequential image IDs from 1 to 1020

# Create filenames based on image_id (e.g., 1.png, 2.png, ..., 1020.png)
test_filenames = [f"{image_id}.png" for image_id in test_image_ids]

# Create an empty DataFrame with image_id, filenames, and empty 'angle' and 'speed' columns
test_df = pd.DataFrame({
    'image_id': test_image_ids,
    'filename': test_filenames,
    'angle': [None] * 1020,  # Empty angle column
    'speed': [None] * 1020   # Empty speed column
})

print(test_df.head())


   image_id filename angle speed
0         1    1.png  None  None
1         2    2.png  None  None
2         3    3.png  None  None
3         4    4.png  None  None
4         5    5.png  None  None


In [48]:
# Adjust constants
INPUT_SHAPE = [320, 230, 3]  # width, height, channels
BATCH_SIZE = 32

# Use ImageDataGenerator for data augmentation and preprocessing
data_gen = ImageDataGenerator(rotation_range=20,
                              zoom_range=0.15,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              shear_range=0.15,
                              brightness_range=(0.1, 0.5),
                              fill_mode='nearest',
                              zca_whitening=True)

# Create training and validation generators from train_data
train_generator = data_gen.flow_from_dataframe(filtered_df, directory=TRAIN_DIR,
                                               x_col='filename',
                                               y_col=['angle', 'speed'],
                                               target_size=tuple(INPUT_SHAPE[:2]),
                                               color_mode='rgb',
                                               class_mode='raw',
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

# For validation, we can split the filtered_df into training and validation sets
# Assuming val_data is already defined similarly to train_data, here’s the validation generator
valid_generator = data_gen.flow_from_dataframe(filtered_df, directory=TRAIN_DIR,
                                               x_col='filename',
                                               y_col=['angle', 'speed'],
                                               target_size=tuple(INPUT_SHAPE[:2]),
                                               color_mode='rgb',
                                               class_mode='raw',
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

# For evaluation, since test_data has empty 'angle' and 'speed', we focus on filenames
# The model will predict 'angle' and 'speed' values during evaluation
eval_generator = ImageDataGenerator().flow_from_dataframe(test_df, directory=TEST_DIR,
                                                          x_col='filename',
                                                          y_col=None,  # No labels for testing
                                                          target_size=tuple(INPUT_SHAPE[:2]),
                                                          class_mode=None,  # Only filenames, no labels
                                                          batch_size=1,
                                                          shuffle=False)

# Calculate step sizes for training and validation
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size


Found 13793 validated image filenames.
Found 13793 validated image filenames.
Found 1020 validated image filenames.


In [49]:
import tensorflow as tf

def callback_config(LOG_DIR, MODEL_DIR):
    """
    This function creates and returns a set of callbacks for model training:
    - ModelCheckpoint: Saves the model weights when the validation MSE improves.
    - TensorBoard: Logs metrics for visualization in TensorBoard.
    - EarlyStopping: Stops training early if the validation MSE does not improve for a number of epochs.

    Parameters:
    LOG_DIR (str): Path to the directory where TensorBoard logs will be stored.
    MODEL_DIR (str): Path to the directory where the model checkpoints will be saved.

    Returns:
    List of TensorFlow callbacks.
    """
    # Checkpoint to save the best model during training
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=f"{MODEL_DIR}/best_model.keras",  # Directory to save the model
        verbose=1,           # Verbose output to display saving actions
        save_best_only=True  # Only save the model with the best validation MSE
    )
    
    # TensorBoard callback for logging the training process
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=LOG_DIR,       # Directory for storing TensorBoard logs
        histogram_freq=1       # Frequency (in epochs) at which to compute activation and weight histograms
    )
    
    # Early stopping to prevent overfitting by stopping if val_mse doesn't improve
    es_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_mse',     # Monitor validation mean squared error
        min_delta=0,           # Minimum change to qualify as improvement
        patience=10,           # Number of epochs with no improvement to stop training
        verbose=1,             # Verbose output to notify when early stopping occurs
        mode='auto',           # Automatically determine direction (minimization or maximization)
        baseline=None,         # No specific baseline
        restore_best_weights=True  # Restore weights of the best epoch after stopping
    )
    
    return checkpoint_callback, tensorboard_callback, es_callback


In [50]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Ftrl
from tensorflow.keras.optimizers.schedules import ExponentialDecay, InverseTimeDecay
from tensorflow.keras.regularizers import l1_l2, l1, l2
from tensorflow.keras.activations import selu, relu, elu, hard_sigmoid, swish, sigmoid
from tensorflow.keras.layers import BatchNormalization, Conv2D, Flatten, Dense, Dropout, Input, Embedding, Reshape, AveragePooling2D, MaxPooling2D, SpatialDropout2D

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.callbacks import TensorBoard

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, classification_report

import time
%matplotlib inline


def Model(Hparams):
    model = tf.keras.Sequential()
    model.add(BatchNormalization(input_shape = INPUT_SHAPE))
    model.add(Conv2D(Hparams[HP_NUM_FILTERS_CL1],
                     kernel_size =(3,3),
                     strides = (1,1),
                     input_shape = INPUT_SHAPE,
                     activation = swish,
                     kernel_regularizer=l1_l2(l1=1e-8, l2=1e-8),
                     bias_regularizer=l2(l2=1e-8)))
    model.add(MaxPooling2D(pool_size=4))
    model.add(Conv2D(filters= 30,
                     kernel_size= (3,3),
                     strides = (1,1),
                     activation = swish))
    model.add(AveragePooling2D(pool_size=4))
    model.add(Flatten())
    model.add(Dense(Hparams[HP_NUM_UNITS_FC1],
                    activation = swish,
                    kernel_regularizer=l1_l2(l1=1e-8, l2=1e-8),
                    bias_regularizer=l2(l2=1e-8)))
    model.add(Dropout(Hparams[HP_DROPOUT_FC1]))
    model.add(Dense(units = 100,
                    activation = swish,
                    kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),
                    bias_regularizer=l2(l2=1e-7)))
    model.add(Dense(2, activation='sigmoid')) # angle class and speed class
    # Fitting details. 
    scheduler = ExponentialDecay(initial_learning_rate = 0.001,
                                 decay_steps=100000,
                                 decay_rate=0.96)
    model.compile(loss='mse',
                  optimizer=Hparams[HP_OPTIMIZER],
                  metrics=['mse'])
    return model

In [51]:
LOG_DIR = "/kaggle/working/logs/fit"  # Adjust for Kaggle path
MODEL_DIR = "/kaggle/working/models"  # Adjust for Kaggle path


def experimental_run(LOG_DIR_HP, hparams):
    model = Model(hparams)
    print(model.summary())
    
    # Get the callbacks
    checkpoint_callback, tensorboard_callback, es_callback = callback_config(LOG_DIR, MODEL_DIR)

    # Train the model
    model.fit(train_generator,
              steps_per_epoch=STEP_SIZE_TRAIN,
              epochs=3,
              verbose=1,
              callbacks=[checkpoint_callback, es_callback, tensorboard_callback],
              validation_data=valid_generator,
              validation_steps=STEP_SIZE_VALID,
              validation_freq=1,
              shuffle=True)
    
    # Evaluate the model
    evaluating_model(model)


In [54]:
from tensorboard.plugins.hparams import api as hp

# Hyperparameters definition
HP_NUM_UNITS_FC1 = hp.HParam('num_units', hp.Discrete([250, 200, 160]))
HP_DROPOUT_FC1 = hp.HParam('dropout', hp.RealInterval(0.5, 0.8))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['Adam']))
HP_NUM_FILTERS_CL1 = hp.HParam('filters', hp.Discrete([5]))

# Metrics to track during hyperparameter tuning
METRICS = [
    hp.Metric("epoch_accuracy", group="validation", display_name="Validation Accuracy"),
    hp.Metric("epoch_loss", group="validation", display_name="Validation Loss"),
    hp.Metric("batch_accuracy", group="train", display_name="Training Accuracy"),
    hp.Metric("batch_loss", group="train", display_name="Training Loss"),
]

# Set up TensorBoard logging directory for hyperparameter tuning
log_dir = 'logs/hparam_tuning'

# Clear logs from previous runs if necessary (optional step)
# !rm -rf ./logs/

# Write the hparams configuration to the logs for TensorBoard
with tf.summary.create_file_writer(log_dir).as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS_FC1, HP_NUM_FILTERS_CL1, HP_DROPOUT_FC1, HP_OPTIMIZER],
        metrics=METRICS,
    )

print("Hyperparameter tuning setup complete. Logs will be saved to:", log_dir)


Hyperparameter tuning setup complete. Logs will be saved to: logs/hparam_tuning


In [55]:
# Initialize session counter
session_num = 0

# Loop through all possible combinations of hyperparameters
for num_units in HP_NUM_UNITS_FC1.domain.values:
    for dropout_rate in (HP_DROPOUT_FC1.domain.min_value, HP_DROPOUT_FC1.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
            for num_filters in HP_NUM_FILTERS_CL1.domain.values:
                
                # Create a dictionary to store the current hyperparameters for this run
                hparams = {
                    HP_NUM_UNITS_FC1: num_units,
                    HP_DROPOUT_FC1: dropout_rate,
                    HP_OPTIMIZER: optimizer,
                    HP_NUM_FILTERS_CL1: num_filters
                }
                
                # Name the run based on session number
                run_name = f"run-{session_num}"
                
                # Print out the details of the current trial
                print(f'=============== Starting trial: {run_name} ===============\n')
                print('Settings for experiment are:\n')
                print({h.name: hparams[h] for h in hparams})
                print('\n')
                
                # Call experimental_run to train the model with these hyperparameters
                experimental_run(f'logs/hparam_tuning/{run_name}', hparams)
                
                # Increment session number for the next trial
                session_num += 1



Settings for experiment are:

{'num_units': 160, 'dropout': 0.5, 'optimizer': 'Adam', 'filters': 5}




None
Epoch 1/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707ms/step - loss: 0.1232 - mse: 0.1019
Epoch 1: val_loss improved from inf to 0.07934, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m620s[0m 1s/step - loss: 0.1232 - mse: 0.1019 - val_loss: 0.0793 - val_mse: 0.0652
Epoch 2/3
[1m  1/431[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 29ms/step - loss: 0.0751 - mse: 0.0610
Epoch 2: val_loss did not improve from 0.07934
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0751 - mse: 0.0610 - val_loss: 0.1603 - val_mse: 0.1462
Epoch 3/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707ms/step - loss: 0.0843 - mse: 0.0717
Epoch 3: val_loss improved from 0.07934 to 0.06440, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m618s[0m 1s/step - loss: 0.0843 - mse: 0.0717 - val_los

None
Epoch 1/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 700ms/step - loss: 0.1286 - mse: 0.1069
Epoch 1: val_loss improved from inf to 0.10222, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m619s[0m 1s/step - loss: 0.1285 - mse: 0.1068 - val_loss: 0.1022 - val_mse: 0.0875
Epoch 2/3
[1m  1/431[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 29ms/step - loss: 0.1052 - mse: 0.0905
Epoch 2: val_loss improved from 0.10222 to 0.07575, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1052 - mse: 0.0905 - val_loss: 0.0757 - val_mse: 0.0610
Epoch 3/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 700ms/step - loss: 0.0987 - mse: 0.0854
Epoch 3: val_loss did not improve from 0.07575
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m614s[0m 1s/step - loss: 0.0986 - mse: 0.0854 - val_los

None
Epoch 1/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704ms/step - loss: 0.1239 - mse: 0.1008
Epoch 1: val_loss improved from inf to 0.09559, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m623s[0m 1s/step - loss: 0.1238 - mse: 0.1008 - val_loss: 0.0956 - val_mse: 0.0822
Epoch 2/3
[1m  1/431[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 29ms/step - loss: 0.0887 - mse: 0.0753
Epoch 2: val_loss did not improve from 0.09559
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0887 - mse: 0.0753 - val_loss: 0.1862 - val_mse: 0.1728
Epoch 3/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704ms/step - loss: 0.0820 - mse: 0.0701
Epoch 3: val_loss improved from 0.09559 to 0.06134, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m614s[0m 1s/step - loss: 0.0820 - mse: 0.0701 - val_los

None
Epoch 1/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 705ms/step - loss: 0.1283 - mse: 0.1044
Epoch 1: val_loss improved from inf to 0.09746, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m620s[0m 1s/step - loss: 0.1283 - mse: 0.1044 - val_loss: 0.0975 - val_mse: 0.0827
Epoch 2/3
[1m  1/431[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 29ms/step - loss: 0.1452 - mse: 0.1305
Epoch 2: val_loss improved from 0.09746 to 0.05890, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1452 - mse: 0.1305 - val_loss: 0.0589 - val_mse: 0.0441
Epoch 3/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 702ms/step - loss: 0.0967 - mse: 0.0837
Epoch 3: val_loss did not improve from 0.05890
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m615s[0m 1s/step - loss: 0.0967 - mse: 0.0837 - val_los

None
Epoch 1/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704ms/step - loss: 0.1241 - mse: 0.0978
Epoch 1: val_loss improved from inf to 0.08116, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m622s[0m 1s/step - loss: 0.1241 - mse: 0.0978 - val_loss: 0.0812 - val_mse: 0.0650
Epoch 2/3
[1m  1/431[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 30ms/step - loss: 0.0858 - mse: 0.0696
Epoch 2: val_loss improved from 0.08116 to 0.04710, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0858 - mse: 0.0696 - val_loss: 0.0471 - val_mse: 0.0309
Epoch 3/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704ms/step - loss: 0.0844 - mse: 0.0699
Epoch 3: val_loss did not improve from 0.04710
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m613s[0m 1s/step - loss: 0.0844 - mse: 0.0699 - val_los

None
Epoch 1/3
[1m430/431[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 710ms/step - loss: 0.1347 - mse: 0.1075
Epoch 1: val_loss improved from inf to 0.10265, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m621s[0m 1s/step - loss: 0.1346 - mse: 0.1074 - val_loss: 0.1026 - val_mse: 0.0850
Epoch 2/3
[1m  1/431[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 29ms/step - loss: 0.1309 - mse: 0.1132
Epoch 2: val_loss improved from 0.10265 to 0.01906, saving model to /kaggle/working/models/best_model.keras
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1309 - mse: 0.1132 - val_loss: 0.0191 - val_mse: 0.0014
Epoch 3/3
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701ms/step - loss: 0.1002 - mse: 0.0846
Epoch 3: val_loss did not improve from 0.01906
[1m431/431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m613s[0m 1s/step - loss: 0.1002 - mse: 0.0846 - val_los

In [10]:
df.to_csv('/kaggle/working/pred.csv', index=False)