In [1]:
# Prints a fixed greeting; the cell defines no names used by later code.
print('Hello')

Hello


In [2]:
import os
import tensorflow as tf
import numpy as np
from datetime import datetime
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Layer, MultiHeadAttention, LayerNormalization, GlobalAveragePooling2D
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l2
from utils.data_loader import train_generator, val_generator


# Custom TensorBoard callback for batch-level logging
class TensorBoardBatch(tf.keras.callbacks.TensorBoard):
    """TensorBoard callback that also writes each training batch's logged values as scalars.

    Every entry of the per-batch ``logs`` dict except ``'batch'`` and ``'size'`` is
    written under the tag ``batch_<name>`` to a file writer opened on ``self.log_dir``.
    The step used for those scalars is a counter that grows by one per training batch
    and is never reset between epochs.

    Args:
        *args, **kwargs: Forwarded unchanged to ``tf.keras.callbacks.TensorBoard``.
    """

    def __init__(self, *args, **kwargs):
        super(TensorBoardBatch, self).__init__(*args, **kwargs)
        self.writer = None  # created lazily on the first batch
        self.step = 0       # number of training batches seen so far

    def on_train_batch_end(self, batch, logs=None):
        """Run the parent's batch handling, then write the batch-level scalars.

        Keras dispatches per-batch training events to ``on_train_batch_end``. The parent
        TensorBoard callback overrides that hook in recent TF 2.x releases, so logging
        placed in the legacy ``on_batch_end`` alias is not reached there; overriding
        this hook (and delegating to the parent first) keeps both behaviours.
        """
        super(TensorBoardBatch, self).on_train_batch_end(batch, logs)
        self._write_batch_scalars(logs)

    def on_train_end(self, logs=None):
        """Flush pending batch summaries before the parent closes its own writers."""
        if self.writer is not None:
            self.writer.flush()
        super(TensorBoardBatch, self).on_train_end(logs)

    def _write_batch_scalars(self, logs):
        """Write every value in ``logs`` (except bookkeeping keys) as a scalar summary."""
        logs = logs or {}
        self.step += 1

        if self.writer is None:
            self.writer = tf.summary.create_file_writer(self.log_dir)

        with self.writer.as_default():
            for name, value in logs.items():
                # 'batch' and 'size' are bookkeeping entries, not metrics.
                if name in ('batch', 'size'):
                    continue
                tf.summary.scalar(f'batch_{name}', value, step=self.step)


# Enhanced Transformer block definition
class EnhancedTransformerBlock(Layer):
    """Dense projection followed by one transformer encoder block.

    The input is projected to ``embed_dim``, passed through multi-head self-attention
    and a two-layer GELU feed-forward network; each sub-block is followed by dropout,
    a residual connection and layer normalization.

    Rank-2 inputs get a length-1 sequence axis for the attention call and have it
    removed again before returning. With a single position the attention softmax is
    taken over one key, so every attention weight is 1 and the attention sub-block
    cannot mix information between positions.

    Args:
        embed_dim: Width of the projection, attention output and block output.
        num_heads: Number of attention heads; each head uses
            ``key_dim = embed_dim // num_heads``.
        ff_dim: Hidden width of the feed-forward network.
        dropout_rate: Dropout rate used inside attention, inside the feed-forward
            network and after both sub-blocks.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate=0.1, **kwargs):
        super(EnhancedTransformerBlock, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate

        # Initial projection to match embed_dim
        self.projection = Dense(embed_dim)

        # Multi-head attention
        self.att = MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim // num_heads,
            dropout=dropout_rate
        )

        # Feedforward network
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation='gelu'),
            Dropout(dropout_rate),
            Dense(embed_dim)
        ])

        # Layer normalizations
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

        # Dropouts
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from config."""
        config = super(EnhancedTransformerBlock, self).get_config()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'dropout_rate': self.dropout_rate,
        })
        return config

    # NOTE(review): the `training=True` default means dropout is requested whenever
    # `call` receives the default value; Keras convention is `training=None`. The
    # default is left untouched here to keep the signature stable.
    def call(self, inputs, training=True):
        """Apply projection, self-attention and feed-forward sub-blocks.

        Args:
            inputs: Tensor whose last axis is projected to ``embed_dim``. Rank-2
                inputs are treated as a single-position sequence.
            training: Forwarded to every dropout-bearing sub-layer.

        Returns:
            Tensor with last dimension ``embed_dim``. The sequence axis is removed
            when it was added here or when its static length is 1; longer sequences
            are returned with the sequence axis intact.
        """
        # Project input to embedding dimension
        x = self.projection(inputs)

        # Attention needs a sequence axis; remember whether we had to add one.
        added_seq_axis = len(x.shape) == 2
        if added_seq_axis:
            x = tf.expand_dims(x, axis=1)

        # Self-attention
        attention_output = self.att(x, x, training=training)
        attention_output = self.dropout1(attention_output, training=training)
        out1 = self.layernorm1(x + attention_output)

        # Feedforward
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)

        # Only drop the sequence axis when it has length 1. Squeezing a longer
        # sequence (as an unconditional rank-3 check would) raises an error.
        if added_seq_axis or (len(out2.shape) == 3 and out2.shape[1] == 1):
            out2 = tf.squeeze(out2, axis=1)

        return out2


# Monitor for NaN values in the loss
class NaNMonitor(tf.keras.callbacks.Callback):
    """Callback that requests a stop as soon as a batch reports a NaN or infinite loss."""

    def on_batch_end(self, batch, logs=None):
        """Inspect the batch's ``'loss'`` entry and flag the model to stop if it is not finite."""
        batch_logs = {} if not logs else logs

        # Nothing to check when the batch did not report a loss.
        if 'loss' not in batch_logs:
            return

        loss_value = batch_logs['loss']
        if np.isnan(loss_value) or np.isinf(loss_value):
            print('NaN/Inf Loss detected, terminating training...')
            self.model.stop_training = True


# Custom loss function to calculate Earth Mover's Distance
def improved_earth_mover_loss(y_true, y_pred):
    """Earth Mover's Distance style loss between two distributions over the last axis.

    Both inputs are clipped to ``[K.epsilon(), 1.0]`` (they are not renormalized),
    turned into cumulative sums along the last axis, and compared with the mean
    absolute difference. The per-row values are then averaged into one scalar.

    Args:
        y_true: Target distributions; the last axis indexes the bins.
        y_pred: Predicted distributions with the same shape as ``y_true``.

    Returns:
        Scalar tensor: mean over rows of the mean absolute CDF difference.
    """
    floor = K.epsilon()

    # Clip both distributions to the same numeric range.
    clipped_true = K.clip(y_true, floor, 1.0)
    clipped_pred = K.clip(y_pred, floor, 1.0)

    # Absolute gap between the two cumulative distributions, bin by bin.
    cdf_gap = K.abs(K.cumsum(clipped_true, axis=-1) - K.cumsum(clipped_pred, axis=-1))

    per_row = K.mean(cdf_gap, axis=-1)
    return K.mean(per_row)


# Custom metric for Spearman Correlation
def spearman_correlation_tf(y_true, y_pred):
    """Mean Spearman rank correlation, computed row-wise along the last axis.

    Each row of ``y_true`` and ``y_pred`` is converted to ranks along the last axis,
    the Pearson correlation of the two rank vectors is computed per row, and the
    per-row coefficients are averaged.

    Tied values receive the average of the ranks they span. A double ``argsort``
    would instead hand tied values distinct, order-dependent ranks, which makes the
    result differ from the Spearman coefficient whenever a row contains equal
    entries. For rows without ties both approaches give the same ranks.

    Args:
        y_true: Target values; the last axis is the one that gets ranked.
        y_pred: Predicted values with the same shape as ``y_true``.

    Returns:
        Scalar float32 tensor: the mean of the per-row rank correlations. A row whose
        ranks have zero variance contributes 0 because of the epsilon in the
        denominator.
    """

    def _average_ranks(values):
        # Zero-based average rank: (#entries smaller) + (#entries equal - 1) / 2.
        values = tf.cast(values, tf.float32)
        current = tf.expand_dims(values, axis=-1)   # (..., n, 1)
        others = tf.expand_dims(values, axis=-2)    # (..., 1, n)
        n_less = tf.reduce_sum(tf.cast(tf.less(others, current), tf.float32), axis=-1)
        n_equal = tf.reduce_sum(tf.cast(tf.equal(others, current), tf.float32), axis=-1)
        return n_less + (n_equal - 1.0) / 2.0

    y_true_rank = _average_ranks(y_true)
    y_pred_rank = _average_ranks(y_pred)

    mean_y_true = tf.reduce_mean(y_true_rank, axis=-1, keepdims=True)
    mean_y_pred = tf.reduce_mean(y_pred_rank, axis=-1, keepdims=True)

    cov = tf.reduce_mean((y_true_rank - mean_y_true) * (y_pred_rank - mean_y_pred), axis=-1)
    std_y_true = tf.sqrt(tf.reduce_mean(tf.square(y_true_rank - mean_y_true), axis=-1))
    std_y_pred = tf.sqrt(tf.reduce_mean(tf.square(y_pred_rank - mean_y_pred), axis=-1))

    # Epsilon keeps the division finite when either rank vector is constant.
    spearman_corr = cov / (std_y_true * std_y_pred + K.epsilon())
    return tf.reduce_mean(spearman_corr)


def pearson_correlation_tf(y_true, y_pred):
    """Mean Pearson correlation, computed row-wise along the last axis.

    Args:
        y_true: Target values; converted to a tensor.
        y_pred: Predicted values with the same shape as ``y_true``.

    Returns:
        Scalar tensor: the average of the per-row correlation coefficients. The
        denominator carries an added epsilon, so a constant row yields 0 rather
        than a division by zero.
    """
    true_t = tf.convert_to_tensor(y_true)
    pred_t = tf.convert_to_tensor(y_pred)

    # Deviation of every entry from its row mean (last axis).
    true_dev = true_t - tf.reduce_mean(true_t, axis=-1, keepdims=True)
    pred_dev = pred_t - tf.reduce_mean(pred_t, axis=-1, keepdims=True)

    # Population covariance and standard deviations per row.
    row_cov = tf.reduce_mean(true_dev * pred_dev, axis=-1)
    true_std = tf.sqrt(tf.reduce_mean(tf.square(true_dev), axis=-1))
    pred_std = tf.sqrt(tf.reduce_mean(tf.square(pred_dev), axis=-1))

    denominator = true_std * pred_std + tf.keras.backend.epsilon()
    return tf.reduce_mean(row_cov / denominator)


# Model creation function with MobileNet base and transformer block
def create_model(image_size=224, num_classes=10, trainable_blocks=(12, 13)):
    """Build the MobileNet + transformer-block classifier.

    An ImageNet-initialised MobileNet (no top) is pooled globally, passed through an
    ``EnhancedTransformerBlock``, a regularised GELU dense layer with dropout, and a
    softmax output layer.

    Backbone layers are frozen except those belonging to the depthwise-separable
    blocks listed in ``trainable_blocks``. MobileNet (v1) names its layers ``conv1``,
    ``conv_pad_N``, ``conv_dw_N...`` and ``conv_pw_N...``; no name contains
    ``'block_'``, so a substring test for ``'block_13'``/``'block_14'`` never matches
    and leaves the entire backbone frozen. Block membership is therefore read from
    the numeric index in the layer name.

    NOTE(review): blocks 12 and 13 (the final two MobileNet v1 blocks) are the
    default here; confirm that this is the intended fine-tuning depth.

    Args:
        image_size: Height and width of the square RGB input.
        num_classes: Size of the softmax output.
        trainable_blocks: Iterable of MobileNet block indices to leave trainable.
            Pass an empty iterable to freeze the whole backbone.

    Returns:
        An uncompiled ``Model`` mapping images to ``num_classes`` probabilities.
    """
    import re  # local import: only needed to parse MobileNet layer names

    # Base model
    base_model = MobileNet(
        input_shape=(image_size, image_size, 3),
        alpha=1.0,
        include_top=False,
        weights='imagenet'
    )

    # Freeze everything except the requested blocks.
    block_name = re.compile(r'^conv_(?:pad|dw|pw)_(\d+)(?:_|$)')
    unfrozen = {int(index) for index in trainable_blocks}
    for layer in base_model.layers:
        match = block_name.match(layer.name)
        layer.trainable = match is not None and int(match.group(1)) in unfrozen

    # Build model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)  # This will give us (batch_size, 1024)

    # First transformer block
    x = EnhancedTransformerBlock(
        embed_dim=256,  # Reduced dimension
        num_heads=4,    # Reduced heads
        ff_dim=512,
        dropout_rate=0.1
    )(x)

    # Additional dense layers
    x = Dense(128, activation='gelu', kernel_regularizer=l2(0.01))(x)
    x = Dropout(0.3)(x)

    # Final prediction layer
    predictions = Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.01))(x)

    model = Model(inputs=base_model.input, outputs=predictions)
    return model


# Main function to train the model
def main():
    """Build, compile and train the model, saving the best and the final weights.

    The best weights (highest ``val_spearman_correlation_tf``) are written to
    ``weights/mobilenet_weights.h5`` and the final weights to
    ``final_model_weights.h5``. TensorBoard logs go to a timestamped folder under
    ``./logs``.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        Exception: Any error raised during training is printed and re-raised.
    """
    # Hyperparameters
    image_size = 224
    batch_size = 32
    epochs = 300
    num_classes = 10  # Make sure this matches your data
    initial_lr = 1e-4

    # Dataset sizes used to derive the number of batches per pass.
    num_train_samples = 917
    num_val_samples = 101
    # Integer floor division: Keras expects whole step counts, not floats.
    steps_per_epoch = num_train_samples // batch_size
    validation_steps = num_val_samples // batch_size

    # Create and compile model
    model = create_model(image_size, num_classes)
    optimizer = Adam(
        learning_rate=initial_lr,
        clipnorm=1.0,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-8
    )

    model.compile(
        optimizer=optimizer,
        loss=improved_earth_mover_loss,
        metrics=['categorical_accuracy', spearman_correlation_tf, pearson_correlation_tf]
    )

    # Print model summary to verify shapes
    model.summary()

    # The checkpoint callback does not create missing directories itself.
    checkpoint_path = 'weights/mobilenet_weights.h5'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Callbacks
    # EarlyStopping(monitor='val_spearman_correlation_tf', patience=50,
    # restore_best_weights=True) is currently disabled.
    callbacks = [
        ModelCheckpoint(
            checkpoint_path,
            monitor='val_spearman_correlation_tf',
            verbose=1,
            save_weights_only=True,
            save_best_only=True,
            mode='max'
        ),
        TensorBoardBatch(log_dir=os.path.join('./logs', datetime.now().strftime("%Y%m%d-%H%M%S"))),
        NaNMonitor(),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=150,
            min_lr=1e-7,
            verbose=1
        )
    ]

    # Training
    try:
        history = model.fit(
            train_generator(batchsize=batch_size),
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            verbose=1,
            validation_data=val_generator(batchsize=batch_size),
            validation_steps=validation_steps,
            callbacks=callbacks
        )

        model.save_weights('final_model_weights.h5')
        print("Training completed successfully!")

    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise

    return history


# Start training only when this code runs as the main module, not when it is imported.
if __name__ == "__main__":
    main()


Loading training set and val set
Train set size: (917,) (917, 10)
Validation set size: (101,) (101, 10)
Train and validation datasets ready!
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1 (Conv2D)              (None, 112, 112, 32)      864       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 112, 112, 32)     128       
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 112, 112, 32)      0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)     288       
                                                    

KeyboardInterrupt: 