In [None]:
from tensorflow.keras import layers, models

def _conv_branch(inputs, filter_counts=(16, 32, 64)):
    """Build one convolutional feature extractor on top of ``inputs``.

    For each entry in ``filter_counts`` a 3x3 'same'-padded ReLU convolution
    is applied, followed by 2x2 max pooling. The resulting feature maps are
    collapsed by global average pooling, so the returned tensor has one value
    per filter of the last convolution.
    """
    features = inputs
    for n_filters in filter_counts:
        features = layers.Conv2D(
            filters=n_filters,
            kernel_size=(3, 3),
            activation='relu',
            padding='same',
        )(features)
        features = layers.MaxPooling2D(pool_size=(2, 2))(features)
    return layers.GlobalAveragePooling2D()(features)


# Branch 1: MFCC features, 13 coefficients per frame, single channel.
# NOTE: with Keras' default 'valid' pooling the 13-wide axis shrinks
# 13 -> 6 -> 3 -> 1 over the three pooling stages; the frame axis needs at
# least 8 frames to survive the same three halvings.
input1 = layers.Input(shape=(number_of_frames, 13, 1))
x1 = _conv_branch(input1)

# Branch 2: phone posterior probability features, 32 values per frame.
input2 = layers.Input(shape=(number_of_frames, 32, 1))
x2 = _conv_branch(input2)

# Fuse the two pooled branch embeddings into a single feature vector.
merged = layers.concatenate([x1, x2])

# Classifier head: dropout, a 128-unit ReLU dense layer, then a lighter dropout.
for _head_layer in (
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
):
    merged = _head_layer(merged)

# Single sigmoid unit, paired with the binary cross-entropy loss below.
output = layers.Dense(1, activation='sigmoid')(merged)

# Assemble the two-input model and configure it for training.
model = models.Model(inputs=[input1, input2], outputs=output)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer architecture and parameter counts.
model.summary()