In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
from tensorflow.keras.layers import Conv1D, MaxPool1D, Flatten, Dense, Reshape,  GlobalAveragePooling1D, Masking, Input, MaxPooling1D, GlobalMaxPooling1D, Add, Dropout, BatchNormalization, UpSampling1D, Lambda, Conv2D,Concatenate,Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import models, layers, regularizers, Input

In [9]:
x_train = np.load('x_train_signfi.npy')
x_test = np.load('x_test_signfi.npy')
x_val = np.load('x_val_signfi.npy')
y_train = np.load('y_train_signfi.npy')
y_test = np.load('y_test_signfi.npy')
y_val = np.load('y_val_signfi.npy')

In [12]:
x_train.shape, x_test.shape, y_train.shape

((1500, 200, 6, 1), (760, 200, 6, 1), (1500, 1))

In [11]:
# Reshape the data to add an extra dimension for the CNN
x_train = np.expand_dims(x_train, axis=-1)  # Shape: (num_samples, 200, 6, 1)
x_val = np.expand_dims(x_val, axis=-1)      # Shape: (num_samples, 200, 6, 1)
x_test = np.expand_dims(x_test, axis=-1)    # Shape: (num_samples, 200, 6, 1)

# Teacher

In [34]:
from tensorflow.keras import layers, models, Input


# Input layer
inputs = Input(shape=(200, 6, 1))
    
# Convolutional Layer 1
x = layers.Conv2D(3, (3, 3), padding='same', activation='relu')(inputs)
x = layers.BatchNormalization()(x)
x = layers.AveragePooling2D(pool_size=(2, 2))(x)
    
# Convolutional Layer 2
x = layers.Conv2D(3, (3, 3), padding='same', activation='relu')(x)
x = layers.BatchNormalization()(x)
x_b_T = layers.AveragePooling2D(pool_size=(2, 2))(x)
    
# Dropout Layer
x = layers.Dropout(0.6)(x_b_T)
    
# Flatten the output to feed into fully connected layer
x = layers.Flatten()(x)
    
# Fully Connected Layer
outputs = layers.Dense(276, activation='softmax')(x)  # Output layer with softmax activation for 276 classes
    
# Create the model
teacher_model = models.Model(inputs=inputs, outputs=outputs)

teacher_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
teacher_model.summary()

In [35]:
# Define the early stopping callback
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model
history = teacher_model.fit(
    np.array(x_train), np.array(y_train), 
    validation_data=(x_val, y_val), 
    epochs=2000, 
    batch_size=32, 
    callbacks=[callback]
)

Epoch 1/2000
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.0061 - loss: 5.8441 - val_accuracy: 0.0060 - val_loss: 5.6025
Epoch 2/2000
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0210 - loss: 5.4012 - val_accuracy: 0.0160 - val_loss: 5.5222
Epoch 3/2000
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0460 - loss: 4.9764 - val_accuracy: 0.0700 - val_loss: 5.3117
Epoch 4/2000
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0988 - loss: 4.5753 - val_accuracy: 0.1500 - val_loss: 4.9910
Epoch 5/2000
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1903 - loss: 4.0637 - val_accuracy: 0.2720 - val_loss: 4.5297
Epoch 6/2000
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3558 - loss: 3.4666 - val_accuracy: 0.3760 - val_loss: 4.0644
Epoch 7/2000
[1m47/47[0m 

In [37]:
teacher_model.evaluate(np.array(x_test), np.array(y_test))

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0030


[0.0031220901291817427, 1.0]

# Student

In [38]:
from tensorflow.keras import layers, models, Input
from tensorflow.keras.layers import Layer


# Input layer
inputs = Input(shape=(200, 6, 1))

# First Convolutional Layer repeated five times
branch1 = layers.Conv2D(3, (3, 3), activation='relu', strides=(4, 6), padding='same')(inputs)
branch1 = layers.BatchNormalization()(branch1)

branch2 = layers.Conv2D(3, (3, 3), activation='relu', strides=(4, 6), padding='same')(inputs)
branch2 = layers.BatchNormalization()(branch2)

branch3 = layers.Conv2D(3, (3, 3), activation='relu', strides=(4, 6), padding='same')(inputs)
branch3 = layers.BatchNormalization()(branch3)

branch4 = layers.Conv2D(3, (3, 3), activation='relu', strides=(4, 6), padding='same')(inputs)
branch4 = layers.BatchNormalization()(branch4)

branch5 = layers.Conv2D(3, (3, 3), activation='relu', strides=(4, 6), padding='same')(inputs)
branch5 = layers.BatchNormalization()(branch5)

# Adding the outputs of the five branches
x_b_S = layers.Add()([branch1, branch2, branch3, branch4, branch5])

# Dropout Layer (commented out)
# x = layers.Dropout(0.5)(x)

# Flatten the output
cat = layers.Flatten()(x_b_S)
cat = layers.Dense(32, activation='relu')(cat)

# Fully Connected Layer
outputs = layers.Dense(276, activation='softmax', name = 'out_s')(cat)  # Output layer with softmax activation for 276 classes

# Create the model
student_model = models.Model(inputs=inputs, outputs=[x_b_S, outputs])

# Compile the model
student_model.compile(optimizer='adam', loss=['mean_squared_error', 'binary_crossentropy'], metrics= ['accuracy'])

# Display the model summary
# st_model.summary()

In [39]:
student_model.summary()

# Discriminator

In [20]:
def create_discriminator(output_dim):
    inputs = Input(shape=(output_dim,))
    x = Dense(128, activation='relu')(inputs)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(1, activation='sigmoid')(x)  # Binary output: real or fake
    return Model(inputs, outputs, name="Discriminator")


In [21]:
def knowledge_distillation_loss(teacher_logits, student_logits, temp=3):
    teacher_probs = tf.nn.softmax(teacher_logits / temp)
    student_probs = tf.nn.softmax(student_logits / temp)
    return KLDivergence()(teacher_probs, student_probs) * (temp ** 2)

In [22]:
def train_kd_adversarial(teacher_model, student_model, discriminator, train_data, epochs=10):
    lambda_adv = 0.5  # Weight for adversarial loss

    for epoch in range(epochs):
        print(f"\nEpoch {epoch + 1}/{epochs}")
        
        for x_batch, y_batch in train_data:
            # Step 1: Generate teacher and student predictions
            teacher_preds = teacher_model(x_batch, training=False)
            student_preds = student_model(x_batch, training=True)
            
            # Step 2: Train discriminator to distinguish teacher vs. student
            real_labels = tf.ones((x_batch.shape[0], 1))  # Teacher predictions are real
            fake_labels = tf.zeros((x_batch.shape[0], 1))  # Student predictions are fake
            with tf.GradientTape() as tape:
                real_logits = discriminator(teacher_preds)
                fake_logits = discriminator(student_preds)
                d_loss_real = BinaryCrossentropy()(real_labels, real_logits)
                d_loss_fake = BinaryCrossentropy()(fake_labels, fake_logits)
                d_loss = d_loss_real + d_loss_fake
            grads = tape.gradient(d_loss, discriminator.trainable_variables)
            discriminator.optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))
            
            # Step 3: Train student to mimic teacher (KD loss) and fool discriminator (adversarial loss)
            with tf.GradientTape() as tape:
                kd_loss = knowledge_distillation_loss(teacher_preds, student_preds)
                adversarial_loss = BinaryCrossentropy()(real_labels, discriminator(student_preds))
                total_loss = kd_loss + lambda_adv * adversarial_loss
            grads = tape.gradient(total_loss, student_model.trainable_variables)
            student_model.optimizer.apply_gradients(zip(grads, student_model.trainable_variables))

            print(f"Discriminator Loss: {d_loss.numpy():.4f}, KD Loss: {kd_loss.numpy():.4f}, Adv Loss: {adversarial_loss.numpy():.4f}")


In [26]:
train_kd_adversarial(teacher_model, student_model, discriminator, train_data, epochs=10)

NameError: name 'discriminator' is not defined

# adversial 2

In [29]:
import tensorflow as tf
from tensorflow.keras import layers, Model

# Input shape
input_shape = (90,)

# Teacher Model (pretrained, frozen)
teacher_input = layers.Input(shape=input_shape)
teacher_bottleneck = layers.Dense(64, activation="relu", name="bottleneck_layer")(teacher_input)
teacher_output = layers.Dense(10, activation="softmax", name="output_layer")(teacher_bottleneck)
teacher_model = Model(teacher_input, [teacher_bottleneck, teacher_output], name="Teacher")
teacher_model.trainable = False  # Freeze the teacher model

# Student Model (to train)
student_input = layers.Input(shape=input_shape)
student_bottleneck = layers.Dense(64, activation="relu", name="bottleneck_layer")(student_input)
student_output = layers.Dense(10, activation="softmax", name="output_layer")(student_bottleneck)
student_model = Model(student_input, [student_bottleneck, student_output], name="Student")

# Discriminator Model
discriminator_input = layers.Input(shape=(64 + 10,))  # Bottleneck + Output
discriminator_hidden = layers.Dense(64, activation="relu")(discriminator_input)
discriminator_output = layers.Dense(1, activation="sigmoid")(discriminator_hidden)
discriminator = Model(discriminator_input, discriminator_output, name="Discriminator")

# Combine for Training
teacher_pred = teacher_model(teacher_input)
student_pred = student_model(student_input)

# Concatenate bottleneck and output layers
teacher_concat = layers.Concatenate()([teacher_pred[0], teacher_pred[1]])
student_concat = layers.Concatenate()([student_pred[0], student_pred[1]])

# Discriminator loss: Binary cross-entropy
discriminator.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

# Adversarial loss for Student
def adversarial_loss(y_true, y_pred):
    return tf.keras.losses.binary_crossentropy(y_true, y_pred)

# Student training step
for epoch in range(5):
    # Step 1: Train the discriminator

        # Step 1: Train the discriminator
    # Use teacher and student predictions to train the discriminator
    teacher_labels = tf.ones((32, 1))  # Label as "1" for teacher
    student_labels = tf.zeros((32, 1))  # Label as "0" for student
    
    # Concatenate predictions and train discriminator
    discriminator.train_on_batch(tf.concat([teacher_concat, student_concat], axis=0),
                                  tf.concat([teacher_labels, student_labels], axis=0))

    # Step 2: Train the student to fool the discriminator
    with tf.GradientTape() as tape:
        # Get student predictions
        student_bottleneck, student_output = student_model(student_input)
        student_concat = tf.concat([student_bottleneck, student_output], axis=1)
        
        # Discriminator's prediction on student output
        student_discriminator_pred = discriminator(student_concat)
        
        # Calculate adversarial loss (fool discriminator) + KD loss
        adversarial_loss_value = adversarial_loss(tf.ones_like(student_discriminator_pred), student_discriminator_pred)
        kd_loss = tf.keras.losses.mean_squared_error(teacher_pred[0], student_bottleneck) + \
                  tf.keras.losses.categorical_crossentropy(teacher_pred[1], student_output)
        
        # Total loss
        total_loss = adversarial_loss_value + kd_loss

    # Update student model weights
    gradients = tape.gradient(total_loss, student_model.trainable_weights)
    student_optimizer.apply_gradients(zip(gradients, student_model.trainable_weights))


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


# Adversial 3

In [48]:
from tensorflow.keras import layers, models, Input
discriminator_input_shape = 426
# Discriminator Model
discriminator_input = Input(shape=(discriminator_input_shape,))  # Bottleneck + Output size
x = layers.Dense(128, activation='relu')(discriminator_input)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation='relu')(x)
discriminator_output = layers.Dense(1, activation='sigmoid')(x)  # Binary classification
discriminator = models.Model(discriminator_input, discriminator_output, name="Discriminator")

discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
discriminator.summary()


In [51]:
from tensorflow.keras.losses import MeanSquaredError, CategoricalCrossentropy, BinaryCrossentropy


In [55]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

# Assuming y_train and y_test are categorical labels
y_train_onehot = to_categorical(y_train, num_classes=276)
y_test_onehot = to_categorical(y_test, num_classes=276)

# Optimizers
student_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# Training Parameters
batch_size = 32
epochs = 50

# Training Loop
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    
    for step in range(0, len(x_train), batch_size):
        # Fetch a batch of training data
        x_batch = x_train[step:step + batch_size]
        y_batch = y_train_onehot[step:step + batch_size]

        # Teacher Predictions (fixed)
        # teacher_bottleneck, teacher_output = teacher_model.predict(x_batch)
        # Teacher Predictions (fixed)
        teacher_output = teacher_model.predict(x_batch)

        model_bottleneck = Model(inputs=teacher_model.input, outputs=x_b_T)

        teacher_bottleneck = model_bottleneck.predict(np.array(x_batch))  # Placeholder for bottleneck, if needed


        # Student Predictions
        with tf.GradientTape() as tape_s:
            student_bottleneck, student_output = student_model(x_batch)

            teacher_bottleneck_flat = tf.keras.layers.Flatten()(teacher_bottleneck)
            student_bottleneck_flat = tf.keras.layers.Flatten()(student_bottleneck)

            # Concatenate bottleneck and output for discriminator
            teacher_concat = tf.concat([teacher_bottleneck_flat, teacher_output], axis=1)
            student_concat = tf.concat([student_bottleneck_flat, student_output], axis=1)

            # Discriminator Predictions
            discriminator_teacher = discriminator(teacher_concat)
            discriminator_student = discriminator(student_concat)

            # Discriminator Loss
            disc_loss = tf.keras.losses.binary_crossentropy(
                tf.concat([tf.ones_like(discriminator_teacher), tf.zeros_like(discriminator_student)], axis=0),
                tf.concat([discriminator_teacher, discriminator_student], axis=0)
            )

            mse_loss = tf.keras.losses.MeanSquaredError()
            categorical_loss = tf.keras.losses.CategoricalCrossentropy()
            binary_loss = tf.keras.losses.BinaryCrossentropy()

            # Knowledge Distillation Loss (MSE + Categorical Crossentropy)
            kd_loss = tf.reduce_mean(mse_loss(teacher_bottleneck_flat, student_bottleneck_flat)) + \
                      tf.reduce_mean(categorical_loss(y_batch, student_output))

            # Adversarial Loss (student tries to fool discriminator)
            adv_loss = tf.reduce_mean(binary_loss(
                tf.ones_like(discriminator_student), discriminator_student))

            # Total Student Loss
            student_loss = kd_loss + adv_loss

        # Backpropagation for Student
        grads_student = tape_s.gradient(student_loss, student_model.trainable_weights)
        student_optimizer.apply_gradients(zip(grads_student, student_model.trainable_weights))

        # # Train the Discriminator
        # with tf.GradientTape() as tape_d:
        #     # Discriminator Loss (Teacher vs Student)
        #     discriminator_loss = tf.reduce_mean(disc_loss)

        # grads_discriminator = tape_d.gradient(discriminator_loss, discriminator.trainable_weights)
        # discriminator_optimizer.apply_gradients(zip(grads_discriminator, discriminator.trainable_weights))

        # Training the discriminator
with tf.GradientTape() as tape_d:
    # Teacher and Student bottleneck + output predictions
    discriminator_teacher = discriminator(tf.concat([teacher_bottleneck_flat, teacher_output], axis=1))
    discriminator_student = discriminator(tf.concat([student_bottleneck_flat, student_output], axis=1))
    
    # Combine labels (1 for teacher, 0 for student)
    true_labels = tf.concat([tf.ones_like(discriminator_teacher), tf.zeros_like(discriminator_student)], axis=0)
    pred_outputs = tf.concat([discriminator_teacher, discriminator_student], axis=0)
    
    # Compute discriminator loss
    disc_loss = binary_loss(true_labels, pred_outputs)
    
# Calculate gradients
grads_discriminator = tape_d.gradient(disc_loss, discriminator.trainable_weights)

# Apply gradients to discriminator
if grads_discriminator:  # Check if gradients are not None
    discriminator_optimizer.apply_gradients(zip(grads_discriminator, discriminator.trainable_weights))
else:
    print("No gradients computed for discriminator. Check model connection and loss computation.")

    
    # Evaluate on test data
    teacher_bottleneck_test, teacher_output_test = teacher_model.predict(x_test)
    student_bottleneck_test, student_output_test = student_model.predict(x_test)

    # Discriminator evaluation
    teacher_concat_test = tf.concat([teacher_bottleneck_test, teacher_output_test], axis=1)
    student_concat_test = tf.concat([student_bottleneck_test, student_output_test], axis=1)
    discriminator_test_loss, discriminator_test_accuracy = discriminator.evaluate(
        tf.concat([teacher_concat_test, student_concat_test], axis=0),
        tf.concat([tf.ones((len(x_test), 1)), tf.zeros((len(x_test), 1))], axis=0),
        verbose=0
    )

    print(f"Epoch {epoch + 1}: Student Loss = {student_loss.numpy()}, Discriminator Test Loss = {discriminator_test_loss:.4f}, Discriminator Test Accuracy = {discriminator_test_accuracy:.4f}")


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [56]:
student_model.summary()

In [62]:
# Get predictions from the student model
_, student_class_predictions = student_model.predict(x_test)

# Convert one-hot-encoded `y_test` back to labels if needed
y_test_labels = tf.argmax(y_test_onehot, axis=1)  # Assuming y_test is one-hot encoded

# Compute accuracy for the student model
student_accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(student_class_predictions, axis=1), y_test_labels), tf.float32)
)
print(student_accuracy)
print(f"Student Model Accuracy: {student_accuracy.numpy() * 100:.2f}%")


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
tf.Tensor(0.035526317, shape=(), dtype=float32)
Student Model Accuracy: 3.55%


In [60]:
y_test.shape

(760, 1)

In [63]:
result = student_model.predict(x_test)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 605us/step
