# 🧠 Entangled AI Models — CNN + MLP  
This notebook demonstrates a proof-of-concept for *entangled learning* between heterogeneous models (a CNN and an MLP).  
Each model independently learns a classification task, but their predictions are softly synchronized through a KL-based entangled loss component.  


## 1. Load and Prepare MNIST Data

In [5]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import numpy as np

# Fetch the MNIST train/test splits and unpack images and integer labels.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split


def _flatten_and_scale(images):
    """Reshape images to rows of 28*28 values, cast to float32, divide by 255."""
    flat = images.reshape(-1, 28*28)
    return flat.astype("float32") / 255.0


# Both models take flat 784-vectors as input; the CNN reshapes them internally.
x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)

# One-hot encode the integer labels into 10 classes.
y_train_cat, y_test_cat = (
    to_categorical(labels, 10) for labels in (y_train, y_test)
)


## 2. Define CNN and MLP Models

In [2]:
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D, Reshape
from tensorflow.keras.models import Sequential

# Both models declare their input with an explicit `Input` object instead of
# passing `input_shape=` to the first layer; Keras 3 warns about the latter
# form for Sequential models.

# Model A: CNN
# Takes flat 784-vectors and reshapes them to 28x28x1 images before a single
# 3x3 convolution (16 filters), max pooling, and a small dense classifier
# head ending in a 10-way softmax.
model_A = Sequential([
    Input(shape=(784,)),
    Reshape((28, 28, 1)),
    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax'),
])

# Model B: MLP
# Works directly on the flat 784-vector: two hidden ReLU layers (64 and 32
# units) followed by a 10-way softmax.
model_B = Sequential([
    Input(shape=(784,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax'),
])


  super().__init__(**kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## 3. Define Entangled Loss Function

In [4]:
import tensorflow as tf
from tensorflow.keras.losses import KLDivergence, CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

# Supervised loss shared by both models: passed to `compile` for each model
# and used as the base loss inside the custom training loop.
loss_fn = CategoricalCrossentropy()

def compute_entangled_loss(y_true, y_pred_self, y_pred_other, base_loss_fn=None, entangle_weight=0.01):
    """Combine a supervised loss with a weighted KL term against a peer model.

    Args:
        y_true: Ground-truth targets handed to the base loss.
        y_pred_self: Predictions of the model being trained.
        y_pred_other: Predictions of the peer model.
        base_loss_fn: Callable ``(y_true, y_pred) -> loss``. When ``None``, a
            fresh ``tf.keras.losses.CategoricalCrossentropy()`` is used.
        entangle_weight: Multiplier applied to the KL term.

    Returns:
        ``base_loss_fn(y_true, y_pred_self)
        + entangle_weight * KLDivergence()(y_pred_self, y_pred_other)``.
    """
    supervised = (
        base_loss_fn
        if base_loss_fn is not None
        else tf.keras.losses.CategoricalCrossentropy()
    )
    task_term = supervised(y_true, y_pred_self)

    # Keras' KLDivergence is called as (y_true, y_pred). The model's own
    # prediction goes in the y_true slot here, so the term is KL(self || other).
    peer_term = tf.keras.losses.KLDivergence()(y_pred_self, y_pred_other)

    return task_term + entangle_weight * peer_term


# Each model gets its own Adam instance (learning rate 0.01) so that their
# optimizer states are never shared.
optimizer_A, optimizer_B = Adam(learning_rate=0.01), Adam(learning_rate=0.01)

# Compile A first, then B, with the shared loss and an accuracy metric.
for _model, _optimizer in ((model_A, optimizer_A), (model_B, optimizer_B)):
    _model.compile(optimizer=_optimizer, loss=loss_fn, metrics=["accuracy"])

## 4. Train Both Models with Entangled Learning

In [None]:
# Number of samples per gradient step (passed to `Dataset.batch` below).
batch_size = 8
# Number of passes over the training dataset made by the outer loop.
epochs = 30
def get_lambda(epoch, max_epochs, max_lambda=0.05):
    """Return the entanglement weight for ``epoch`` on a linear ramp.

    The weight grows as ``(epoch / max_epochs) * max_lambda``. Progress is
    clamped to ``[0, 1]``, so the result never drops below 0 or exceeds
    ``max_lambda`` even when ``epoch`` lies outside ``0..max_epochs``.

    Args:
        epoch: Zero-based index of the current epoch.
        max_epochs: Number of epochs over which the ramp spans; must be > 0.
        max_lambda: Weight reached when ``epoch >= max_epochs``.

    Returns:
        The weight as a float in ``[0, max_lambda]``.

    Raises:
        ValueError: If ``max_epochs`` is not positive.
    """
    if max_epochs <= 0:
        raise ValueError(f"max_epochs must be positive, got {max_epochs}")
    progress = epoch / max_epochs
    # Clamp so an out-of-range epoch cannot yield a negative or oversized weight.
    progress = min(max(progress, 0.0), 1.0)
    return progress * max_lambda

# Entanglement weight used in each epoch, in order.
lambda_history = []

# Bug fix: the dataset must carry the one-hot labels (`y_train_cat`). The loop
# below applies CategoricalCrossentropy and `tf.argmax(y_batch, axis=1)`, and
# both need labels of shape (batch, 10); the raw integer `y_train` is 1-D.
# The shuffle buffer covers the whole training set so every epoch draws a
# uniformly reshuffled order rather than a 100-sample sliding window.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train_cat))
    .shuffle(len(x_train))
    .batch(batch_size)
)

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")

    # The ramp length follows `epochs` instead of repeating the literal 30,
    # so changing the epoch count keeps the schedule consistent.
    λ = get_lambda(epoch, max_epochs=epochs, max_lambda=0.05)
    lambda_history.append(λ)
    print(f"Epoch {epoch+1}: λ = {λ:.4f}")

    epoch_loss_A, epoch_loss_B = [], []
    correct_A, correct_B = 0, 0
    total = 0

    for x_batch, y_batch in train_dataset:
        # Snapshot both models' predictions before either one is updated.
        # They are computed outside any GradientTape, so each model treats its
        # peer's output as a fixed target and no gradient reaches the peer.
        last_output_B = model_B(x_batch, training=False)
        last_output_A = model_A(x_batch, training=False)

        # === Train model A ===
        with tf.GradientTape() as tape_A:
            pred_A = model_A(x_batch, training=True)
            loss_A = compute_entangled_loss(
                y_true=y_batch,
                y_pred_self=pred_A,
                y_pred_other=last_output_B,
                base_loss_fn=loss_fn,
                entangle_weight=λ,
            )
        grads_A = tape_A.gradient(loss_A, model_A.trainable_weights)
        optimizer_A.apply_gradients(zip(grads_A, model_A.trainable_weights))
        epoch_loss_A.append(loss_A.numpy())

        # === Train model B ===
        # B is pulled toward A's pre-update snapshot, not A's new weights.
        with tf.GradientTape() as tape_B:
            pred_B = model_B(x_batch, training=True)
            loss_B = compute_entangled_loss(
                y_true=y_batch,
                y_pred_self=pred_B,
                y_pred_other=last_output_A,
                base_loss_fn=loss_fn,
                entangle_weight=λ,
            )
        grads_B = tape_B.gradient(loss_B, model_B.trainable_weights)
        optimizer_B.apply_gradients(zip(grads_B, model_B.trainable_weights))
        epoch_loss_B.append(loss_B.numpy())

        # Running accuracy, based on the predictions made before this step's
        # weight update.
        true_labels = tf.argmax(y_batch, axis=1).numpy()
        correct_A += int(np.sum(tf.argmax(pred_A, axis=1).numpy() == true_labels))
        correct_B += int(np.sum(tf.argmax(pred_B, axis=1).numpy() == true_labels))
        total += int(x_batch.shape[0])

    print(f"Model A - Loss: {np.mean(epoch_loss_A):.4f}, Accuracy: {correct_A/total:.4f}")
    print(f"Model B - Loss: {np.mean(epoch_loss_B):.4f}, Accuracy: {correct_B/total:.4f}")

### Final Results

- **Model A (CNN):**
  - Final Accuracy: 99.64%
  - Final Loss: 0.0318

- **Model B (MLP):**
  - Final Accuracy: 98.74%
  - Final Loss: 0.0659

- **Lambda Scheduler:**
  - Linear ramp λ = (epoch / 30) × 0.05 with epoch = 0…29, i.e. from 0 up to ≈0.0483 in the final epoch (the 0.05 ceiling itself is never reached)


---

## 🧠 Summary

In this notebook, we demonstrated an entangled learning setup using a CNN and an MLP on the MNIST dataset.  
Each model was trained with its own optimizer and supervised loss, while their outputs were softly synchronized through a KL-divergence term whose weight λ increases linearly across epochs.  
This architecture shows promising potential for distributed and privacy-preserving collaborative learning.

---