<a href="https://colab.research.google.com/github/sonnyloweus/QuantumDynamicsAI/blob/main/Mutual_Information_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# File Setup

In [13]:
from google.colab import drive
import os
import sys

drive.mount('/content/drive')
directory_path = '/content/drive/MyDrive/Quantum/'
sys.path.append('/content/drive/MyDrive/Quantum')

print(os.listdir(directory_path))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
['Symmetric_Exclusion_Process _Simulator.ipynb', 'Project Presentation Short.gslides', 'dense_small.param', 'quantum_simulation_data.pkl', 'Quantum_Brickworks_Circuit_Simulator.ipynb', 'DiscreteVariationalParameterizations.py', 'Screenshots', '__pycache__', 'QuantumSimulatorDataset.py', 'GibbsSampling.py', 'DiscreteVariationalParameterizationsDeepV2.py', 'DiscreteVariationalParameterizationsDeepV2.ipynb', 'quantum_experiments', 'DiscreteVariationalParameterizationsDeepV3.py', 'Mutual_Information_Maximizing_Model.ipynb', 'Mutual_Information_Transformer.ipynb']


In [14]:
!pip install qiskit-aer
!pip install qiskit
!pip install pylatexenc
!pip install tqdm




In [15]:
from QuantumSimulatorDataset import QuantumSimulationDatasetFast, generate_circuit_params
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import ast
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, random_split
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Data Processing

In [16]:
# Load and prepare the data
data_path = '/content/drive/MyDrive/Quantum/quantum_simulation_data.pkl'
data = pd.read_pickle(data_path)

def to_tensor(item):
    if isinstance(item, torch.Tensor):
        # If already a tensor, clone and detach it to prevent issues
        return item.clone().detach()
    elif isinstance(item, str):
        # Convert string to list using ast.literal_eval
        item_list = ast.literal_eval(item)
        return torch.tensor(item_list)
    elif isinstance(item, list):
        return torch.tensor(item)
    else:
        raise ValueError(f"Expected a list or tensor, but got {type(item)}")

data.shape

(64000, 2)

In [17]:
data['Initial_State'] = [to_tensor(lst) for lst in data['Initial_State']]
data['Final_State'] = [to_tensor(lst) for lst in data['Final_State']]

print(data['Final_State'][0])

# Convert Series to list of tensors before stacking
initial_state_tensors = list(data['Initial_State'])
final_state_tensors = list(data['Final_State'])

dataset = TensorDataset(torch.stack(initial_state_tensors), torch.stack(final_state_tensors))
print(dataset)

# Define the split sizes (e.g., 80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Perform the train-test split
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Example of how to use DataLoader for training and testing
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# X_train, X_test, y_train, y_test = train_test_split(initial_state_tensors, final_state_tensors, test_size=0.2, random_state=42)
# train_loader = torch.utils.data.DataLoader(list(zip(X_train, y_train)), batch_size=32, shuffle=False)
# test_loader = torch.utils.data.DataLoader(list(zip(X_test, y_test)), batch_size=32, shuffle=False)

tensor([0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1.])
<torch.utils.data.dataset.TensorDataset object at 0x7df4c7acdea0>


# Mutual Information Loss

In [18]:
def mutual_information_loss(x, y):
    # Ensure x and y are tensors and have the same shape
    assert x.shape == y.shape, "x and y must have the same shape"

    # Compute joint histogram
    joint_hist = torch.histc((x * y).float(), bins=256, min=0, max=256)

    # Compute marginal histograms
    x_hist = torch.histc(x.float(), bins=256, min=0, max=256)
    y_hist = torch.histc(y.float(), bins=256, min=0, max=256)

    # Normalize histograms to get probabilities
    joint_prob = joint_hist / joint_hist.sum()
    x_prob = x_hist / x_hist.sum()
    y_prob = y_hist / y_hist.sum()

    # Compute the entropies
    H_x = -torch.sum(x_prob * torch.log(x_prob + 1e-12))
    H_y = -torch.sum(y_prob * torch.log(y_prob + 1e-12))
    H_xy = -torch.sum(joint_prob * torch.log(joint_prob + 1e-12))

    # Compute mutual information
    I_xy = H_x + H_y - H_xy

    # Return mutual information loss (negative mutual information)
    mutual_info_loss = -I_xy

    return mutual_info_loss

# Model Definition

In [19]:
class QuantumTransformer(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_heads, output_dim):
        super(QuantumTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.transformer = nn.Transformer(
            d_model=hidden_dim,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, src):
        src_emb = self.embedding(src).unsqueeze(0)  # Add sequence dimension
        transformer_out = self.transformer(src_emb, src_emb).squeeze(0)  # Remove sequence dimension
        out = self.fc(transformer_out)
        return self.sigmoid(out)

    def predict_binary(self, src):
        outputs = self.forward(src)
        binary_outputs = (outputs > 0.5).float()  # Convert probabilities to binary
        return binary_outputs

# Model Initialization and Training

In [20]:
# Version 1
# Instantiate the model
input_dim = 12
hidden_dim = 64
num_layers = 4
num_heads = 8
output_dim = 12

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QuantumTransformer(input_dim, hidden_dim, num_layers, num_heads, output_dim)
print('Device Running: ', device)

Device Running:  cpu




In [21]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=10)

losses = []

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for idx, (inputs, targets) in enumerate(tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}')):
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Calculate binary accuracy
        predicted = (outputs > 0.5).float()
        correct_predictions += (predicted == targets).sum().item()
        total_samples += targets.numel()

    avg_loss = total_loss / len(train_loader)
    losses.append(avg_loss)
    binary_accuracy = correct_predictions / total_samples

    print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.10f}, Binary Accuracy: {binary_accuracy:.10f}')


# If you want to include mutual information calculations
    # if idx % 100 == 0:
    #     mi_dynamic_loss = mutual_information_loss(inputs, outputs).mean()
    #     expected_mi_dynamic_loss = mutual_information_loss(inputs, targets).mean()
    #     mi_final_loss = mutual_information_loss(targets, outputs).mean()
    #     print(' Iteration', idx, 'I(x,y) >', f"{expected_mi_dynamic_loss:,.5f}", \
    #           ' I(x̄,y) >', f"{mi_dynamic_loss:,.5f}", \
    #           ' I(x,x̄) >', f"{mi_final_loss:,.5f}", \
    #           ' Loss > ', f"{loss:,.5f}")


Epoch 1/10: 100%|██████████| 1600/1600 [02:59<00:00,  8.93it/s]


Epoch 1/10, Loss: 49.9350431339, Binary Accuracy: 0.5003206380


Epoch 2/10:  70%|███████   | 1120/1600 [02:36<01:06,  7.18it/s]


KeyboardInterrupt: 

In [None]:
losses_np = np.array(losses)

# Plot the loss after training
sns.set_style("whitegrid")

plt.figure(figsize=(8, 6))
plt.plot(range(1, len(losses_np)+1), losses_np, marker='o')
plt.title('Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.xticks(range(1, len(losses_np)))
plt.grid()
plt.show()

In [None]:
# Testing loop
model.eval()
total_loss = 0.0
predictions = []
given = []
actual = []

with torch.no_grad():
    for idx, (inputs, targets) in enumerate(tqdm(test_loader, desc=f'Epoch {epoch+1}/{epochs}')):
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model(inputs.long(), targets.long())
        loss = criterion(outputs, targets.float())
        total_loss += loss.item()

        for i in range(len(inputs)):
            predictions.append(outputs[i].tolist())
            given.append(inputs[i].tolist())
            actual.append(targets[i].tolist())

    avg_test_loss = total_loss / len(test_loader)
    print(f'Test Loss: {avg_test_loss:.4f}')

# Save the model and predictions
torch.save(model.state_dict(), 'quantum_transformer_model.pth')
preds = {
    'Initial_States': given,
    'Final_State_Pred': predictions,
    'Final_States': actual
}
final_df = pd.DataFrame(preds)
final_df.head()

In [None]:
class QuantumTransformer(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_heads, output_dim):
        super(QuantumTransformer, self).__init__()
        self.embedding = nn.Embedding(2, hidden_dim)
        self.transformer = nn.Transformer(
            d_model=hidden_dim,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, src):
        # src shape: [batch_size, seq_len]
        batch_size, seq_len = src.shape
        src_emb = self.embedding(src)  # shape: [batch_size, seq_len, hidden_dim]
        # print("src_emb shape: ", src_emb.shape)

        pos_enc = self.position_encoding[:, :seq_len, :] # shape: [1, seq_len, hidden_dim]
        # print("pos_enc shape: ", pos_enc.shape)

        src_emb = src_emb + pos_enc

        # Permute for Transformer
        src_emb = src_emb.permute(1, 0, 2)  # shape: [seq_len, batch_size, hidden_dim]
        # print("permuted shape: ", src_emb.shape)

        transformer_out = self.transformer(src_emb, src_emb)  # Remove sequence dimension
        final_hidden_state = transformer_out[-1]

        out = self.fc(final_hidden_state)
        return self.sigmoid(out)


    def predict_binary(self, src):
        outputs = self.forward(src)
        binary_outputs = (outputs > 0.5).float()  # Convert probabilities to binary
        return binary_outputs