# Advanced Hybrid Quantum Neural Network for dGmix Prediction

This notebook implements a hybrid quantum-classical neural network using TensorFlow Quantum to predict dGmix values from the provided dataset.

## 1. Setup and Installation

First, install the required packages. This is especially important in Google Colab.

In [None]:
!pip install tensorflow tensorflow-quantum cirq sympy pandas scikit-learn matplotlib

## 2. Loading Data in Google Colab

There are two main methods to load your dataset in Google Colab; run whichever one fits your setup (you do not need both):

### Method 1: Direct Upload
For smaller files (< 100MB), you can directly upload the file to the Colab environment.

In [None]:
import io

import pandas as pd
from google.colab import files

# Name the rest of the notebook expects the dataset to have
EXPECTED_FILENAME = 'data_filtered-1.csv'

# This will prompt you to select and upload your file.
# The result maps each uploaded file name to its raw bytes.
uploaded = files.upload()

if EXPECTED_FILENAME in uploaded:
    csv_source = io.BytesIO(uploaded[EXPECTED_FILENAME])
elif uploaded:
    # A file was uploaded under a different name: preview it anyway, but warn that
    # later cells load the dataset by the expected name.
    uploaded_name = next(iter(uploaded))
    print(f"Uploaded '{uploaded_name}', but later cells expect '{EXPECTED_FILENAME}'.")
    csv_source = io.BytesIO(uploaded[uploaded_name])
else:
    # Nothing was uploaded this time; fall back to a copy already in the current directory
    csv_source = EXPECTED_FILENAME

# Display the first few rows of the uploaded dataset
df = pd.read_csv(csv_source)
df.head()

### Method 2: Using Google Drive
For larger files or if you want to persist data between sessions, mounting Google Drive is recommended.

In [None]:
# pandas is imported here so this cell also works on a fresh kernel when the
# direct-upload cell (Method 1) was skipped.
import pandas as pd
from google.colab import drive

# Mount Google Drive under /content/drive (Colab asks for authorization)
drive.mount('/content/drive')

# Assuming your file is stored in a specific folder in Google Drive
# Adjust the path as needed
dataset_path = '/content/drive/MyDrive/your_folder/data_filtered-1.csv'

# Read and display the first few rows
df = pd.read_csv(dataset_path)
df.head()

## 3. Importing Libraries

In [None]:
import tensorflow as tf
import tensorflow_quantum as tfq
import cirq
import sympy
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import os

# Seed NumPy and TensorFlow so weight initialisation and batch shuffling are repeatable
# between runs (the cross-validation split already uses random_state=42).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Check TensorFlow and TFQ versions
print(f"TensorFlow version: {tf.__version__}")
print(f"TensorFlow Quantum version: {tfq.__version__}")

## 4. Data Preparation Function

In [None]:
def prepare_data(file_path, target_column='dGmix'):
    """Load a CSV file and split it into a feature table and a target column.

    Steps: read the CSV, print a short summary, drop every object-dtype column,
    drop rows containing missing values, then separate the target column.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        target_column: Name of the column to predict. Defaults to ``'dGmix'``.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a DataFrame holding the remaining columns
        and ``y`` is the target column as a Series.

    Raises:
        KeyError: If ``target_column`` is absent from the file, or was removed
            because it has object dtype.
    """
    df = pd.read_csv(file_path)

    # Display initial information
    print(f"Original data shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")
    print(f"Data types:\n{df.dtypes}")

    # Drop object columns
    object_columns = df.select_dtypes(include=['object']).columns
    print(f"Dropping object columns: {object_columns.tolist()}")
    df = df.drop(columns=object_columns)

    # Fail early with a clear message instead of an opaque error from DataFrame.drop
    if target_column not in df.columns:
        raise KeyError(
            f"Target column '{target_column}' is missing or is not numeric; "
            f"remaining columns: {df.columns.tolist()}"
        )

    # Handle missing values
    missing_values = df.isna().sum().sum()
    print(f"Total missing values: {missing_values}")
    if missing_values > 0:
        df = df.dropna()
        print(f"Shape after removing rows with missing values: {df.shape}")

    # Separate features and target
    X = df.drop(columns=[target_column])
    y = df[target_column]

    print(f"Features shape: {X.shape}, Target shape: {y.shape}")
    return X, y

## 5. Quantum Circuit Creation

In [None]:
def create_advanced_quantum_circuit(qubits, features=None, n_layers=4):
    """Build a layered variational circuit, optionally prefixed by a feature encoding.

    When ``features`` is given, each of the first ``len(qubits)`` feature values is
    encoded on its own qubit as an RX followed by an RZ rotation by ``pi * feature``.
    The variational part is always appended afterwards: ``n_layers`` layers, each made
    of symbolic RY/RZ rotations on every qubit, a ring of CNOTs, and (from layer
    ``n_layers // 2`` onwards) extra CNOTs between qubits two positions apart.

    Note that the returned circuit contains the symbolic variational layers even when
    ``features`` is supplied.

    Args:
        qubits: Sequence of Cirq qubits the circuit acts on.
        features: Optional sequence of numeric feature values to encode; values beyond
            the number of qubits are ignored.
        n_layers: Number of variational layers.

    Returns:
        Tuple ``(circuit, symbols)`` with the ``cirq.Circuit`` and the list of
        ``sympy.Symbol`` parameters in the order they were created.
    """
    n_qubits = len(qubits)
    circuit = cirq.Circuit()

    # Encode features if provided (zip stops at the shorter of qubits/features)
    if features is not None:
        for qubit, feature in zip(qubits, features):
            circuit.append(cirq.rx(np.pi * feature).on(qubit))
            circuit.append(cirq.rz(np.pi * feature).on(qubit))

    # Variational parameters, collected in creation order
    symbols = []

    # Add variational layers
    for layer in range(n_layers):
        for i, qubit in enumerate(qubits):
            # Rotation gates
            symbol_ry = sympy.Symbol(f'ry_{layer}_{i}')
            symbol_rz = sympy.Symbol(f'rz_{layer}_{i}')
            symbols.extend([symbol_ry, symbol_rz])

            circuit.append(cirq.ry(symbol_ry).on(qubit))
            circuit.append(cirq.rz(symbol_rz).on(qubit))

        # Ring of CNOTs. With a single qubit the ring would pair the qubit with itself,
        # which is not a valid two-qubit operation, so entanglement is skipped.
        if n_qubits > 1:
            for i in range(n_qubits):
                circuit.append(cirq.CNOT(qubits[i], qubits[(i + 1) % n_qubits]))

        # Add non-local interactions in deeper layers
        if layer >= n_layers // 2:
            for i in range(0, n_qubits, 2):
                if i + 2 < n_qubits:
                    circuit.append(cirq.CNOT(qubits[i], qubits[i + 2]))

    return circuit, symbols

## 6. Hybrid Model Building

In [None]:
def build_advanced_hybrid_model(n_features, n_qubits, readout_qubits=None):
    """Build a two-input Keras model combining a dense network with a PQC layer.

    The classical branch maps ``n_features`` inputs through BatchNormalization and
    Dense/Dropout layers down to ``n_qubits`` tanh outputs. The quantum branch feeds
    serialized circuits into ``tfq.layers.PQC`` with the variational circuit from
    ``create_advanced_quantum_circuit`` and measures single-qubit Z observables plus
    ZZ products of consecutive readout qubits. Both branches are concatenated and
    passed through a small dense head with one linear output.

    Args:
        n_features: Width of the classical input vector.
        n_qubits: Number of qubits in the variational circuit.
        readout_qubits: Indices of the qubits to measure. Defaults to all qubits.

    Returns:
        An uncompiled ``tf.keras.Model`` with inputs ``[classical_input, quantum_input]``.
    """
    if readout_qubits is None:
        readout_qubits = list(range(n_qubits))

    # Create qubits
    qubits = [cirq.GridQubit(0, i) for i in range(n_qubits)]

    # Create quantum circuit (the symbol list is not needed here)
    circuit, _ = create_advanced_quantum_circuit(qubits)

    # Define observables - use strategic combinations of qubits
    observables = [cirq.Z(qubits[i]) for i in readout_qubits]
    if len(readout_qubits) > 1:
        # Add parity observables for better feature extraction.
        # Cirq builds products of Pauli operators with `*`; the `@` operator is not
        # used for this.
        for first, second in zip(readout_qubits[:-1], readout_qubits[1:]):
            observables.append(cirq.Z(qubits[first]) * cirq.Z(qubits[second]))

    # Classical pre-processing
    classical_input = tf.keras.layers.Input(shape=(n_features,), name='classical_input')
    x = tf.keras.layers.BatchNormalization()(classical_input)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    encoded_classical = tf.keras.layers.Dense(n_qubits, activation='tanh')(x)

    # Quantum processing: the input is a batch of serialized circuits (string tensor)
    quantum_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='quantum_input')
    quantum_layer = tfq.layers.PQC(circuit, observables)(quantum_input)

    # Post-processing
    combined = tf.keras.layers.Concatenate()([encoded_classical, quantum_layer])
    x = tf.keras.layers.Dense(16, activation='relu')(combined)
    output = tf.keras.layers.Dense(1)(x)

    # Create the model
    model = tf.keras.models.Model(
        inputs=[classical_input, quantum_input],
        outputs=output
    )

    return model

## 7. Cross-Validation and Model Training

In [None]:
# Location of the dataset.
# Direct-upload workflow: the CSV sits in the current working directory.
data_path = 'data_filtered-1.csv'

# Google Drive workflow: point at the file inside the mounted Drive instead.
# data_path = '/content/drive/MyDrive/your_folder/data_filtered-1.csv'

# Read the file and split it into the feature table X and the target y
X, y = prepare_data(file_path=data_path)
print(f"Data loaded: {X.shape} features, {y.shape} targets")

In [None]:
# Model sizing derived from the loaded feature table
n_features = len(X.columns)
n_qubits = min(8, n_features)  # at most 8 qubits, to keep the run efficient
readout_qubits = list(range(n_qubits))[:4]  # measure only the first (up to four) qubits

# One row of grid qubits shared by every circuit built below
qubits = [cirq.GridQubit(0, column) for column in range(n_qubits)]

print(f"Using {n_qubits} qubits with {n_features} features")
print(f"Readout qubits: {readout_qubits}")

In [None]:
# Implement k-fold cross-validation for better evaluation
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_results = []  # one (mse, r2) tuple per fold

# Create the output directory once, instead of on every fold
os.makedirs('results', exist_ok=True)

for fold, (train_idx, test_idx) in enumerate(kf.split(X)):
    print(f"\nTraining fold {fold+1}/{n_splits}")

    # Release Keras state left behind by the previous fold's model so that
    # memory use does not grow with every fold
    tf.keras.backend.clear_session()

    # Split data
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Standardize features (statistics come from the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Scale to [0, 1] for quantum encoding; the range is fitted on the training split,
    # so test values can fall slightly outside [0, 1]
    minmax_scaler = MinMaxScaler()
    X_train_minmax = minmax_scaler.fit_transform(X_train_scaled)
    X_test_minmax = minmax_scaler.transform(X_test_scaled)

    # Create quantum circuits for each data point (this can take some time)
    # NOTE(review): create_advanced_quantum_circuit returns the feature encoding *plus* the
    # symbolic variational layers, and the model's PQC layer holds the same variational
    # circuit again — confirm that feeding the ansatz through twice is intended.
    print("Creating quantum circuits for training data...")
    X_train_circuits = [create_advanced_quantum_circuit(qubits, features=x)[0] for x in X_train_minmax]

    print("Creating quantum circuits for testing data...")
    X_test_circuits = [create_advanced_quantum_circuit(qubits, features=x)[0] for x in X_test_minmax]

    # Convert to TFQ tensors
    print("Converting to TFQ tensors...")
    X_train_tfq = tfq.convert_to_tensor(X_train_circuits)
    X_test_tfq = tfq.convert_to_tensor(X_test_circuits)

    # Build a fresh model for this fold
    print("Building quantum model...")
    model = build_advanced_hybrid_model(n_features, n_qubits, readout_qubits)

    # Compile model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )

    # Display model summary
    model.summary()

    # Train model; part of the training split is held out for early stopping / LR decay
    print("Training model...")
    history = model.fit(
        [X_train_scaled, X_train_tfq],
        y_train,
        batch_size=32,
        epochs=50,
        validation_split=0.2,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
        ],
        verbose=1
    )

    # Plot training history
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Fold {fold+1} Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'])

    plt.subplot(1, 2, 2)
    plt.plot(history.history['mae'])
    plt.plot(history.history['val_mae'])
    plt.title(f'Fold {fold+1} MAE')
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.legend(['Train', 'Validation'])
    plt.tight_layout()
    plt.show()

    # Evaluate model; flatten the (n, 1) prediction array so it lines up with the 1-D targets
    print("Evaluating model...")
    y_pred = model.predict([X_test_scaled, X_test_tfq]).ravel()
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Fold {fold+1} - MSE: {mse:.4f}, R²: {r2:.4f}")
    fold_results.append((mse, r2))

    # Visualize predictions against the ideal y = x line
    plt.figure(figsize=(8, 6))
    plt.scatter(y_test, y_pred)
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(f'Fold {fold+1} - Predictions vs Actual (R² = {r2:.4f})')
    plt.show()

    try:
        # Best-effort save into the local 'results' folder; a failure here must not
        # abort the remaining folds
        model.save(f'results/quantum_model_fold_{fold+1}')
    except Exception as e:
        print(f"Could not save model: {str(e)}")
        print("To save models in Colab, you need to mount Google Drive and save to the drive path")

## 8. Calculate Average Performance

In [None]:
# Aggregate the per-fold (mse, r2) pairs collected during cross-validation
avg_mse = np.mean([fold_mse for fold_mse, _ in fold_results])
avg_r2 = np.mean([fold_r2 for _, fold_r2 in fold_results])

print(f"\nAverage performance across {n_splits} folds:")
print(f"Mean MSE: {avg_mse:.4f}")
print(f"Mean R²: {avg_r2:.4f}")

## 9. Train Final Model on All Data

In [None]:
# Train final model on all data
print("\nTraining final model on all data...")

# Fit the standardizer on the complete feature table, then apply it
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Rescale the standardized features to [0, 1] for quantum encoding
minmax_scaler = MinMaxScaler().fit(X_scaled)
X_minmax = minmax_scaler.transform(X_scaled)

# One circuit per sample, serialized into a TFQ tensor
print("Creating quantum circuits for all data...")
X_circuits = [
    create_advanced_quantum_circuit(qubits, features=sample)[0]
    for sample in X_minmax
]
X_tfq = tfq.convert_to_tensor(X_circuits)

# Build and compile the final model
final_model = build_advanced_hybrid_model(n_features, n_qubits, readout_qubits)
final_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
final_model.compile(optimizer=final_optimizer, loss='mse', metrics=['mae'])

# Stop early and decay the learning rate when the validation metric stalls
final_callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=8),
]

# Train on everything, holding out 10% for validation
final_history = final_model.fit(
    x=[X_scaled, X_tfq],
    y=y,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
    callbacks=final_callbacks,
    verbose=1,
)

## 10. Visualize Final Model Results

In [None]:
# Plot training history: loss on the left panel, MAE on the right
history_panels = (
    ('loss', 'val_loss', 'Final Model Training Loss', 'Loss'),
    ('mae', 'val_mae', 'Final Model MAE', 'MAE'),
)

plt.figure(figsize=(12, 5))
for panel_index, (train_key, val_key, panel_title, y_label) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(final_history.history[train_key])
    plt.plot(final_history.history[val_key])
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Validation'], loc='upper right')
plt.tight_layout()
plt.show()

## 11. Save Final Model (to Google Drive)

In [None]:
import pickle

# For saving to Google Drive
drive_root = '/content/drive/MyDrive'
drive_dir = '/content/drive/MyDrive/quantum_models'

try:
    # Without a mounted Drive, os.makedirs would quietly create a local folder at this
    # path and the cell would report success without anything reaching Drive.
    # NOTE(review): assumes Drive was mounted at /content/drive as in the loading section.
    if not os.path.isdir(drive_root):
        raise FileNotFoundError(f"{drive_root} not found - Google Drive does not appear to be mounted")

    # Create a directory in Google Drive
    os.makedirs(drive_dir, exist_ok=True)

    # Save the model
    final_model.save(f'{drive_dir}/final_quantum_model')

    # Save the visualization
    plt.figure(figsize=(10, 6))
    plt.plot(final_history.history['loss'])
    plt.plot(final_history.history['val_loss'])
    plt.title('Final Model Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.savefig(f'{drive_dir}/final_model_training.png')

    # Save the scalers for future prediction.
    # Only unpickle these files from a location you trust: loading a pickle runs code.
    with open(f'{drive_dir}/standard_scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)
    with open(f'{drive_dir}/minmax_scaler.pkl', 'wb') as f:
        pickle.dump(minmax_scaler, f)

    print(f"Model and associated files saved to {drive_dir}")
except Exception as e:
    # Best-effort: report the failure without aborting the notebook
    print(f"Error saving to Google Drive: {str(e)}")
    print("Make sure you have mounted your Google Drive and have write permissions.")

## 12. Using the Model for Prediction (Example)

In [None]:
# Example of how to use the model for prediction
def make_prediction(model, new_data, scaler, minmax_scaler, qubits):
    """Run the trained hybrid model on new samples.

    The samples are passed through ``scaler`` and then ``minmax_scaler``; the
    min-max output is turned into one circuit per sample, and the model is called
    with the standardized features together with the circuit tensor.

    Args:
        model: Trained model exposing ``predict``.
        new_data: Feature rows accepted by ``scaler.transform``.
        scaler: Fitted scaler applied first.
        minmax_scaler: Fitted scaler applied to the output of ``scaler``.
        qubits: Qubits used to build the per-sample circuits.

    Returns:
        Whatever ``model.predict`` returns for the prepared inputs.
    """
    standardized = scaler.transform(new_data)
    unit_range = minmax_scaler.transform(standardized)

    # One circuit per row, serialized into a single TFQ tensor
    circuit_batch = tfq.convert_to_tensor(
        [create_advanced_quantum_circuit(qubits, features=row)[0] for row in unit_range]
    )

    return model.predict([standardized, circuit_batch])

# Example (assuming we have some new data)
# new_data = X.iloc[0:5]  # Just for demonstration, using the first 5 rows
# predictions = make_prediction(final_model, new_data, scaler, minmax_scaler, qubits)
# print("Example predictions:")
# for i, pred in enumerate(predictions):
#     print(f"Sample {i+1}: Predicted dGmix = {pred[0]:.4f}")