In [None]:
pip install pandas numpy scikit-learn pennylane matplotlib pennylane-sklearn pennylane-lightning


Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement pennylane-sklearn (from versions: none)
ERROR: No matching distribution found for pennylane-sklearn


In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import pennylane as qml
from pennylane import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

# Step 3: Load Dataset
# The CSV location can be overridden through the AGN_DATASET_PATH environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original local path is used.
dataset_path = os.environ.get(
    'AGN_DATASET_PATH',
    r'C:\Users\sathw\OneDrive\Desktop\typeII_AGN_metadata.csv',
)
data = pd.read_csv(dataset_path)

# Step 4: Define Target and Features
target_column = 'log_bh_mass'
feature_columns = [
    'h_beta_flux', 'h_beta_flux_err', 'oiii_5007_flux', 'oiii_5007_flux_err',
    'h_alpha_flux', 'h_alpha_flux_err', 'nii_6584_flux', 'nii_6584_flux_err',
    'log_stellar_sigma', 'psfMag_u', 'psfMag_g', 'psfMag_r', 'psfMag_i', 'psfMag_z',
    'psfMagErr_u', 'psfMagErr_g', 'psfMagErr_r', 'psfMagErr_i', 'psfMagErr_z',
    'mendel_logM_p50', 'mendel_logM_p16', 'mendel_logM_p84', 'mendel_logMt_p50',
    'mendel_logMt_p16', 'mendel_logMt_p84', 'mendel_logMb_p50', 'mendel_logMb_p16',
    'mendel_logMb_p84', 'mendel_logMd_p50', 'mendel_logMd_p16', 'mendel_logMd_p84',
    'simard_b_t_g', 'simard_e_b_t_g', 'simard_b_t_r', 'simard_e_b_t_r', 'simard_Rhlg',
    'simard_Rhlr', 'simard_Rchl_g', 'simard_Rchl_r', 'simard_Re', 'simard_e_Re',
    'simard_e', 'simard_e_e', 'simard_nb', 'simard_e_nb', 'simard_PpS', 'simard_Pn4'
]

# Step 5: Handle Missing Values
# Rows without a target cannot supervise anything; imputing the label with the
# column mean would fabricate training/evaluation data, so drop them instead.
data = data.dropna(subset=[target_column])
# Impute the remaining gaps with per-column means. `numeric_only=True` keeps
# this working when the CSV also carries non-numeric columns, on which a plain
# `DataFrame.mean()` raises in pandas >= 2.0.
data = data.fillna(data.mean(numeric_only=True))

# Step 6: Normalize Features and Target using Min-Max Normalization
# NOTE(review): the imputation means, both scalers and the PCA below are fit
# on ALL rows before any train/test split, so held-out rows influence the
# preprocessing. Fit them per fold if a leakage-free estimate is required.
scaler_features = MinMaxScaler()
features = scaler_features.fit_transform(data[feature_columns])

scaler_target = MinMaxScaler()
target = scaler_target.fit_transform(data[[target_column]]).flatten()

# Step 7: Dimensionality Reduction using PCA
# One principal component per rotation angle fed to the quantum feature map.
pca = PCA(n_components=4)
features_reduced = pca.fit_transform(features)

# Step 8: Define Quantum Device and Circuit
n_qubits = 4
dev = qml.device("default.qubit", wires=n_qubits)


@qml.qnode(dev)
def quantum_circuit(x):
    """Angle-encode one sample and measure every qubit in the Z basis.

    Qubit ``i`` receives an RX rotation followed by an RY rotation, both by
    the angle ``x[i]``. No entangling gates are applied.

    Args:
        x: Indexable sequence holding at least ``n_qubits`` rotation angles.

    Returns:
        One Pauli-Z expectation value per qubit, in wire order.
    """
    wires = range(n_qubits)
    for wire in wires:
        angle = x[wire]
        qml.RX(angle, wires=wire)
        qml.RY(angle, wires=wire)

    measurements = []
    for wire in wires:
        measurements.append(qml.expval(qml.PauliZ(wire)))
    return measurements

def quantum_feature_map(x):
    """Map one sample to its vector of circuit expectation values.

    Args:
        x: Rotation angles for a single sample, passed straight to
            ``quantum_circuit``.

    Returns:
        Array built from the values ``quantum_circuit(x)`` returns.
    """
    expectation_values = quantum_circuit(x)
    return np.array(expectation_values)

# Step 9: Define Quantum Linear Regression Model
class QuantumLinearRegression:
    """Intercept-free linear regression trained with PennyLane gradient descent.

    The model is ``y_hat = X @ weights`` and is fitted by minimising the mean
    squared error with ``qml.GradientDescentOptimizer``.

    Attributes:
        n_features: Length of the weight vector.
        weights: Trainable weight vector, initialised to zeros.
    """

    def __init__(self, n_features):
        """Create a model with ``n_features`` zero-initialised weights."""
        self.n_features = n_features
        self.weights = np.zeros(n_features, requires_grad=True)

    def predict(self, X):
        """Return ``X @ weights`` for the current weights."""
        return np.dot(X, self.weights)

    def cost(self, X, y, weights=None):
        """Mean squared error of the linear model on ``(X, y)``.

        Args:
            X: Feature matrix; its last axis must match the weight vector.
            y: Target values.
            weights: Weight vector to evaluate. Defaults to ``self.weights``;
                the optimiser passes the variable it differentiates here.

        Returns:
            The mean of the squared residuals.
        """
        if weights is None:
            weights = self.weights
        residuals = np.dot(X, weights) - y
        return np.mean(residuals ** 2)

    def fit(self, X, y, epochs=100, lr=0.01):
        """Run ``epochs`` gradient-descent steps on the MSE cost.

        Args:
            X: Training feature matrix.
            y: Training targets.
            epochs: Number of optimiser steps.
            lr: Optimiser step size.

        Returns:
            ``self``, so calls can be chained.
        """
        opt = qml.GradientDescentOptimizer(stepsize=lr)
        for _ in range(epochs):
            # The objective must be a function of the argument being
            # differentiated. The previous closure ignored `w` and read
            # `self.weights` instead, so the gradient with respect to `w` was
            # zero and the weights never moved from their initial zeros.
            self.weights = opt.step(
                lambda w: self.cost(X, y, weights=w), self.weights
            )
        return self

# Step 10: Run Quantum Linear Regression with 5-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
r2_scores, mae_scores, mse_scores, rmse_scores = [], [], [], []

# The feature map has no trainable parameters, so each sample's encoding is
# the same in every fold. Encode all samples once and slice per fold instead
# of re-running the circuit for every sample in every fold.
features_quantum = np.array([quantum_feature_map(x) for x in features_reduced])

for train_index, test_index in kf.split(features_reduced):
    X_train, X_test = features_reduced[train_index], features_reduced[test_index]
    y_train, y_test = target[train_index], target[test_index]

    X_train_quantum = features_quantum[train_index]
    X_test_quantum = features_quantum[test_index]

    # A fresh model per fold so no weights carry over between folds.
    model = QuantumLinearRegression(n_features=n_qubits)
    model.fit(X_train_quantum, y_train, epochs=100, lr=0.01)

    y_pred = model.predict(X_test_quantum)

    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    r2_scores.append(r2)
    mae_scores.append(mae)
    mse_scores.append(mse)
    rmse_scores.append(rmse)

# Step 11: Calculate Error Metrics
# All metrics are on the scale of `target` as it is passed to the models.
r2_mean = np.mean(r2_scores)
mae_mean, mae_std = np.mean(mae_scores), np.std(mae_scores)
mse_mean, mse_std = np.mean(mse_scores), np.std(mse_scores)
rmse_mean, rmse_std = np.mean(rmse_scores), np.std(rmse_scores)

# "Accuracy" here is 100 * (1 - error relative to the target's value range).
target_range = np.max(target) - np.min(target)
mae_accuracy = (1 - mae_mean / target_range) * 100
# MSE is in squared target units, so it is normalised by the squared range.
mse_accuracy = (1 - mse_mean / target_range ** 2) * 100
rmse_accuracy = (1 - rmse_mean / target_range) * 100



In [None]:
# Report the cross-validated metrics (mean ± std across folds).
# R² is computed during aggregation but was never shown; it is included here
# because a value near or below zero flags a model no better than predicting
# a constant, which the range-relative percentages below do not reveal.
print(f"R2: {r2_mean:.4f} ± {np.std(r2_scores):.4f}")
print(f"MAE: {mae_mean:.4f} ± {mae_std:.4f} ({mae_accuracy:.2f}%)")
print(f"MSE: {mse_mean:.4f} ± {mse_std:.4f} ({mse_accuracy:.2f}%)")
print(f"RMSE: {rmse_mean:.4f} ± {rmse_std:.4f} ({rmse_accuracy:.2f}%)")

MAE: 0.4628 ± 0.0016 (53.72%)
MSE: 0.2301 ± 0.0014 (76.99%)
RMSE: 0.4797 ± 0.0015 (52.03%)
