In [None]:
pip install qiskit qiskit-machine-learning

Note: you may need to restart the kernel to use updated packages.


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from qiskit import QuantumCircuit
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
from qiskit_machine_learning.circuit.library import QNNCircuit
from qiskit_machine_learning.algorithms import VQR
import matplotlib.pyplot as plt
from qiskit_algorithms.optimizers import COBYLA, L_BFGS_B

In [None]:
# Train and evaluate a Variational Quantum Regressor (VQR) on the AGN metadata
# with 3-fold cross-validation. The per-fold metrics are collected in the
# module-level `*_scores` lists so a later cell can report their averages.

# --- Configuration -----------------------------------------------------------
dataset_path = r"C:\Users\Satvik\Downloads\typeII_AGN_metadata.csv"  # Update the path as needed
SEED = 42        # shared by the CV shuffle, PCA and the VQR starting weights
num_qubits = 4   # one qubit per PCA component fed into the feature map

# --- Load the dataset --------------------------------------------------------
data = pd.read_csv(dataset_path)

# --- Select target and feature columns ---------------------------------------
target_column = 'log_bh_mass'
feature_columns = [
    'h_beta_flux', 'h_beta_flux_err', 'oiii_5007_flux', 'oiii_5007_flux_err', 'h_alpha_flux',
    'h_alpha_flux_err', 'nii_6584_flux', 'nii_6584_flux_err', 'log_stellar_sigma',
    'psfMag_u', 'psfMag_g', 'psfMag_r', 'psfMag_i', 'psfMag_z',
    'psfMagErr_u', 'psfMagErr_g', 'psfMagErr_r', 'psfMagErr_i', 'psfMagErr_z',
    'mendel_logM_p50', 'mendel_logM_p16', 'mendel_logM_p84',
    'mendel_logMt_p50', 'mendel_logMt_p16', 'mendel_logMt_p84',
    'mendel_logMb_p50', 'mendel_logMb_p16', 'mendel_logMb_p84',
    'mendel_logMd_p50', 'mendel_logMd_p16', 'mendel_logMd_p84',
    'simard_b_t_g', 'simard_e_b_t_g', 'simard_b_t_r', 'simard_e_b_t_r',
    'simard_Rhlg', 'simard_Rhlr', 'simard_Rchl_g', 'simard_Rchl_r',
    'simard_Re', 'simard_e_Re', 'simard_e', 'simard_e_e',
    'simard_nb', 'simard_e_nb', 'simard_PpS', 'simard_Pn4'
]

# Take explicit copies: filling NaNs in place on a slice of `data` is what
# triggers pandas' "value is trying to be set on a copy of a slice" warning.
X = data[feature_columns].copy()
y = data[target_column].copy()

# --- Impute missing values with the column mean (non in-place) ---------------
X = X.fillna(X.mean())
y = y.fillna(y.mean())

# --- Min-max normalise features and target -----------------------------------
# NOTE(review): the scalers and the PCA below are fit on the full dataset
# before the K-fold split, so statistics of each test fold leak into training.
# Fitting them inside the loop on the training fold only would remove the leak;
# left global here so X_scaled / y_scaled / X_pca keep their current meaning.
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1)).ravel()  # back to a 1D array

# --- Reduce to one principal component per qubit -----------------------------
# random_state pins the decomposition in case a randomized SVD solver is chosen.
pca = PCA(n_components=num_qubits, random_state=SEED)
X_pca = pca.fit_transform(X_scaled)

# --- 3-fold cross-validation -------------------------------------------------
kf = KFold(n_splits=3, shuffle=True, random_state=SEED)

mse_scores = []
mae_scores = []
r2_scores = []
rmse_scores = []

accuracy_mae_scores = []
accuracy_mse_scores = []
accuracy_rmse_scores = []

# Seeded generator for the VQR starting weights: without an explicit
# initial_point the starting weights are picked randomly, which makes the
# reported metrics change from run to run.
rng = np.random.default_rng(SEED)

for train_index, test_index in kf.split(X_pca):
    X_train, X_test = X_pca[train_index], X_pca[test_index]
    y_train, y_test = y_scaled[train_index], y_scaled[test_index]

    # Fresh circuits and model per fold so no trained weights carry over.
    feature_map = ZZFeatureMap(feature_dimension=num_qubits)
    ansatz = RealAmplitudes(num_qubits=num_qubits, reps=2)

    vqr = VQR(
        num_qubits=num_qubits,
        feature_map=feature_map,
        ansatz=ansatz,
        optimizer=COBYLA(maxiter=100),
        initial_point=rng.random(ansatz.num_parameters),  # reproducible start
    )

    vqr.fit(X_train, y_train)
    y_pred = vqr.predict(X_test)

    # Error metrics for this fold (computed on the min-max scaled target).
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mse)

    mse_scores.append(mse)
    mae_scores.append(mae)
    r2_scores.append(r2)
    rmse_scores.append(rmse)

    # "Accuracy" here is 100 * (1 - error / mean target magnitude). Because the
    # target is min-max scaled, these percentages depend on that scaling and
    # are not comparable to a classification accuracy.
    accuracy_mae = 100 * (1 - mae / np.mean(y_test))
    accuracy_mse = 100 * (1 - mse / np.mean(y_test**2))
    accuracy_rmse = 100 * (1 - rmse / np.mean(y_test))

    accuracy_mae_scores.append(accuracy_mae)
    accuracy_mse_scores.append(accuracy_mse)
    accuracy_rmse_scores.append(accuracy_rmse)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(X.mean(), inplace=True)


In [None]:
# Report the cross-validated error metrics, each averaged over the folds.
error_summary = (
    ("Mean Squared Error:", mse_scores),
    ("Mean Absolute Error:", mae_scores),
    ("R2 Score:", r2_scores),
    ("Root Mean Squared Error:", rmse_scores),
)
for label, fold_values in error_summary:
    print(label, np.mean(fold_values))

# Report the derived "accuracy" percentages, again averaged over the folds.
accuracy_summary = (
    ("\nAccuracy based on MAE: {:.2f}%", accuracy_mae_scores),
    ("Accuracy based on MSE: {:.2f}%", accuracy_mse_scores),
    ("Accuracy based on RMSE: {:.2f}%", accuracy_rmse_scores),
)
for template, fold_values in accuracy_summary:
    print(template.format(np.mean(fold_values)))

Mean Squared Error: 0.11269172221345579
Mean Absolute Error: 0.26955648945877614
R2 Score: -6.058557493229119
Root Mean Squared Error: 0.33499827467690685

Accuracy based on MAE: 41.74%
Accuracy based on MSE: 51.01%
Accuracy based on RMSE: 27.60%
