<a href="https://colab.research.google.com/github/mohitraosatya/quantum-feature-selection-qci/blob/main/QCI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dwave-ocean-sdk numpy pandas scikit-learn xgboost matplotlib

Collecting dwave-ocean-sdk
  Downloading dwave_ocean_sdk-8.2.0-py3-none-any.whl.metadata (5.5 kB)
Collecting dimod==0.12.18 (from dwave-ocean-sdk)
  Downloading dimod-0.12.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting dwave-cloud-client==0.13.3 (from dwave-ocean-sdk)
  Downloading dwave_cloud_client-0.13.3-py3-none-any.whl.metadata (5.4 kB)
Collecting dwave-gate==0.3.3 (from dwave-ocean-sdk)
  Downloading dwave_gate-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting dwave-hybrid==0.6.13 (from dwave-ocean-sdk)
  Downloading dwave_hybrid-0.6.13-py3-none-any.whl.metadata (4.3 kB)
Collecting dwave-inspector==0.5.2 (from dwave-ocean-sdk)
  Downloading dwave_inspector-0.5.2-py3-none-any.whl.metadata (4.4 kB)
Collecting dwave-networkx==0.8.16 (from dwave-ocean-sdk)
  Downloading dwave_networkx-0.8.16-py3-none-any.whl.metadata (2.6 kB)
Collecting dwave-optimization==0.5.1 (from dwave-ocean-sdk)
  Downloading

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import accuracy_score
import xgboost as xgb

# Quantum Optimization Libraries (D-Wave for QUBO)
from dwave.system import DWaveSampler, EmbeddingComposite
import dimod

In [5]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls"

# Read the Excel file directly from the web
df = pd.read_excel(url, header=1)  # Skip first row as it contains description

# Display first few rows
df.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [6]:
# Assume last column is the target variable (default or not)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Normalize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [8]:
from sklearn.feature_selection import SelectKBest, mutual_info_classif

k_best = SelectKBest(mutual_info_classif, k=10)
X_train_kbest = k_best.fit_transform(X_train, y_train)
X_test_kbest = k_best.transform(X_test)

# Train a classifier using selected features
clf = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
clf.fit(X_train_kbest, y_train)
y_pred = clf.predict(X_test_kbest)

# Check accuracy
print("Accuracy with SelectKBest (Mutual Information):", accuracy_score(y_test, y_pred))


Parameters: { "use_label_encoder" } are not used.



Accuracy with SelectKBest (Mutual Information): 0.8106666666666666


In [9]:
# Define number of features
num_features = X_train.shape[1]

# Initialize QUBO matrix
Q = np.zeros((num_features, num_features))

# Assign higher values to important features (using MI scores)
feature_importance = k_best.scores_  # Scores from SelectKBest (MI method)
for i in range(num_features):
    Q[i, i] = -feature_importance[i]  # Negative to maximize feature importance

# Feature redundancy penalty (avoid redundant selections)
lambda_penalty = 0.1
for i in range(num_features):
    for j in range(i + 1, num_features):
        Q[i, j] = lambda_penalty * abs(np.corrcoef(X_train[:, i], X_train[:, j])[0, 1])

print("QUBO Matrix Constructed")


QUBO Matrix Constructed


In [10]:
import dimod
from dwave.system import LeapHybridSampler

# Convert QUBO matrix to dictionary format
Q_dict = {(i, j): Q[i, j] for i in range(num_features) for j in range(num_features)}

# Use D-Wave's classical solver to simulate quantum annealing
sampler = dimod.ExactSolver()  # Can replace with LeapHybridSampler() for real quantum processing
response = sampler.sample_qubo(Q_dict)

# Extract best feature subset
best_selection = list(response.first.sample.values())

# Convert selection to feature index
selected_features_qfs = [i for i in range(num_features) if best_selection[i] == 1]
print("Selected Features (Quantum Feature Selection):", selected_features_qfs)


Selected Features (Quantum Feature Selection): [4, 6, 18]


In [11]:
# Select the quantum-optimized feature subset
X_train_qfs = X_train[:, selected_features_qfs]
X_test_qfs = X_test[:, selected_features_qfs]

# Train an XGBoost model
clf_qfs = xgb.XGBClassifier(eval_metric='mlogloss')
clf_qfs.fit(X_train_qfs, y_train)
y_pred_qfs = clf_qfs.predict(X_test_qfs)

# Compare accuracy
accuracy_qfs = accuracy_score(y_test, y_pred_qfs)
print("Accuracy with Quantum Feature Selection:", accuracy_qfs)

Accuracy with Quantum Feature Selection: 0.8176666666666667


In [12]:
print("Classical SelectKBest Accuracy:", accuracy_score(y_test, y_pred))
print("Quantum Feature Selection Accuracy:", accuracy_qfs)

Classical SelectKBest Accuracy: 0.8106666666666666
Quantum Feature Selection Accuracy: 0.8176666666666667
