In [3]:
import os
from getpass import getpass

# Credentials must not be stored as literals in the notebook. Reuse
# DWAVE_API_TOKEN when the environment already provides it; otherwise ask for
# it interactively without echoing the value.
# This cell has to run before any cell that constructs DWaveSampler(): the
# recorded run executed it last and failed with "API token not defined".
if not os.environ.get('DWAVE_API_TOKEN'):
    os.environ['DWAVE_API_TOKEN'] = getpass('D-Wave API token: ')

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset from CSV file
def load_dataset(filepath):
    """Read a CSV file and separate it into features and target.

    Args:
        filepath: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a DataFrame holding every column except
        the last one, ``y`` is the last column as a Series.
    """
    table = pd.read_csv(filepath)
    target_position = table.shape[1] - 1  # positional index of the final column
    features = table.iloc[:, :target_position]
    target = table.iloc[:, target_position]
    return features, target

# Preprocessing the data: normalize features
def preprocess_data(X):
    """Scale the feature matrix with a newly created ``StandardScaler``.

    The scaler is fitted on ``X`` itself and then discarded, so the fitted
    statistics cannot be reused on other data afterwards.

    Args:
        X: Feature matrix accepted by ``StandardScaler.fit_transform``.

    Returns:
        The scaled features as returned by ``fit_transform``.
    """
    return StandardScaler().fit_transform(X)

# Split data into training and testing sets
def split_data(X, y, test_size=0.2, random_state=42):
    """Partition features and labels into train and test subsets.

    Args:
        X: Feature matrix.
        y: Target values aligned with the rows of ``X``.
        test_size: Forwarded to ``train_test_split`` (default 0.2).
        random_state: Seed forwarded to ``train_test_split`` (default 42).

    Returns:
        Whatever ``train_test_split`` returns for two inputs, i.e. the
        sequence ``X_train, X_test, y_train, y_test``.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    partitions = train_test_split(X, y, **split_options)
    return partitions

# Example usage
X, y = load_dataset('AEEM_JIRA/EQ.csv')

# Split BEFORE scaling: fitting the scaler on the full matrix would let the
# held-out rows influence the scaling statistics (train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = split_data(X, y)

# Learn the scaling from the training rows only, then apply it to both parts.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Entire dataset on the training scale, kept under its original name for any
# later cell that still refers to it.
X_scaled = scaler.transform(X)

In [2]:
from dwave.system import DWaveSampler, EmbeddingComposite
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Define a QUBO matrix for feature selection
def create_qubo(X, y, lambda_penalty=0.1, n_estimators=100, random_state=42):
    """Build a diagonal QUBO matrix for feature selection.

    A random forest is fitted on ``(X, y)`` and its ``feature_importances_``
    are used as per-feature rewards. Diagonal entry ``i`` is
    ``lambda_penalty - importance_i``: choosing a feature lowers the objective
    only when its importance exceeds ``lambda_penalty``. All off-diagonal
    entries stay zero, so no interaction between features is encoded.

    Args:
        X: 2-D feature matrix exposing ``.shape``; columns are features.
        y: Labels accepted by ``RandomForestClassifier.fit``.
        lambda_penalty: Constant cost added for every selected feature
            (default 0.1, the previously hard-coded value).
        n_estimators: Number of trees in the forest (default 100).
        random_state: Seed for the forest (default 42).

    Returns:
        A ``(num_features, num_features)`` float ``numpy.ndarray``.
    """
    num_features = X.shape[1]

    # The forest is only used to score the features; it is not returned.
    clf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    clf.fit(X, y)
    feature_importances = clf.feature_importances_

    Q = np.zeros((num_features, num_features))
    for i in range(num_features):
        # Reward (negative importance) plus the per-feature selection cost.
        Q[i, i] = lambda_penalty - feature_importances[i]

    return Q

# Use a Quantum Annealer to solve the QUBO problem
def solve_qubo(Q, num_reads=100, sampler=None):
    """Sample a QUBO and return the ``first`` sample of the response.

    Args:
        Q: QUBO coefficients in a form accepted by ``sampler.sample_qubo``.
        num_reads: Number of reads requested from the sampler (default 100,
            the previously hard-coded value).
        sampler: Optional object exposing ``sample_qubo(Q, num_reads=...)``.
            When omitted, ``EmbeddingComposite(DWaveSampler())`` is created,
            exactly as before.

    Returns:
        ``response.first.sample`` -- the variable-to-value assignment of the
        response's first (best) sample.
    """
    if sampler is None:
        # Building DWaveSampler requires D-Wave credentials to be configured
        # already; the notebook's recorded run failed here with
        # "API token not defined".
        sampler = EmbeddingComposite(DWaveSampler())

    response = sampler.sample_qubo(Q, num_reads=num_reads)
    return response.first.sample

# Example usage: build the QUBO from the training split and sample it
Q = create_qubo(X_train, y_train)
selected_features = solve_qubo(Q)

# Keep the positions whose variable came back as 1 in the returned sample
selected_feature_indices = list(
    filter(
        lambda position: selected_features[position] == 1,
        range(len(selected_features)),
    )
)

# Restrict both partitions to the chosen columns
X_train_selected, X_test_selected = (
    partition[:, selected_feature_indices] for partition in (X_train, X_test)
)

ValueError: API token not defined