In [2]:
import numpy as np

In [3]:
import pandas as pd
from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister, execute, Aer
from qiskit.circuit import Parameter
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, classification_report
from sklearn.utils.class_weight import compute_class_weight

In [13]:
def load_german_credit_data(file_path='statloggermancreditdata/german.data'):
    """
    Read the German Credit data file and turn it into model-ready arrays.

    The file is parsed as space-separated text without a header row; the 21
    column names below are assigned in order. The seven numeric columns are
    standardised with a ``StandardScaler`` and every other feature column is
    integer-encoded with its own ``LabelEncoder``.

    Note that both the scaler and the encoders are fitted on the complete
    file, i.e. before any train/validation/test split done by the caller.

    Args:
        file_path: Location of the ``german.data`` file.

    Returns:
        Tuple ``(X, y, scaler, label_encoders)``:
            X: 2-D array holding the 20 processed feature columns, in file order.
            y: 1-D array of labels, 0 for the original class 1 (good credit)
               and 1 for the original class 2 (bad credit).
            scaler: the fitted ``StandardScaler`` for the numeric columns.
            label_encoders: dict mapping each categorical column name to its
               fitted ``LabelEncoder``.
    """
    target = 'class'
    numeric_features = ['duration', 'credit_amount', 'installment_rate',
                        'residence_since', 'age', 'existing_credits',
                        'num_dependents']
    column_names = [
        'status', 'duration', 'credit_history', 'purpose', 'credit_amount',
        'savings', 'employment', 'installment_rate', 'personal_status_sex',
        'other_debtors', 'residence_since', 'property', 'age',
        'other_installment_plans', 'housing', 'existing_credits',
        'job', 'num_dependents', 'telephone', 'foreign_worker', 'class'
    ]

    raw = pd.read_csv(file_path, sep=' ', names=column_names)
    prepared = raw.copy()

    # Standardise the numeric features in one pass.
    scaler = StandardScaler()
    prepared[numeric_features] = scaler.fit_transform(prepared[numeric_features])

    # Everything that is neither numeric nor the target is a categorical code
    # (e.g. "A11"); give each such column its own integer encoder.
    label_encoders = {}
    for name in raw.columns:
        if name == target or name in numeric_features:
            continue
        encoder = LabelEncoder()
        prepared[name] = encoder.fit_transform(prepared[name])
        label_encoders[name] = encoder

    # Original labels are 1 (good) / 2 (bad); make "bad" the positive class.
    prepared[target] = prepared[target].map({1: 0, 2: 1})

    y = prepared[target].values
    X = prepared.drop(target, axis=1).values
    return X, y, scaler, label_encoders

In [41]:
class QRCCreditRisk:
    """Quantum-reservoir classifier that flags bad credit risks.

    Each sample is encoded into a fixed parameterised circuit (the
    "reservoir"), the circuit is sampled on the QASM simulator, and the
    measured bitstring frequencies are used as features for a linear readout
    fitted by class-weighted least squares.

    Attributes:
        n_qubits: Number of qubits (and classical bits) in the reservoir.
        reservoir_steps: How many times the entangle/rotate layer is repeated.
        shots: Number of measurement shots per sample.
        params: ``2 * n_qubits`` circuit parameters; the first ``n_qubits``
            drive the RY rotations, the rest the RZ rotations. The same
            parameters are reused in every reservoir step.
        threshold: Readout score above which a sample is predicted as 1 (bad).
        readout_weights: Fitted linear readout, ``None`` until ``train`` runs.
        trained_param_values: Parameter values used in training, ``None``
            until ``train`` runs.
    """

    def __init__(self, n_qubits=6, reservoir_steps=7, shots=1000):
        """Set up the reservoir hyper-parameters.

        Args:
            n_qubits: Width of the reservoir circuit.
            reservoir_steps: Number of repeated reservoir layers.
            shots: Measurement shots used to estimate each sample's
                bitstring distribution.
        """
        self.n_qubits = n_qubits
        self.reservoir_steps = reservoir_steps
        self.shots = shots
        self.params = [Parameter(f'θ_{i}') for i in range(n_qubits * 2)]
        self.threshold = 0.3  # Below 0.5 on purpose, to favour the positive (bad) class
        self.readout_weights = None
        self.trained_param_values = None

    def create_reservoir_circuit(self, input_data):
        """Build the (still parameterised) reservoir circuit for one sample.

        Qubit ``i`` is rotated by ``RX(x[i % d] * pi)`` and
        ``RZ(x[(i + 1) % d] * pi)``, where ``d = len(input_data)``. When a
        sample has more than ``n_qubits + 1`` features, the remaining
        features are therefore not encoded at all.

        Args:
            input_data: One feature row (sequence of numbers).

        Returns:
            A ``QuantumCircuit`` with ``self.params`` unbound and all qubits
            measured into the classical register.

        Raises:
            ValueError: If ``input_data`` is empty.
        """
        n_features = len(input_data)
        if n_features == 0:
            raise ValueError("input_data must contain at least one feature")

        qr = QuantumRegister(self.n_qubits, 'q')
        cr = ClassicalRegister(self.n_qubits, 'c')
        qc = QuantumCircuit(qr, cr)

        # Input encoding: feature values become rotation angles (value * pi).
        for i in range(self.n_qubits):
            qc.rx(input_data[i % n_features] * np.pi, qr[i])
            qc.rz(input_data[(i + 1) % n_features] * np.pi, qr[i])

        for _ in range(self.reservoir_steps):
            # All-to-all entanglement: CX from every qubit to each
            # higher-indexed qubit, with a barrier after each control qubit.
            for i in range(self.n_qubits):
                for j in range(i + 1, self.n_qubits):
                    qc.cx(qr[i], qr[j])
                qc.barrier()

            # Parameterised RY then RZ on every qubit.
            for i in range(self.n_qubits):
                qc.ry(self.params[i], qr[i])
                qc.rz(self.params[i + self.n_qubits], qr[i])

            # Fixed RZ(pi/2) phase rotation on every qubit. This is a constant
            # unitary, not a non-linearity.
            for i in range(self.n_qubits):
                qc.rz(np.pi/2, qr[i])

        qc.measure(qr, cr)
        return qc

    def get_reservoir_states(self, X, param_values):
        """Sample the reservoir for every row of ``X``.

        Args:
            X: Iterable of feature rows.
            param_values: One value per entry of ``self.params``, in order.

        Returns:
            Array of shape ``(len(X), 2 ** n_qubits)``. Entry ``[k, s]`` is
            the fraction of shots in which sample ``k`` produced the
            bitstring whose base-2 value is ``s``.
        """
        backend = Aer.get_backend('qasm_simulator')
        # The binding is identical for every sample, so build it once.
        param_dict = dict(zip(self.params, param_values))
        n_states = 2 ** self.n_qubits

        reservoir_states = []
        for sample in X:
            bound_qc = self.create_reservoir_circuit(sample).bind_parameters(param_dict)
            result = execute(bound_qc, backend, shots=self.shots).result()
            counts = result.get_counts(bound_qc)

            state_vector = np.zeros(n_states)
            for state, count in counts.items():
                state_vector[int(state, 2)] = count / self.shots
            reservoir_states.append(state_vector)

        # reshape keeps a well-formed (0, n_states) result for empty X.
        return np.array(reservoir_states).reshape(-1, n_states)

    def train(self, X, y, param_values=None):
        """Fit the linear readout on the reservoir features.

        Args:
            X: Training feature rows.
            y: Training labels (array-like).
            param_values: Values for ``self.params``. When omitted, they are
                drawn uniformly from ``[0, 2*pi)`` using NumPy's global
                random state.

        Side effects:
            Sets ``self.readout_weights`` and ``self.trained_param_values``.
        """
        y = np.asarray(y)
        if param_values is None:
            param_values = np.random.uniform(0, 2*np.pi, size=len(self.params))

        # 'balanced' class weights, looked up per sample through the class
        # index so this also works for single-class or non-{0, 1} labels.
        classes, class_index = np.unique(y, return_inverse=True)
        class_weights = compute_class_weight('balanced', classes=classes, y=y)
        sample_weights = class_weights[class_index]

        reservoir_states = self.get_reservoir_states(X, param_values)

        # Weighted least squares: minimising sum_i w_i * (s_i . beta - y_i)^2
        # is ordinary least squares on rows scaled by sqrt(w_i). Scaling by
        # w_i itself would weight the squared residuals by w_i ** 2.
        root_weights = np.sqrt(sample_weights)
        weighted_states = reservoir_states * root_weights[:, np.newaxis]
        weighted_y = y * root_weights

        self.readout_weights = np.linalg.pinv(weighted_states) @ weighted_y
        self.trained_param_values = param_values

    def predict(self, X):
        """Predict binary labels for ``X``.

        Args:
            X: Feature rows to classify.

        Returns:
            Integer array with 1 where the readout score exceeds
            ``self.threshold`` and 0 elsewhere.

        Raises:
            RuntimeError: If called before ``train``.
        """
        if (getattr(self, 'readout_weights', None) is None
                or getattr(self, 'trained_param_values', None) is None):
            raise RuntimeError("QRCCreditRisk.predict() called before train()")

        reservoir_states = self.get_reservoir_states(X, self.trained_param_values)
        predictions = reservoir_states @ self.readout_weights
        return (predictions > self.threshold).astype(int)

In [18]:
def optimize_threshold(model, X_val, y_val, min_recall=0.5):
    """
    Pick a decision threshold that meets a recall goal with as few positive
    predictions as possible.

    Thresholds from 0.10 to 0.65 in steps of 0.05 are evaluated on the
    validation set. Raising the threshold can only remove positive
    predictions, so among all thresholds whose recall exceeds ``min_recall``
    the highest one is returned: it still meets the recall goal while
    producing the fewest false positives. If no threshold reaches the goal,
    the threshold with the best recall is returned instead (the lowest one
    in case of ties).

    Args:
        model: Trained model exposing ``get_reservoir_states``,
            ``trained_param_values`` and ``readout_weights``.
        X_val: Validation feature rows.
        y_val: Validation labels (1 = positive class).
        min_recall: Recall that must be strictly exceeded. Defaults to 0.5.

    Returns:
        The selected threshold as a float.
    """
    reservoir_states = model.get_reservoir_states(X_val, model.trained_param_values)
    raw_predictions = reservoir_states @ model.readout_weights

    # linspace pins the 12 grid points explicitly; arange with a float step
    # leaves the number of points at the mercy of rounding.
    thresholds = np.linspace(0.10, 0.65, 12)
    recalls = np.array([
        recall_score(y_val, (raw_predictions > threshold).astype(int))
        for threshold in thresholds
    ])

    meets_goal = recalls > min_recall
    if meets_goal.any():
        best_index = np.flatnonzero(meets_goal)[-1]
    else:
        # Goal unreachable on this grid: fall back to the best recall
        # available. argmax returns the first maximum, i.e. the lowest
        # threshold among ties.
        best_index = np.argmax(recalls)

    return float(thresholds[best_index])

In [43]:
def main():
    """Run the end-to-end pipeline: load data, train, tune threshold, evaluate.

    Steps:
        1. Load and preprocess the German Credit data.
        2. Split it 70 / 15 / 15 into train, validation and test sets.
        3. Train a 6-qubit, 7-step ``QRCCreditRisk`` model.
        4. Tune the decision threshold on the validation set.
        5. Print the classification report and the bad-credit recall on the
           test set, and whether the recall goal (> 0.5) was met.
    """
    print("Loading and preprocessing German Credit dataset...")
    # The fitted scaler and encoders are returned as well but not needed here.
    X, y, _scaler, _label_encoders = load_german_credit_data('statloggermancreditdata/german.data')

    # 70% train, then the remaining 30% is halved into validation and test.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    print("Training QRC model...")
    qrc_model = QRCCreditRisk(n_qubits=6, reservoir_steps=7)
    # Draw the reservoir angles from a seeded generator so that they are
    # reproducible like the splits above. The simulator's shot sampling is
    # not seeded here, so results can still vary slightly between runs.
    rng = np.random.default_rng(42)
    param_values = rng.uniform(0, 2*np.pi, size=len(qrc_model.params))
    qrc_model.train(X_train, y_train, param_values=param_values)

    print("Optimizing threshold...")
    best_threshold = optimize_threshold(qrc_model, X_val, y_val)
    qrc_model.threshold = best_threshold
    print(f"Optimal threshold: {best_threshold:.3f}")

    print("Making predictions...")
    y_pred = qrc_model.predict(X_test)

    print("\nModel Performance:")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Recall of the positive class (1 = bad credit) is the headline metric.
    recall = recall_score(y_test, y_pred)
    print(f"\nRecall for bad credit risk: {recall:.3f}")

    if recall > 0.5:
        print("✓ Achieved recall goal (> 0.5)")
    else:
        print("✗ Did not achieve recall goal (> 0.5)")

# Entry point: run the pipeline when this code is executed directly (which
# includes running the cell in a notebook kernel), but not when it is imported.
if __name__ == "__main__":
    main()

SyntaxError: invalid syntax (2237798003.py, line 18)