In [1]:
import numpy as np
import pandas as pd
from osgeo import gdal
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from qiskit_aer import Aer
from qiskit.circuit.library import ZZFeatureMap, PauliFeatureMap
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit.primitives import Sampler
import seaborn as sns
import joblib
import rasterio
from rasterio import Affine
from scipy.stats import describe
from tqdm.notebook import tqdm
import time

import gc
import csv

from itertools import product
import os  # For extracting file name

from qiskit_machine_learning.datasets import ad_hoc_data
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_algorithms.state_fidelities import ComputeUncompute

In [None]:
# --- Training data ---------------------------------------------------------
# Load the labelled samples. Every column except the last is used as a
# feature; the 'label' column is the target.
data = pd.read_csv('output_with_int_labels4.csv')

X = data.iloc[:, :-1].values  # NOTE(review): assumes 'label' is the last column - confirm
y = data['label'].values

# Preprocessing pipeline, fitted on the training CSV only:
#   standardise -> project onto 2 principal components -> rescale to [-1, 1].
# The two components match feature_dimension=2 of the feature maps built later.
scaler = StandardScaler()
X = scaler.fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
XPca_df = pd.DataFrame(X_pca)
mms = MinMaxScaler((-1, 1))
Xn = mms.fit_transform(X_pca)

# Draw 1000 samples from the full set, then keep 200 of them for fitting and
# the remaining 800 for evaluation. Fixed seeds keep the split reproducible.
X_subset, _, y_subset, _ = train_test_split(Xn, y, train_size=1000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X_subset, y_subset, train_size=200, random_state=43)

# --- New (unseen) data -----------------------------------------------------
df2 = pd.read_csv('test_all.csv')

tx = df2.iloc[:, :-1].values  # NOTE(review): assumes 'label' is the last column - confirm
ty = df2['label'].values

# Apply the transformers that were fitted on the training data. Re-fitting
# them here (fit_transform) would place the new samples in a different
# feature space - different mean/scale, and PCA axes that may be rotated or
# sign-flipped - from the one the classifier is trained in.
# Values may fall slightly outside [-1, 1] when the new data exceeds the
# range seen during fitting.
tx = scaler.transform(tx)
txpca = pca.transform(tx)
txmms = mms.transform(txpca)

# --- Search grid -----------------------------------------------------------
# Each combination supplies three Pauli strings (pauli1, pauli2, pauli3), a
# repetition count and an entanglement layout for PauliFeatureMap.
pau1 = ['X', 'Y', 'Z']
pau2 = ['X', 'Y', 'Z']
pau3 = ['XY', 'XZ', 'YX']
reps = [1, 2, 3, 4]
ent = ['full', 'linear', 'circular', 'reverse_linear', 'sca']

# One row of metrics is appended per evaluated combination.
results = []

In [None]:
# Grid search over PauliFeatureMap settings.
#
# For every (pauli1, pauli2, pauli3, reps, entanglement) combination this cell:
#   1. draws the feature-map circuit and saves it as a PNG,
#   2. fits a QSVC with a fidelity quantum kernel on X_train / y_train,
#   3. evaluates it on the held-out split (X_test) and on the new data (txmms),
#   4. saves histograms, classification-report heatmaps, the pickled model and
#      a GeoTIFF of the new-data predictions,
#   5. appends one metrics row to `results` and checkpoints it to CSV.


def _save_prediction_histogram(labels, title, path):
    """Save a histogram of integer class predictions with one bar per class value.

    Parameters
    ----------
    labels : numpy.ndarray
        Predicted class labels (numeric).
    title : str
        Title drawn above the axes.
    path : str
        Destination image path.
    """
    lowest, highest = labels.min(), labels.max()
    fig, ax = plt.subplots()
    # Bin edges sit at k - 0.5 so every integer label gets its own centred bar.
    ax.hist(labels, bins=np.arange(lowest, highest + 2) - 0.5, edgecolor='black', alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.set_xticks(np.arange(lowest, highest + 1))  # Ticks at integer positions
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.savefig(path, dpi=300)
    plt.close(fig)  # Release the figure; hundreds are created over the grid


def _save_report_heatmap(report_df, title, path):
    """Save a classification-report DataFrame as an annotated heatmap.

    Parameters
    ----------
    report_df : pandas.DataFrame
        Transposed ``classification_report(..., output_dict=True)`` frame.
        The last row and the last column are left out of the plot.
    title : str
        Title drawn above the axes.
    path : str
        Destination image path.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(report_df.iloc[:-1, :-1], annot=True, cmap='Blues', fmt='.2f', cbar=False, ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Metrics")
    ax.set_ylabel("Classes")
    fig.tight_layout()
    fig.savefig(path, dpi=300)
    plt.close(fig)


# Make sure every output folder exists before hours of computation start.
for out_dir in (
    'output/circuit',
    'output/tthist_out',
    'output/model_out',
    'output/cla_train_test',
    'output/hist_out',
    'output/cla_new_data',
    'output/out_new_tif',
):
    os.makedirs(out_dir, exist_ok=True)

# The reference raster is the same for every combination, so read its
# geoinformation once and validate sizes up front (fail fast).
input_tiff_path = "data/labelsmall.tif"
with rasterio.open(input_tiff_path) as src:
    metadata = src.meta.copy()
    height, width = src.height, src.width

# One prediction is produced per row of txmms; it must fill the raster exactly.
if len(txmms) != height * width:
    raise ValueError(f"Size mismatch: predicted array ({len(txmms)}) does not match raster dimensions ({height}x{width}).")

# Predictions are written as a single uint8 band. The classifier can only
# predict labels seen during fitting, so checking y_train is sufficient.
if y_train.min() < 0 or y_train.max() > 255:
    raise ValueError("Class labels must lie in 0..255 to be written as a uint8 raster.")

metadata.update({
    "dtype": rasterio.uint8,  # Adjust the dtype as needed
    "count": 1,               # Single band
})

result_columns = ['pauli1', 'pauli2', 'pauli3', 'rep', 'entanglement', 'acc1', 'f1_score_train', 'precision_train', 'recall_train', 'acc2', 'f1_score_test', 'precision_test', 'recall_test', 'runtime']

# Materialise the grid so tqdm knows the total and can show real progress.
combinations = list(product(pau1, pau2, pau3, reps, ent))

for pauli1, pauli2, pauli3, rep, entanglement in tqdm(combinations, desc="Processing combinations", unit="combination"):
    # File-name stem shared by every artefact of this combination
    file_name = f'P3_{pauli1}_{pauli2}_{pauli3}_{entanglement}_{rep}'

    # Create the PauliFeatureMap with the current parameters
    feature_map = PauliFeatureMap(
        feature_dimension=2,
        entanglement=entanglement,
        reps=rep,
        paulis=[pauli1, pauli2, pauli3]
    )

    # Draw the decomposed circuit and save it, titled with the file-name stem
    fig, ax = plt.subplots()
    feature_map.decompose().draw('mpl', ax=ax)
    fig.suptitle(file_name, fontsize=8, wrap=True)
    fig.savefig(f'output/circuit/ckt_{file_name}.png', dpi=300)
    plt.close(fig)

    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)
    classifier = QSVC(quantum_kernel=quantum_kernel)

    # `runtime` covers fitting plus prediction on the held-out split
    start_time = time.time()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    runtime = time.time() - start_time

    # ---- Held-out split (the *_train metric names refer to this split) ----
    acc1 = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {acc1}")
    print(classification_report(y_test, y_pred))

    _save_prediction_histogram(
        y_pred,
        f"{file_name}\nHistogram Array in train test",
        f'output/tthist_out/tthist_{file_name}.png',
    )

    report = classification_report(y_test, y_pred, output_dict=True)
    df = pd.DataFrame(report).transpose()
    f1_score_train = df.loc['weighted avg', 'f1-score']
    precision_train = df.loc['weighted avg', 'precision']
    recall_train = df.loc['weighted avg', 'recall']

    _save_report_heatmap(
        df,
        f"{file_name}\n Test Train Classification Report \n (Accuracy: {acc1:.2f})",
        f'output/cla_train_test/cla_{file_name}.png',
    )

    # Persist the fitted classifier for later reuse
    joblib.dump(classifier, f'output/model_out/model_{file_name}.pkl')

    # ---- New data (the *_test metric names refer to this data set) ----
    typred = classifier.predict(txmms)
    acc2 = accuracy_score(ty, typred)
    print(f"Accuracy: {acc2}")

    _save_prediction_histogram(
        typred,
        f"{file_name}\nHistogram Array",
        f'output/hist_out/hist_{file_name}.png',
    )

    report = classification_report(ty, typred, output_dict=True)
    df = pd.DataFrame(report).transpose()
    f1_score_test = df.loc['weighted avg', 'f1-score']
    precision_test = df.loc['weighted avg', 'precision']
    recall_test = df.loc['weighted avg', 'recall']

    _save_report_heatmap(
        df,
        f"{file_name}\nNew data Classification Report \n (Accuracy: {acc2:.2f})",
        f'output/cla_new_data/clt_{file_name}.png',
    )

    # ---- Predictions as a GeoTIFF on the reference raster's grid ----
    output_tiff_path = f"output/out_new_tif/ys_{file_name}.tiff"
    # Cast explicitly so the array dtype matches the declared band dtype
    typred_reshaped = typred.reshape(height, width).astype(np.uint8)
    with rasterio.open(output_tiff_path, "w", **metadata) as dst:
        dst.write(typred_reshaped, 1)

    print(f"New GeoTIFF saved at: {output_tiff_path}")

    results.append([pauli1, pauli2, pauli3, rep, entanglement, acc1, f1_score_train, precision_train, recall_train, acc2, f1_score_test, precision_test, recall_test, runtime])

    # Drop per-iteration objects and force a collection to keep memory flat
    del feature_map
    del quantum_kernel
    del classifier
    del y_pred
    del typred
    del report
    del df
    gc.collect()

    # Checkpoint after every combination so an interrupted run keeps its rows
    df_results2 = pd.DataFrame(results, columns=result_columns)
    df_results2.to_csv('output/P3_2_results_mid.csv', index=False)

    



Processing combinations: 0combination [00:00, ?combination/s]

In [None]:
# Write the complete set of grid-search rows to the final CSV (one row per
# evaluated combination, no index column).
df_results = pd.DataFrame(
    data=results,
    columns=[
        'pauli1', 'pauli2', 'pauli3', 'rep', 'entanglement',
        'acc1', 'f1_score_train', 'precision_train', 'recall_train',
        'acc2', 'f1_score_test', 'precision_test', 'recall_test',
        'runtime',
    ],
)
df_results.to_csv('output/P3_2_results.csv', index=False)

In [None]:
# Turn the results DataFrame back into a plain list of rows
# (one inner list per combination).
results = df_results.to_numpy().tolist()


In [None]:
# Display the accumulated result rows as this cell's output.
# NOTE(review): the saved output below shows 12 fields per row, starting with a
# single Pauli string, whereas the loop in this notebook appends 14 fields
# (three Pauli strings first). The output appears to come from an earlier
# version of the loop - re-run to refresh it.
results

[['XX',
  1,
  'full',
  0.565,
  0.40795527156549527,
  0.319225,
  0.565,
  0.5363636363636364,
  0.3745024206562668,
  0.28768595041322315,
  0.5363636363636364,
  161.4901955127716],
 ['XX',
  1,
  'linear',
  0.565,
  0.40795527156549527,
  0.319225,
  0.565,
  0.5363636363636364,
  0.3745024206562668,
  0.28768595041322315,
  0.5363636363636364,
  163.22735571861267],
 ['XX',
  1,
  'circular',
  0.565,
  0.40795527156549527,
  0.319225,
  0.565,
  0.5363636363636364,
  0.3745024206562668,
  0.28768595041322315,
  0.5363636363636364,
  164.26364064216614],
 ['XX',
  1,
  'reverse_linear',
  0.565,
  0.40795527156549527,
  0.319225,
  0.565,
  0.5363636363636364,
  0.3745024206562668,
  0.28768595041322315,
  0.5363636363636364,
  164.74181461334229],
 ['XX',
  1,
  'sca',
  0.565,
  0.40795527156549527,
  0.319225,
  0.565,
  0.5363636363636364,
  0.3745024206562668,
  0.28768595041322315,
  0.5363636363636364,
  164.5306739807129],
 ['XX',
  2,
  'full',
  0.6725,
  0.6626834490