# CQC ML on MNIST 

In [1]:
import numpy as np
import numpy.linalg as LA
import matplotlib.pyplot as plt
from qiskit.quantum_info import Statevector
import matplotlib
from sklearn.datasets import fetch_openml
matplotlib.rcParams['mathtext.fontset'] = 'stix'
matplotlib.rcParams['font.family'] = 'STIXGeneral'
%matplotlib inline

In [2]:
"""Learning parameters"""
# Sizes of the two consecutive slices taken from the shuffled dataset.
TRAIN_SZ, TEST_SZ = 10_000, 2_000
# True  -> keep 2**L PCA components and use them directly as state amplitudes.
# False -> keep 2*L PCA components and use them as per-qubit (theta, phi) angles.
HUGE = bool(0)
# Number of extra |0> qubits tensored onto each angle-encoded state.
ADD = 1 + 1

In [3]:
"""Import the MNIST dataset"""
# version=1 pins the OpenML revision so every run downloads the same data;
# as_frame=True guarantees pandas objects, which the shuffling step relies on
# (it indexes rows and calls reset_index on both data and target).
mnist = fetch_openml('mnist_784', version=1, as_frame=True)

In [8]:
# Shuffle the whole dataset, one-hot encode the labels, then slice off the
# train and test splits.
NUM_CLASSES = 10   # width of the one-hot label vectors (digits 0-9)
SHUFFLE_SEED = 0   # fixed so the split written to disk can be regenerated;
                   # change it to draw a different split

# Positional permutation of every row; the size comes from the data itself
# instead of a hard-coded 70000.
rng = np.random.default_rng(SHUFFLE_SEED)
index_number = rng.permutation(len(mnist.data))
x1 = mnist.data.iloc[index_number].reset_index(drop=True)
y1 = mnist.target.iloc[index_number].reset_index(drop=True)
# Labels are converted element-wise to Python ints before being used as indices.
y1 = np.array([int(label) for label in y1.to_numpy()])

# Disabled alternative: one-hot vectors padded to 2**4 columns.
# y1_temp = np.zeros((len(y1), 2**4)) * 0.0
# for i in range(len(y1)):
#     y1_temp[i,y1[i]] = 1.0
# y1 = y1_temp

# Disabled alternative: drop the digits 8 and 9 before encoding.
# idxs = np.logical_not((y1 == 8) + (y1 == 9))
# x1 = x1[idxs]; y1 = y1[idxs]

# Row k of the identity matrix is the one-hot vector for class k, so fancy
# indexing with the integer labels builds the (n_samples, NUM_CLASSES) matrix.
y1 = np.eye(NUM_CLASSES)[y1]

# Train and test are consecutive, non-overlapping slices of the shuffled data.
x_train, x_test = x1[:TRAIN_SZ], x1[TRAIN_SZ:TRAIN_SZ + TEST_SZ]
y_train, y_test = y1[:TRAIN_SZ], y1[TRAIN_SZ:TRAIN_SZ + TEST_SZ]

In [9]:
"""What are the dimensions?"""
# Sanity check on the training split: (number of samples, raw features per sample).
x_train.shape

(10000, 784)

## Classical preparation

In [10]:
"""Run PCA to reduce the dataset from 784 DOF to, say, 16"""
from sklearn.decomposition import PCA
L = 8  # number of qubits that carry the PCA features
# HUGE: one component per amplitude of an L-qubit state (2**L components).
# Otherwise: two components per qubit (2*L), later split into theta/phi angles.
n_components = 2**L if HUGE else 2*L
# Depending on the scikit-learn version, PCA may choose a randomized SVD solver
# for an input of this shape; random_state makes that path reproducible and is
# harmless when a deterministic solver is selected.
pca = PCA(n_components=n_components, random_state=0)
# The projection is fitted on the training split only and re-used for the test split.
reduced_train = pca.fit_transform(x_train)
reduced_test = pca.transform(x_test)
print(reduced_train.shape)

(10000, 16)


## Save output as a statevector

In [11]:
def _rescale_to_pi(components):
    """Shift and scale ``components`` so they span the interval [0, pi]."""
    shifted = components - np.min(components)
    span = np.max(shifted)
    if span == 0:
        # All components are equal: dividing by the span would yield NaN
        # angles (and hence NaN amplitudes), so map everything to angle 0.
        return np.zeros_like(shifted, dtype=float)
    return shifted / span * np.pi


def PCA_to_state(L, vals, add=0):
    """Encode one sample's PCA components as a qiskit ``Statevector``.

    The encoding is selected by the module-level flag ``HUGE``:

    * ``HUGE`` true: ``vals`` is normalised to unit 2-norm and used directly
      as the amplitudes of the state. ``L`` and ``add`` are ignored.
    * ``HUGE`` false: ``vals[:L]`` and ``vals[L:]`` are each rescaled to
      [0, pi] and paired up as (theta, phi). Every pair defines the
      single-qubit state ``[cos(theta/2), exp(i*phi) * sin(theta/2)]``; the
      single-qubit states are combined left to right with
      ``Statevector.tensor``, followed by ``add`` copies of ``[1, 0]``.

    Parameters
    ----------
    L : int
        Number of leading entries of ``vals`` used as theta angles.
    vals : numpy.ndarray
        1-D array of PCA components for one sample. It is not modified.
    add : int, optional
        Number of ``[1, 0]`` qubits tensored onto the end (angle encoding only).

    Returns
    -------
    Statevector
        The encoded state.
    """
    # np.random.shuffle(vals) # scramble principal components
    if HUGE:
        # Normalise into a new array: an in-place ``vals /= norm`` would
        # silently rescale the caller's row of the PCA output.
        return Statevector(vals / LA.norm(vals))

    thetas = _rescale_to_pi(vals[:L])
    phis = _rescale_to_pi(vals[L:])
    states = [
        Statevector([np.cos(theta / 2), np.exp(1j * phi) * np.sin(theta / 2)])
        for theta, phi in zip(thetas, phis)
    ]
    final_state = states[0]
    for state in states[1:]:
        final_state = final_state.tensor(state)
    ancilla = Statevector([1, 0])
    for _ in range(add):
        final_state = final_state.tensor(ancilla)
    return final_state
        

In [12]:
# Encode every PCA-reduced sample of both splits; each split becomes one
# array with one encoded state per row.
train_states, test_states = (
    np.array([PCA_to_state(L, sample, add=ADD) for sample in split])
    for split in (reduced_train, reduced_test)
)
print(train_states.shape)

(10000, 1024)


In [16]:
# Write the encoded states and one-hot labels to disk as .npy files.
import os

ROOT = "../../daqml_large_files/MNIST/"
hh = ""  # optional prefix prepended to every file name
suffix = '_HUGE' if HUGE else ''  # keeps the two encodings in separate files

# np.save does not create missing directories; make sure the target exists so
# the run does not fail only after all the expensive work is done.
os.makedirs(ROOT, exist_ok=True)
for stem, array in (
    ("x_train_full", train_states),
    ("y_train_full", y_train),
    ("x_test_full", test_states),
    ("y_test_full", y_test),
):
    np.save(f"{ROOT}/{hh}{stem}{suffix}.npy", array)
print("Done")

Done
