In [1]:
!pip install pennylane

Collecting pennylane
  Downloading PennyLane-0.41.1-py3-none-any.whl.metadata (10 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.2-py3-none-any.whl.metadata (5.8 kB)
Collecting pennylane-lightning>=0.41 (from pennylane)
  Downloading pennylane_lightning-0.41.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (12 kB)
Collecting diastatic-malt (from pennylane)
  Downloading diastatic_malt-2.15.2-py3-none-any.whl.metadata (2.6 kB)
Collecting scipy-openblas32>=0.3.26 (from pennylane-lightning>=0.41->pennylane)
  Downloading scipy_openblas32-0.3.30.0.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 kB[0m [31m2.8 MB/s

In [2]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import AdamOptimizer

from sklearn.model_selection import train_test_split
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import math




Feature Map.

In [3]:
num_qubits = 4
num_layers = 2

dev = qml.device("default.qubit", wires=num_qubits)
# quantum circuit functions
def statepreparation(x):
    qml.BasisEmbedding(x, wires=range(0, num_qubits))

Ansatz

In [4]:
def layer(W):

    qml.Rot(W[0, 0], W[0, 1], W[0, 2], wires=0)
    qml.Rot(W[1, 0], W[1, 1], W[1, 2], wires=1)
    qml.Rot(W[2, 0], W[2, 1], W[2, 2], wires=2)
    qml.Rot(W[3, 0], W[3, 1], W[3, 2], wires=3)

    qml.CNOT(wires=[0, 1])
    qml.CNOT(wires=[1, 2])
    qml.CNOT(wires=[2, 3])
    qml.CNOT(wires=[3, 0])


In [5]:
@qml.qnode(dev, interface="autograd")
def circuit(weights, x):

    statepreparation(x)

    for W in weights:
        layer(W)

    return qml.expval(qml.PauliZ(0))

Salida del circuito cuántico sumado al sesgo.

In [6]:
def variational_classifier(weights, bias, x):
    return circuit(weights, x) + bias

Definición de la función de pérdida.

In [7]:

def square_loss(labels, predictions):
    loss = 0
    for l, p in zip(labels, predictions):
        loss = loss + (l - p) ** 2

    loss = loss / len(labels)
    return loss

Definición de la función de evaluación de desempeño.

In [8]:
def accuracy(labels, predictions):

    loss = 0
    for l, p in zip(labels, predictions):
        if abs(l - p) < 1e-5:
            loss = loss + 1
    loss = loss / len(labels)

    return loss

Definición de la función de evaluación de accuracy.

In [9]:
def accuracy(labels, predictions):

    loss = 0
    for l, p in zip(labels, predictions):
        if abs(l - p) < 1e-5:
            loss = loss + 1
    loss = loss / len(labels)

    return loss

Definición de la función de costo.

In [10]:
def cost(weights, bias, X, Y):
    predictions = [variational_classifier(weights, bias, x) for x in X]
    return square_loss(Y, predictions)

In [14]:
import zipfile

with zipfile.ZipFile('titanic.zip', 'r') as zip_ref:
    zip_ref.extractall('titanic_data')

# Verifica los archivos extraídos
import os

print("Archivos extraídos:")
print(os.listdir('titanic_data'))


Archivos extraídos:
['train.csv', 'gender_submission.csv', 'test.csv']


Preprocesamiento de los datos.

In [15]:
# preparaing data
df_train = pd.read_csv('/content/titanic_data/train.csv')

df_train['Pclass'] = df_train['Pclass'].astype(str)

df_train = pd.concat([df_train, pd.get_dummies(df_train[['Pclass', 'Sex', 'Embarked']])], axis=1)

# I will fill missings with the median
df_train['Age'] = df_train['Age'].fillna(df_train['Age'].median())

df_train['is_child'] = df_train['Age'].map(lambda x: 1 if x < 12 else 0)
cols_model = ['is_child', 'Pclass_1', 'Pclass_2', 'Sex_female']
# En este bloque se hace la separaación entre datos de prueba y datos de test del dataset.
X_train, X_test, y_train, y_test = train_test_split(df_train[cols_model], df_train['Survived'], test_size=0.10, random_state=42, stratify=df_train['Survived'])

X_train = np.array(X_train.values, requires_grad=False)
Y_train = np.array(y_train.values * 2 - np.ones(len(y_train)), requires_grad=False)


Inicialización de los parámetros.

In [16]:
np.random.seed(0)
weights_init = 0.01 * np.random.randn(num_layers, num_qubits, 3, requires_grad=True)
bias_init = np.array(0.0, requires_grad=True)

opt = AdamOptimizer(0.125)
num_it = 70
batch_size = math.floor(len(X_train)/num_it)

weights = weights_init
bias = bias_init

In [17]:
print("Los pesos iniciales son: ", weights_init)
print("Los bias iniciales son:",bias_init)

Los pesos iniciales son:  [[[ 0.01764052  0.00400157  0.00978738]
  [ 0.02240893  0.01867558 -0.00977278]
  [ 0.00950088 -0.00151357 -0.00103219]
  [ 0.00410599  0.00144044  0.01454274]]

 [[ 0.00761038  0.00121675  0.00443863]
  [ 0.00333674  0.01494079 -0.00205158]
  [ 0.00313068 -0.00854096 -0.0255299 ]
  [ 0.00653619  0.00864436 -0.00742165]]]
Los bias iniciales son: 0.0


Entrenamiento del circuito

In [20]:
for it in range(num_it):

    # Se cargan los datos y se aplica la función de optmización, con esta, es que se busca el parámetro que entregue el menor valor posible de la función de costo.
    batch_index = np.random.randint(0, len(X_train), (batch_size,))
    X_batch = X_train[batch_index]
    Y_batch = Y_train[batch_index]
    weights, bias, _, _ = opt.step(cost, weights, bias, X_batch, Y_batch)

    # CCon el sentido de medir un redimiento, se hacen predicciones para posteriormente evaluar el rendimiento con la función de accuracy.
    predictions = [np.sign(variational_classifier(weights, bias, x)) for x in X_train]
    acc = accuracy(Y_train, predictions)

    print(
        "Iter: {:5d} | Cost: {:0.7f} | Accuracy: {:0.7f} ".format(
            it + 1, cost(weights, bias, X_train, Y_train), acc
        )
    )

Iter:     1 | Cost: 2.3119722 | Accuracy: 0.3657928 
Iter:     2 | Cost: 2.0304556 | Accuracy: 0.3657928 
Iter:     3 | Cost: 1.7212568 | Accuracy: 0.3657928 
Iter:     4 | Cost: 1.4504211 | Accuracy: 0.3657928 
Iter:     5 | Cost: 1.2860336 | Accuracy: 0.5205993 
Iter:     6 | Cost: 1.2213677 | Accuracy: 0.6167291 
Iter:     7 | Cost: 1.1999638 | Accuracy: 0.6167291 
Iter:     8 | Cost: 1.1690077 | Accuracy: 0.6167291 
Iter:     9 | Cost: 1.1090363 | Accuracy: 0.6167291 
Iter:    10 | Cost: 1.0756152 | Accuracy: 0.6167291 
Iter:    11 | Cost: 1.0608010 | Accuracy: 0.6167291 
Iter:    12 | Cost: 1.0480405 | Accuracy: 0.6167291 
Iter:    13 | Cost: 1.0472376 | Accuracy: 0.6167291 
Iter:    14 | Cost: 1.0147893 | Accuracy: 0.6167291 
Iter:    15 | Cost: 0.9866451 | Accuracy: 0.6167291 
Iter:    16 | Cost: 0.9537489 | Accuracy: 0.6167291 
Iter:    17 | Cost: 0.9276301 | Accuracy: 0.6167291 
Iter:    18 | Cost: 0.9137055 | Accuracy: 0.6167291 
Iter:    19 | Cost: 0.9081582 | Accuracy: 0.61

organización de los datos de test.

In [21]:
X_test = np.array(X_test.values, requires_grad=False)
Y_test = np.array(y_test.values * 2 - np.ones(len(y_test)), requires_grad=False)

Predición del circuito

In [22]:
predictions = [np.sign(variational_classifier(weights, bias, x)) for x in X_test]


Pesos entrenados y bias.

In [23]:
print("Los pesos entrenados son: ",weights)
print("Los pesos entrenados son",bias)

Los pesos entrenados son:  [[[ 1.76405271e-02  1.02684800e+00 -3.66468004e-01]
  [ 2.24089343e-02  1.21367695e+00 -1.20272645e+00]
  [ 9.50088884e-03  2.18794057e-01  9.98257920e-01]
  [ 4.10598858e-03  4.07629221e-01 -8.92005111e-01]]

 [[ 7.61038089e-03  1.21675069e-03  4.43863596e-03]
  [-2.19312870e-01  1.75685419e+00 -2.05157979e-03]
  [ 1.28423496e+00 -2.07897847e+00 -2.55298958e-02]
  [ 1.56354438e+00  1.02956545e+00 -7.42164786e-03]]]
Los pesos entrenados son -0.2124249341568173


Métricas de presición del sistema.

In [24]:
accuracy_score(Y_test, predictions)
precision_score(Y_test, predictions)
recall_score(Y_test, predictions)
f1_score(Y_test, predictions, average='macro')

0.7712374581939799

Se puede ver, que el sistema tiene un rendimiento que aunque no es el mejor, para ciertos casos donde el error es relativamente aceptable puede ser aplicado, puesto que, este tiene una métrica relativamente buena, de un 0.77