In [1]:
# Rewriting amazon dataset
def less_than_8(string):
    """Return True when ``string`` contains fewer than eight items.

    The check relies on ``len``: for a ``str`` it counts characters, for a
    list (such as the result of ``str.split``) it counts elements.
    """
    return len(string) < 8

def other_read_data(filename, max_words=8, encoding="utf-8"):
    """Read short, labelled sentences from a text file.

    Every non-blank line must end with a single-digit label preceded by one
    separator character, e.g. ``"tres bon produit 1"``.  A line is kept only
    when it has fewer than ``max_words`` whitespace-separated tokens; the
    trailing label counts as one of those tokens.

    Args:
        filename: Path of the text file to read.
        max_words: Exclusive upper bound on the token count of a line
            (label included).  Defaults to 8.
        encoding: Text encoding used to open the file.  Given explicitly so
            that accented characters are decoded the same way on every
            platform.

    Returns:
        A ``(labels, sentences)`` tuple.  ``labels`` holds one
        ``[t, 1 - t]`` pair per kept line, where ``t`` is the integer label;
        ``sentences`` holds the line text with its last two characters
        (separator and label) removed.

    Raises:
        ValueError: If the last character of a kept line is not a digit.
    """
    labels, sentences = [], []
    with open(filename, encoding=encoding) as f:
        for line in f:
            sentence = line.strip()
            if not sentence:
                # Blank lines carry no label; indexing sentence[-1] would fail.
                continue
            if len(sentence.split()) < max_words:
                t = int(sentence[-1])
                sentences.append(sentence[:-2])
                labels.append([t, 1 - t])
    return labels, sentences

# Load the training split from the text file via other_read_data.
# NOTE(review): `train_data` and `train_labels` are reassigned from
# "amazon_train.csv" in a later cell, so this result appears to be unused when
# the notebook is run top to bottom — confirm before relying on it.
train_labels, train_data = other_read_data('amazon_train_fr.txt')

In [54]:
import pandas as pd

def _to_label_pairs(label_column):
    """Turn a column of labels into ``[label, 1 - label]`` pairs.

    Iterates the column positionally, so it does not depend on the index
    labels of a pandas Series.  Assumes the labels are 0/1 so that each pair
    is a two-class one-hot style vector — TODO confirm against the CSV files.
    """
    return [[label, 1 - label] for label in label_column]


# Training split: the first CSV column is used as the sentences and the
# second as the labels.
mydf = pd.read_csv("amazon_train.csv")
train_data = mydf.iloc[:, 0].tolist()
train_labels1 = mydf.iloc[:, 1]
train_labels = _to_label_pairs(train_labels1)

# Validation split, read from "amazon_test.csv" with the same column layout.
newdf = pd.read_csv("amazon_test.csv")
val_data = newdf.iloc[:, 0].tolist()
val_labels1 = newdf.iloc[:, 1]
val_labels = _to_label_pairs(val_labels1)

In [55]:
import os
import warnings

# Silence every Python warning for the rest of the session; note that this
# also hides deprecation notices raised by the libraries used in later cells.
warnings.filterwarnings('ignore')
# Environment flag set before the parsing libraries are imported below.
# NOTE(review): presumably read by the HuggingFace `tokenizers` package — confirm.
os.environ['TOKENIZERS_PARALLELISM'] = 'true'


import numpy as np

# Mini-batch size passed to the training Dataset.
BATCH_SIZE = 30
# Epoch count; it scales the SPSA 'A' hyperparameter when the trainer is built.
EPOCHS = 50
# NOTE(review): SEED is not referenced by any visible cell (the trainer is
# given seed=0) — confirm whether it is still needed.
SEED = 2

# Load the test split from the text file via other_read_data.
# NOTE(review): test_labels / test_data are not used by the visible cells.
test_labels, test_data = other_read_data('amazon_test_fr.txt')

In [None]:
from lambeq import BobcatParser

# Bobcat parser configured with root categories NP and N; progress is
# reported as text.
parser = BobcatParser(root_cats=('NP', 'N'), verbose='text')

# Parse both splits into diagrams. With suppress_exceptions=True a sentence
# that cannot be parsed presumably yields None instead of raising (the next
# cell filters on `is not None`) — confirm against the lambeq documentation.
raw_train_diagrams = parser.sentences2diagrams(train_data, suppress_exceptions=True)
raw_val_diagrams = parser.sentences2diagrams(val_data, suppress_exceptions=True)

Tagging sentences.
Parsing tagged sentences.
Turning parse trees to diagrams.


In [None]:
def _keep_parsed(raw_diagrams, labels):
    """Drop failed parses and return aligned ``(diagrams, labels)`` lists.

    Args:
        raw_diagrams: Parser output, one entry per sentence; ``None`` marks a
            sentence that could not be parsed.
        labels: One label per entry of ``raw_diagrams``.

    Returns:
        A tuple of the normal-form diagrams of the successfully parsed
        sentences and the labels belonging to them, in the same order.

    Raises:
        ValueError: If the two inputs differ in length.  This happens when
            this cell is re-run after it already replaced the label list with
            its filtered version; zipping would then silently pair diagrams
            with the wrong labels.
    """
    if len(raw_diagrams) != len(labels):
        raise ValueError(
            f"{len(raw_diagrams)} raw diagrams but {len(labels)} labels; "
            "re-run the data-loading cell before filtering again."
        )
    kept = [
        (diagram, label)
        for diagram, label in zip(raw_diagrams, labels)
        if diagram is not None
    ]
    diagrams = [diagram.normal_form() for diagram, _ in kept]
    kept_labels = [label for _, label in kept]
    return diagrams, kept_labels


# The label lists are replaced by their filtered versions so that they stay
# index-aligned with the diagram lists.
train_diagrams, train_labels = _keep_parsed(raw_train_diagrams, train_labels)
val_diagrams, val_labels = _keep_parsed(raw_val_diagrams, val_labels)

In [None]:
from lambeq import AtomicType, IQPAnsatz, remove_cups

# IQP ansatz: one qubit per noun wire, none for the sentence wire, a single
# layer and one parameter per single-qubit word.
ansatz = IQPAnsatz({AtomicType.NOUN: 1, AtomicType.SENTENCE: 0},
                   n_layers=1, n_single_qubit_params=1)


def _diagrams_to_circuits(diagrams, labels, sentences, show_labels=False):
    """Convert diagrams to circuits, skipping those the ansatz rejects.

    Args:
        diagrams: Normal-form diagrams to convert.
        labels: One label per diagram, in the same order.
        sentences: The source sentence of each diagram, in the same order;
            used only to report failures.
        show_labels: When True, also print the label of a rejected sentence.

    Returns:
        A ``(circuits, kept_labels)`` tuple containing only the entries whose
        conversion succeeded, still aligned with each other.
    """
    circuits, kept_labels = [], []
    for diagram, label, sentence in zip(diagrams, labels, sentences):
        try:
            circuit = ansatz(remove_cups(diagram))
        except (ValueError, KeyError):
            # Report the sentence that could not be converted and move on.
            print(sentence)
            if show_labels:
                print(label)
            continue
        circuits.append(circuit)
        kept_labels.append(label)
    return circuits, kept_labels


# train_data / val_data still contain the sentences whose parse failed,
# whereas the diagram lists do not.  Rebuild sentence lists that line up with
# the diagrams so that the failure report names the right sentence.
train_sentences = [
    sentence for sentence, diagram in zip(train_data, raw_train_diagrams)
    if diagram is not None
]
val_sentences = [
    sentence for sentence, diagram in zip(val_data, raw_val_diagrams)
    if diagram is not None
]

train_circuits, trainlabels = _diagrams_to_circuits(
    train_diagrams, train_labels, train_sentences)
print("END OF TRAINING SENTENCES")
val_circuits, vallabels = _diagrams_to_circuits(
    val_diagrams, val_labels, val_sentences, show_labels=True)

In [None]:
from pytket.extensions.qiskit import AerBackend
from lambeq import TketModel

# Combine both splits so the model is built from every circuit that will be
# evaluated (training and validation).
all_circuits = train_circuits + val_circuits

# Aer backend obtained through pytket's Qiskit extension.
backend = AerBackend()
backend_config = {
    'backend': backend,
    # NOTE(review): the argument 2 is presumably the optimisation level — confirm.
    'compilation': backend.default_compilation_pass(2),
    'shots': 8192
}

model = TketModel.from_diagrams(all_circuits, backend_config=backend_config)

In [None]:
from lambeq import BinaryCrossEntropyLoss

# Loss function handed to the trainer: lambeq's built-in binary cross-entropy.
bce = BinaryCrossEntropyLoss()

def acc(y_hat, y):
    """Accuracy of rounded predictions against two-component labels.

    Args:
        y_hat: Predicted values, one row of two entries per sample.
        y: Target labels with the same layout as ``y_hat``.

    Returns:
        The number of matching entries divided by ``len(y)`` and then by 2.
    """
    matches = np.round(y_hat) == y
    # Every sample contributes two entries (one per class), so the count is
    # halved to avoid double-counting.
    return np.sum(matches) / len(y) / 2


eval_metrics = {"acc": acc}

In [None]:
from lambeq import QuantumTrainer, SPSAOptimizer

# Trainer for the quantum model. The epoch count comes from EPOCHS so that it
# cannot drift from the SPSA 'A' hyperparameter, which is derived from the
# same constant.
# NOTE(review): seed is fixed to 0 here; the notebook-level SEED constant is
# not used — confirm which one is intended.
trainer = QuantumTrainer(
    model,
    loss_function=bce,
    epochs=EPOCHS,
    optimizer=SPSAOptimizer,
    optim_hyperparams={'a': 0.05, 'c': 0.06, 'A': 0.01 * EPOCHS},
    evaluate_functions=eval_metrics,
    evaluate_on_train=True,
    verbose='text',
    seed=0
)

In [None]:
from lambeq import Dataset

# Training set: the circuits and their aligned labels, batched by BATCH_SIZE.
train_dataset = Dataset(
            train_circuits,
            trainlabels,
            batch_size=BATCH_SIZE)

# Validation set: no batch_size is given (the library default applies) and
# shuffling is switched off.
val_dataset = Dataset(val_circuits, vallabels, shuffle=False)

In [None]:
# Train on train_dataset and evaluate on val_dataset.
# NOTE(review): evaluation_step=1 and logging_step=5 are presumably counted
# in epochs — confirm against the lambeq Trainer.fit documentation.
trainer.fit(train_dataset, val_dataset, evaluation_step=1, logging_step=5)

In [None]:
import matplotlib.pyplot as plt

# 2x2 grid: loss on the top row, accuracy on the bottom row; training results
# in the left column, validation ("development") results in the right column.
fig, ((ax_tl, ax_tr), (ax_bl, ax_br)) = plt.subplots(2, 2, sharex=True, sharey='row', figsize=(10, 6))
ax_tl.set_title('Training set')
ax_tr.set_title('Development set')
ax_bl.set_xlabel('Iterations')
ax_br.set_xlabel('Iterations')
ax_bl.set_ylabel('Accuracy')
ax_tl.set_ylabel('Loss')

# Each curve takes the next colour of the default cycle. Only every 10th
# recorded value is drawn.
# NOTE(review): because of the [::10] thinning the x axis shows the index of
# the plotted point, not the iteration number — confirm this is intended.
colours = iter(plt.rcParams['axes.prop_cycle'].by_key()['color'])
ax_tl.plot(trainer.train_epoch_costs[::10], color=next(colours))
ax_bl.plot(trainer.train_results['acc'][::10], color=next(colours))
ax_tr.plot(trainer.val_costs[::10], color=next(colours))
ax_br.plot(trainer.val_results['acc'][::10], color=next(colours))

# print validation accuracy
# `vallabels` is the label list that was built together with `val_circuits`,
# so it is the one guaranteed to have one entry per circuit. `val_labels`
# still contains labels of diagrams that failed circuit conversion.
test_acc = acc(model(val_circuits), vallabels)
print('Validation accuracy:', test_acc.item())