In [1]:
import pandas as pd
import numpy as np
import csv
import pickle
from os import path, sys
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, StratifiedKFold, cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, VotingClassifier


BRIGHT_QUBITS_DATASETS = [
    'Data4Jens/BrightTimeTagSet1.csv',
    'Data4Jens/BrightTimeTagSet2.csv',
    'Data4Jens/BrightTimeTagSet3.csv',
    'Data4Jens/BrightTimeTagSet4.csv',
    'Data4Jens/BrightTimeTagSet5.csv',
]

DARK_QUBITS_DATASETS = [
    'Data4Jens/DarkTimeTagSet1.csv',
    'Data4Jens/DarkTimeTagSet2.csv',
    'Data4Jens/DarkTimeTagSet3.csv',
    'Data4Jens/DarkTimeTagSet4.csv',
    'Data4Jens/DarkTimeTagSet5.csv',
]

RANDOM_SEED = 42

BEST_ARRIVAL_TIME_THRESHOLD = 0.00529914

PRE_ARRIVAL_TIME_THRESHOLD = 0.000722906  # from "Distribution of Photons Arrival Times" graph
POST_ARRIVAL_TIME_THRESHOLD = 0.00522625


def log(message):
    # sys.stderr.write(message + '\n')
    print(message, file=sys.stderr)


class Histogramize(BaseEstimator, TransformerMixin):
    def __init__(self, arrival_time_threshold=(0, BEST_ARRIVAL_TIME_THRESHOLD), num_buckets=6):
        self.arrival_time_threshold = arrival_time_threshold
        self.num_buckets = num_buckets
    
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        histogram_bins = np.linspace(
            self.arrival_time_threshold[0], self.arrival_time_threshold[1], 
            num=(self.num_buckets+1), endpoint=True)
        return list(map(
            lambda measurement: np.histogram(measurement, bins=histogram_bins)[0], X))


# MLP Classifier
def load_datasets():
    def load_datasets_with_ground_truth(qubits_datasets, ground_truth):
        qubits_measurements = []
        for dataset_filename in qubits_datasets:
            with open(dataset_filename, 'r') as dataset_file:
                log("Loading {}".format(dataset_filename))
                csv_reader = csv.reader(dataset_file)
                for line in csv_reader:
                    qubits_measurements.append(
                        np.array(list(map(lambda timestamp: float(timestamp), line)))
                    )
        qubits_ground_truths = [ground_truth for i in range(len(qubits_measurements))]
        return qubits_measurements, qubits_ground_truths
    
    bright_qubits_measurements, bright_qubits_ground_truths = load_datasets_with_ground_truth(BRIGHT_QUBITS_DATASETS, 0)
    dark_qubits_measurements, dark_qubits_ground_truths = load_datasets_with_ground_truth(DARK_QUBITS_DATASETS, 1)
    return (
        (bright_qubits_measurements + dark_qubits_measurements), 
        (bright_qubits_ground_truths + dark_qubits_ground_truths))


In [7]:
log("Starting MLPClassifier Grid Search with Cross Validation Method.")

qubits_measurements, qubits_truths = load_datasets()

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
qubits_class = []
assert(len(qubits_measurements) == len(qubits_truths))
for index in range(len(qubits_measurements)):
    qubits_class.append(qubits_truths[index] * 100 + len(qubits_measurements[index]))
cv_indices = kf.split(qubits_measurements, qubits_class)

log("Starting Grid Search with Cross Validation on MLP Classifier.")

mlp_pipeline = Pipeline([
    # ('hstgm', Histogramize(num_buckets=6)),
    # ('hstgm', Histogramize(arrival_time_threshold=(0, POST_ARRIVAL_TIME_THRESHOLD))),
    ('hstgm', Histogramize(num_buckets=6, arrival_time_threshold=(0, POST_ARRIVAL_TIME_THRESHOLD))),
    ('clf', MLPClassifier(activation='relu', solver='adam'))
])

mlp_param_grid = {
    # 'hstgm__num_buckets': range(1, 33),
    # 'hstgm__arrival_time_threshold': [(0, BEST_ARRIVAL_TIME_THRESHOLD), (0, POST_ARRIVAL_TIME_THRESHOLD)],
    'clf__hidden_layer_sizes': [(n,) * 2 for n in range(8, 9)]
    # 'clf__learning_rate_init': [0.001, 0.0005],
    # 'clf__max_iter': [200, 500]
}

mlp_grid = GridSearchCV(mlp_pipeline, cv=cv_indices, n_jobs=-1, param_grid=mlp_param_grid, scoring="accuracy", refit=True, verbose=2)
mlp_grid.fit(qubits_measurements, qubits_truths)

Starting MLPClassifier Grid Search with Cross Validation Method.
Loading Data4Jens/BrightTimeTagSet1.csv
Loading Data4Jens/BrightTimeTagSet2.csv
Loading Data4Jens/BrightTimeTagSet3.csv
Loading Data4Jens/BrightTimeTagSet4.csv
Loading Data4Jens/BrightTimeTagSet5.csv
Loading Data4Jens/DarkTimeTagSet1.csv
Loading Data4Jens/DarkTimeTagSet2.csv
Loading Data4Jens/DarkTimeTagSet3.csv
Loading Data4Jens/DarkTimeTagSet4.csv
Loading Data4Jens/DarkTimeTagSet5.csv
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Starting Grid Search with Cross Validation on MLP Classifier.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   57.5s remaining:  1.4min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.7min finished


GridSearchCV(cv=<generator object _BaseKFold.split at 0x128fc07d0>,
             error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('hstgm',
                                        Histogramize(arrival_time_threshold=(0,
                                                                             0.00522625),
                                                     num_buckets=6)),
                                       ('clf',
                                        MLPClassifier(activation='relu',
                                                      alpha=0.0001,
                                                      batch_size='auto',
                                                      beta_1=0.9, beta_2=0.999,
                                                      early_stopping=False,
                                                      epsilon=1e-08,
                                                      hidden_layer_sizes=(100,),
 

In [20]:
# pickle.dump(mlp_grid, open('/tmp/pickle_test_mlp.pkl', 'wb'))
cv_indices = kf.split(qubits_measurements, qubits_class)
print(len(list(cv_indices)[0]))
# for train, test in cv_indices:
#     print(len(train), len(test))

2


In [23]:
# log(mlp_grid.cv_results_)
# print("Best parameters found in Grid Search:")
# print(mlp_grid.best_params_)
list(mlp_grid.cv_results_['mean_test_score'])

[0.999705527544356]

In [3]:
# run_mlp_with_cross_validation_average
qubits_measurements, qubits_truths = load_datasets()

mlp_pipeline = Pipeline([
        ('hstgm', Histogramize(num_buckets=6, arrival_time_threshold=(0, POST_ARRIVAL_TIME_THRESHOLD))),
        ('clf', MLPClassifier(hidden_layer_sizes=(32, 32), activation='relu', solver='adam'))
    ])

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
qubits_class = []
assert(len(qubits_measurements) == len(qubits_truths))
for index in range(len(qubits_measurements)):
    qubits_class.append(qubits_truths[index] * 100 + len(qubits_measurements[index]))
cv_indices = kf.split(qubits_measurements, qubits_class)

cv_scores = cross_validate(mlp_pipeline, qubits_measurements, qubits_truths, cv=cv_indices, scoring='accuracy', n_jobs=-1, verbose=2)
print("Scores of Cross Validation Method on MLPClassifier: ")
print(cv_scores)

Loading Data4Jens/BrightTimeTagSet1.csv
Loading Data4Jens/BrightTimeTagSet2.csv
Loading Data4Jens/BrightTimeTagSet3.csv
Loading Data4Jens/BrightTimeTagSet4.csv
Loading Data4Jens/BrightTimeTagSet5.csv
Loading Data4Jens/DarkTimeTagSet1.csv
Loading Data4Jens/DarkTimeTagSet2.csv
Loading Data4Jens/DarkTimeTagSet3.csv
Loading Data4Jens/DarkTimeTagSet4.csv
Loading Data4Jens/DarkTimeTagSet5.csv
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.6min
Scores of Cross Validation Method on MLPClassifier: 
{'fit_time': array([33.30896091, 29.87536621, 34.75128102, 28.94240999, 26.36837196]), 'score_time': array([3.5192461 , 3.59007597, 3.13400793, 2.70515418, 2.4573729 ]), 'test_score': array([0.99974169, 0.99971586, 0.99971586, 0.99969003, 0.99970294])}
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.8min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.8min fini

In [6]:
# sum(list(cv_scores['test_score'])) / len(list(cv_scores['test_score']))
cv_scores['test_score']

array([0.99974169, 0.99971586, 0.99971586, 0.99969003, 0.99970294])

In [4]:
qubits_measurements, qubits_truths = load_datasets()

Loading Data4Jens/BrightTimeTagSet1.csv
Loading Data4Jens/BrightTimeTagSet2.csv
Loading Data4Jens/BrightTimeTagSet3.csv
Loading Data4Jens/BrightTimeTagSet4.csv
Loading Data4Jens/BrightTimeTagSet5.csv
Loading Data4Jens/DarkTimeTagSet1.csv
Loading Data4Jens/DarkTimeTagSet2.csv
Loading Data4Jens/DarkTimeTagSet3.csv
Loading Data4Jens/DarkTimeTagSet4.csv
Loading Data4Jens/DarkTimeTagSet5.csv


In [5]:
max(list(map(lambda measurement: len(measurement), qubits_measurements)))

77