# Data export and classification

In [1]:
import os
import numpy as np
import qcodes as qc
import nanotune as nt

from nanotune.data.export_data import correct_normalizations, export_data, prep_data
from nanotune.classification.classifier import Classifier, METRIC_NAMES
from nanotune.classification.utils import display_metrics

# Root used to build the data paths below: two `dirname` steps up from the
# absolute path of the imported nanotune module file.
nt_root = os.path.dirname(
    os.path.dirname(
        os.path.abspath(nt.__file__)
    )
)

Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : /Users/jana/.qcodes/logs/command_history.log
Mode           : append
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active
Qcodes Logfile : /Users/jana/.qcodes/logs/210811-78354-qcodes.log


## Export labelled data to a numpy file

In [2]:
# Export the 'pinchoff' data from device_characterization.db (looked up in
# <nt_root>/data/tuning) under the file name 'test_export_data'.
# NOTE(review): the classifier cell below loads 'test_export_data.npy' from the
# same folder, so the export presumably lands there - confirm.
export_data(
    'pinchoff',
    ['device_characterization.db'],
    db_folder=os.path.join(nt_root, 'data', 'tuning'),
    filename='test_export_data',
)

## Train classifier with the data saved in the numpy file. 
Note that this is just a small demo and that the data in the file is not sufficient for real-life purposes.

In [3]:
# Build an SVC classifier for the 'pinchoff' category from the file exported
# above, restricted to the "signal" data type.
pinchoff_classifier = Classifier(
    ['test_export_data.npy'],
    'pinchoff',
    data_types=["signal"],
    classifier_type="SVC",
    folder_path=os.path.join(nt_root, 'data', 'tuning'),
)

In [4]:
# Train the classifier constructed in the previous cell (built from
# test_export_data.npy).
pinchoff_classifier.train()

## Predict the quality of a dataset
Note that this is again just to show how the code works and that a classifier should not be used to predict data it was trained on.

In [5]:
# Classify one dataset from device_characterization.db with the trained
# classifier; the call is the cell's last expression, so its result is displayed.
# NOTE(review): 1203 is presumably the dataset/run id inside that database - confirm.
pinchoff_classifier.predict(
    1203,
    db_name='device_characterization.db',
    db_folder=os.path.join(nt_root, 'data', 'tuning'),
)

[array([1])]

In [6]:
# Rebind `pinchoff_classifier` to a fresh SVC classifier built from
# pinchoff.npy in <nt_root>/data/training_data; the demo classifier trained
# above is no longer reachable under this name.
pinchoff_classifier = Classifier(
    ['pinchoff.npy'],
    'pinchoff',
    data_types=["signal"],
    classifier_type="SVC",
    folder_path=os.path.join(nt_root, 'data', 'training_data'),
)

We can also compute metrics such as accuracy...

In [7]:
# Evaluate the classifier over 10 iterations without writing results to disk.
# Two values come back: a summary (displayed below) and a second object that
# is kept as `metrics_all_iterations`.
(
    metrics_results,
    metrics_all_iterations,
) = pinchoff_classifier.compute_metrics(
    n_iter=10,
    save_to_file=False,
)

...and display them.

In [8]:
# Render the summary returned by compute_metrics as a table with one row per
# metric and mean/std columns.
display_metrics(metrics_results)

+--------------------------+---------------+---------------------------+
|          metric          |      mean     |            std            |
+--------------------------+---------------+---------------------------+
|      accuracy_score      |     0.865     |           0.018           |
|     brier_score_loss     |     0.135     |           0.018           |
|           auc            |     0.908     |           0.010           |
| average_precision_recall |     0.849     |           0.026           |
| average_precision_recall |  [[94.9 19.9] |  [[4.94873721 3.01496269] |
|                          |  [10.4 98.8]] |  [2.53771551 5.4       ]] |
+--------------------------+---------------+---------------------------+


In [9]:
# Show the raw summary dict: run settings (n_iter, classifier, data files,
# hyper-parameters, ...) followed by per-metric mean/std entries.
metrics_results

{'n_iter': 10,
 'classifier': 'SVC',
 'category': 'pinchoff',
 'data_files': ['/Users/jana/Documents/code/nanotune/nanotune/data/training_data/pinchoff.npy'],
 'data_types': ['signal'],
 'hyper_parameters': {'C': 1.0,
  'break_ties': False,
  'cache_size': 200,
  'class_weight': None,
  'coef0': 0.0,
  'decision_function_shape': 'ovr',
  'degree': 3,
  'gamma': 'auto',
  'kernel': 'linear',
  'max_iter': -1,
  'probability': True,
  'random_state': None,
  'shrinking': True,
  'tol': 0.001,
  'verbose': False},
 'metric_names': ['accuracy_score',
  'brier_score_loss',
  'auc',
  'average_precision_recall'],
 'elapsed_time [s/iter]': 0.50068359375,
 'n_test': 224,
 'n_train': 894,
 'mean_train_time': 0.4828409433364868,
 'std_train_time': 0.17421900020679665,
 'perform_pca': False,
 'scale_pc': False,
 'metadata': {},
 'supp_train_data': None,
 'accuracy_score': {'std': 0.01752480930927727, 'mean': 0.8647321428571428},
 'brier_score_loss': {'std': 0.017524809309277255,
  'mean': 0.13526

In [10]:
# Print the per-iteration scores of every metric.
# `metrics_results` is a dict, so zipping over it would pair the metric names
# with its *keys* ('n_iter', 'classifier', ...). The per-iteration values are
# in `metrics_all_iterations`, the second return value of compute_metrics.
# NOTE(review): assumes metrics_all_iterations holds one row of per-iteration
# scores per metric, in METRIC_NAMES order - confirm against compute_metrics.
for metric_name, iteration_scores in zip(METRIC_NAMES, metrics_all_iterations):
    print(metric_name)
    print(iteration_scores)
    print('\n')

accuracy_score
[0.84375    0.87053571 0.89285714 0.89285714 0.83928571 0.85714286
 0.85267857 0.85714286 0.875      0.86607143]


brier_score_loss
[0.15625    0.12946429 0.10714286 0.10714286 0.16071429 0.14285714
 0.14732143 0.14285714 0.125      0.13392857]


auc
[0.92009569 0.89974457 0.91685484 0.91328281 0.89071652 0.89775541
 0.89971989 0.91479856 0.91679298 0.90598017]


average_precision_recall
[0.86897016 0.8602443  0.82695177 0.89628809 0.79604814 0.82405493
 0.8573906  0.85655914 0.85466862 0.8513932 ]


