# Experiment 1: Typical use case
As a test to ensure that the limits of this implementation are similar to those of METASPACE, this experiment uses one of the larger datasets that has been processed.

### METRICS TO BENCHMARK
* Performance:
    * **Metric:** Total processing time

        **Goal:** similar to or faster than METASPACE (including cluster start time)

# Notebook setup
Run `python3 setup.py install` to install all requirements for the annotation pipeline project.

In [None]:
# Load IPython's autoreload extension and select mode 2, so that imported
# modules are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch off Jedi for the tab completer and render matplotlib figures inline
# in the notebook output.
%config Completer.use_jedi = False
%matplotlib inline

In [None]:
# Notebook kernels can run into the per-process "too many open files" limit,
# so raise the soft RLIMIT_NOFILE limit for this process.
import resource

# Soft limit we would like to have; it is clamped to the hard limit below.
desired_nofile = 10000

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print('Before:', soft, hard)

# Raising the soft limit. Hard limits can be raised only by sudo users, so:
#  * never request more than the hard limit (setrlimit raises ValueError then);
#  * never lower a soft limit that is already high enough or unlimited.
if soft != resource.RLIM_INFINITY and soft < desired_nofile:
    if hard == resource.RLIM_INFINITY:
        new_soft = desired_nofile
    else:
        new_soft = min(desired_nofile, hard)
    # Skip the call when the hard limit leaves no room to raise anything.
    if new_soft > soft:
        resource.setrlimit(resource.RLIMIT_NOFILE, (new_soft, hard))

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print('After:', soft, hard)

In [None]:
# Show the Python executable and the installation prefix this kernel uses.
# As the last expression of the cell, the tuple is displayed as the cell output.
import sys
sys.executable, sys.prefix

In [None]:
# Install PyWren-IBM if needed.
# The pinned version is installed only when the package cannot be imported;
# an already-installed copy is used as-is, whatever its version.
try:
    import pywren_ibm_cloud as pywren
except ModuleNotFoundError:    
    # Install with the interpreter running this kernel (`sys` comes from an
    # earlier cell), then retry the import.
    !{sys.executable} -m pip install -U pywren-ibm-cloud==1.0.10
    import pywren_ibm_cloud as pywren

# Display the PyWren version that is actually in use.
pywren.__version__

In [None]:
# Configure the root logger at INFO level for the rest of the notebook.
import logging
logging.basicConfig(level=logging.INFO)

## Configuration

In [None]:
import json

# Load the pipeline configuration. The context manager closes the file handle
# right after parsing instead of leaving it open for the garbage collector.
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

In [None]:
# Select the dataset configuration to benchmark (keep exactly one line active).
#input_config_path = 'metabolomics/input_config_small.json'
#input_config_path = 'metabolomics/input_config_big.json'
input_config_path = 'metabolomics/input_config_huge.json'

# Parse the selected file; the context manager closes the handle after reading.
# (`json` is imported in the previous cell.)
with open(input_config_path, encoding='utf-8') as input_config_file:
    input_config = json.load(input_config_file)

# Sections of the input configuration used by the following cells.
input_data = input_config['dataset']
input_db = input_config['molecular_db']

# Benchmark

In [None]:
import pandas as pd
from datetime import datetime
from annotation_pipeline.molecular_db import dump_mol_db, build_database, \
    calculate_centroids, get_formula_id_dfs
from annotation_pipeline.pipeline import Pipeline

In [None]:
# Initialise PyWren statistics collection before the benchmark runs.
# NOTE(review): presumably this sets up the 'stats.csv' file that is read
# after the benchmark - confirm in annotation_pipeline.utils.
from annotation_pipeline.utils import init_pywren_stats
init_pywren_stats()

### Build molecular database and Run Annotation Pipeline

In [None]:
# Timestamp taken right before the benchmarked work starts.
start_time = datetime.now()

# Build molecular database:
# Each entry is (pickle path, numeric id) as passed to dump_mol_db; the
# trailing comment names the molecular database the id refers to.
mol_db_dumps = (
    ('metabolomics/db/mol_db1.pickle', 22),  # HMDB-v4
    ('metabolomics/db/mol_db2.pickle', 19),  # ChEBI-2018-01
    ('metabolomics/db/mol_db3.pickle', 24),  # LipidMaps-2017-12-12
    ('metabolomics/db/mol_db4.pickle', 26),  # SwissLipids-2018-02-02
)
for mol_db_path, mol_db_id in mol_db_dumps:
    dump_mol_db(config, config['storage']['db_bucket'], mol_db_path, mol_db_id)
build_database(config, input_db)
polarity = input_data['polarity']
isocalc_sigma = input_data['isocalc_sigma']
calculate_centroids(config, input_db, polarity, isocalc_sigma)

# Run Annotation Pipeline:
pipeline = Pipeline(config, input_config)
pipeline()
results_df = pipeline.get_results()
images_dict = pipeline.get_images()

# Timestamp taken once the results and images have been fetched.
finish_time = datetime.now()

In [None]:
# Report when the benchmark started and finished, and how long it took.
timing_report = (
    ('start', start_time),
    ('finish', finish_time),
    ('duration', finish_time - start_time),
)
for label, value in timing_report:
    print(label, value)

In [None]:
# Display PyWren statistics file.
# As the last expression of the cell, the loaded DataFrame is rendered as the
# cell output.
pd.read_csv('stats.csv')

In [None]:
# Tear down the PyWren statistics collection now that the benchmark is done.
# NOTE(review): presumably this removes the 'stats.csv' file - confirm in
# annotation_pipeline.utils.
from annotation_pipeline.utils import remove_pywren_stats
remove_pywren_stats()

# Clean Temp Data

In [None]:
from annotation_pipeline.utils import clean_from_cos

In [None]:
# Clean formulas chunks: the location named by input_data["formulas_chunks"]
# in the database bucket.
# NOTE(review): presumably clean_from_cos deletes the matching objects from
# cloud object storage - confirm in annotation_pipeline.utils.
clean_from_cos(config, config["storage"]["db_bucket"], input_data["formulas_chunks"])

In [None]:
# Clean dataset chunks: the location named by input_data["ds_chunks"] in the
# dataset bucket.
clean_from_cos(config, config["storage"]["ds_bucket"], input_data["ds_chunks"])

In [None]:
# Clean dataset segments: the location named by input_data["ds_segments"] in
# the dataset bucket.
clean_from_cos(config, config["storage"]["ds_bucket"], input_data["ds_segments"])

In [None]:
# Clean centroids database segments: the location named by
# input_db["centroids_segments"] in the database bucket.
clean_from_cos(config, config["storage"]["db_bucket"], input_db["centroids_segments"])

In [None]:
# Clean formula output images: the location named by the "formula_images"
# entry of the output settings, in the output bucket.
# The original cell referenced a bare `output` name that is never defined in
# this notebook, which raises NameError.
# NOTE(review): assumes the output settings live under input_config["output"]
# - confirm against the input config JSON schema.
clean_from_cos(config, config["storage"]["output_bucket"], input_config["output"]["formula_images"])

In [None]:
# Clean FDR rankings: the location named by input_data["fdr_rankings"] in the
# dataset bucket.
clean_from_cos(config, config["storage"]["ds_bucket"], input_data["fdr_rankings"])