# SDP Parametric Model Data Export

(Last tested with IPython 4.2.0 using Python 3.5 - if you have trouble, check version compatibility)

In [None]:
from __future__ import print_function
import sys
import os
from ipywidgets import interact_manual, Select, SelectMultiple
from IPython.display import display, Markdown
sys.path+=['..']
from sdp_par_model import reports as iapi
from sdp_par_model.parameters.definitions import *

import subprocess, string
# Date and abbreviated hash of the current Git commit; both end up in the
# default file names offered by the CSV export widgets.
git_date = subprocess.check_output(["git", "log", "-1", "--format=%ci"]).decode().split()[0]
git_rev = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode().split()[0]
# Directory of the CSV files; the empty last component makes the path end
# with a separator so file names can simply be appended to it.
csv_path = os.path.join('..', 'data', 'csv', '')

## Calculation results can be exported as CSV

These files can be opened by standard spreadsheet programs such as LibreOffice Calc or Microsoft Excel.

In [None]:
# Manually triggered widget around iapi.write_csv_hpsos. The suggested file
# name carries the Git date and revision; every available HPSO is preselected.
hpsos_csv_name = csv_path + git_date + "-" + git_rev + "_hpsos.csv"
hpsos_picker = SelectMultiple(options=HPSOs.available_hpsos, value=HPSOs.available_hpsos)
interact_manual(iapi.write_csv_hpsos, filename=hpsos_csv_name, hpsos=hpsos_picker);

In [None]:
# Manually triggered widget around iapi.write_csv_pipelines. The suggested
# file name carries the Git date and revision; every available telescope,
# band and pipeline is preselected.
pipelines_csv_name = csv_path + git_date + "-" + git_rev + "_pipelines.csv"
telescope_picker = SelectMultiple(options=Telescopes.available_teles, value=Telescopes.available_teles)
band_picker = SelectMultiple(options=Bands.available_bands, value=Bands.available_bands)
pipeline_picker = SelectMultiple(options=Pipelines.available_pipelines, value=Pipelines.available_pipelines)
interact_manual(iapi.write_csv_pipelines,
                filename=pipelines_csv_name,
                telescopes=telescope_picker,
                bands=band_picker,
                pipelines=pipeline_picker);

## Generate CSV data for all pipelines and HPSOs

This might take a bit of time!

In [None]:
# Write the current results for all HPSOs and all telescope/band/pipeline
# combinations into out/csv, creating that directory first if it is missing.
out_path = os.path.join('out', 'csv')
if not os.path.isdir(out_path):
    os.makedirs(out_path)
current_hpsos_csv = os.path.join(out_path, 'current_hpsos.csv')
current_pipelines_csv = os.path.join(out_path, 'current_pipelines.csv')
iapi.write_csv_hpsos(current_hpsos_csv, HPSOs.available_hpsos)
iapi.write_csv_pipelines(current_pipelines_csv,
                         Telescopes.available_teles, Bands.available_bands, Pipelines.available_pipelines)

## CSV tables can be compared to track changes

Note that you might have to re-execute the cell if freshly generated CSV files don't appear in the list. The reference file selected by default is the last one that was checked into version control.

In [None]:
# All CSV files in csv_path, most recently modified first.
# (Deliberately not called `ls`, which would shadow IPython's `ls` automagic.)
csv_files = [csv_path + name for name in os.listdir(csv_path) if name.endswith(".csv")]
csv_files.sort(key=lambda path: os.stat(path).st_mtime)
results = csv_files[::-1]
if not results:
    # Fail with an explanation instead of a bare IndexError further down
    raise RuntimeError("No CSV files found in %s - nothing to compare" % csv_path)

# Every path that Git lists in the history of csv_path. The relpath call
# rewrites them into the same "../..." form as the entries of `results`.
# NOTE(review): assumes Git prints paths relative to the repository root and
# that this notebook runs from a directory directly below it - confirm.
refs = subprocess.check_output(["git", "log", "--pretty=format:", "--name-only", csv_path]).split()
refs = [os.path.relpath(r.decode(), "iPython") for r in refs]
tracked = set(refs)  # constant-time membership tests
result_refs = [path for path in results if path in tracked]

# Default reference: the newest file that Git knows about. If none of the
# listed files is tracked yet, fall back to the newest file overall rather
# than failing.
default_ref = result_refs[0] if result_refs else results[0]
interact_manual(iapi.compare_csv,
                result_file=Select(options=results, value=results[0]),
                ref_file=Select(options=results, value=default_ref))

## Compare *all* pipelines and HPSO versions

We go back in (Git) history and pairwise compare them. The current results (see above) are included.

In [None]:
out_path = os.path.join('out', 'csv')
# Ask Git for every file name in the history of ../data/csv, walk that list
# back to front, and rewrite each entry relative to "iPython".
git_listing = subprocess.check_output(["git", "log", "--pretty=format:", "--name-only", "--reverse", "../data/csv"]).split()
refs = [os.path.relpath(entry.decode(), "iPython") for entry in reversed(git_listing)]
# Split into HPSO and pipeline tables; the freshly generated "current" file
# always goes first so that it is compared against the first reference.
hpso_refs = [name for name in refs if name.endswith('_hpsos.csv')]
all_hpsos = [os.path.join(out_path, 'current_hpsos.csv')] + hpso_refs
pipelines_refs = [name for name in refs if name.endswith('_pipelines.csv')]
all_pipelines = [os.path.join(out_path, 'current_pipelines.csv')] + pipelines_refs

# Quick and dirty way to get the piece of history
import re
def git_log(file0, file1):
    """Show a heading and the Git log between the revisions of two CSV files.

    The revision of a file is taken from its name: the run of hexadecimal
    digits between a "-" and a "_". A `file1` located under `out_path` is
    treated as the current state and mapped to "HEAD".

    The heading is always displayed. If a revision cannot be extracted from
    a file name, nothing else is printed.

    :param file0: reference file; its revision is the start of the log range
    :param file1: result file; its revision (or HEAD) is the end of the range
    """
    display(Markdown("### Comparing %s with %s" % (file1, file0)))

    rev_pattern = r"-([0-9A-Fa-f]+)_"

    start = re.search(rev_pattern, file0)
    if start is None:
        return
    ref0 = start.group(1)

    # Files under out_path are the freshly generated ("current") results
    ref1 = "HEAD"
    if not file1.startswith(out_path):
        end = re.search(rev_pattern, file1)
        if end is None:
            return
        ref1 = end.group(1)

    log_output = subprocess.check_output(["git", "log", ref0 + ".." + ref1, '--format=%h %ci %cn: %s'])
    print(log_output.decode())

# Compare pairwise, track changes
def _compare_consecutive(files, html_pattern):
    """Compare each file with its successor in `files` and collect the results.

    For every consecutive pair the Git log between the two is shown, then
    iapi.compare_csv is called with an HTML export path under `out_path`
    built from `html_pattern` and the index of the pair.

    :param files: file names, each one compared against the one after it
    :param html_pattern: %-pattern taking the pair index, e.g. "x_%d.html"
    :return: list with the value returned by iapi.compare_csv for each pair
    """
    collected = []
    for idx, (newer, older) in enumerate(zip(files, files[1:])):
        git_log(older, newer)
        html_file = os.path.join(out_path, html_pattern % idx)
        collected.append(iapi.compare_csv(newer, older, export_html=html_file))
    return collected

diffs_hpsos = _compare_consecutive(all_hpsos, "compare_hpsos_%d.html")
diffs_pipelines = _compare_consecutive(all_pipelines, "compare_pipelines_%d.html")

We always want to keep the current numbers under source control for easy comparisons. Therefore, the CSV file checked into Git should exactly match the current results.

In [None]:
# The first comparison of each kind pits the current results against the
# first reference file from the Git history; the two must agree to within
# this tolerance.
tolerance = 1e-6
assert diffs_hpsos[0] < tolerance, \
    "%s does not match (%g)! Forgot to check in new results?" % (all_hpsos[1], diffs_hpsos[0])
assert diffs_pipelines[0] < tolerance, \
    "%s does not match (%g)! Forgot to check in  new results?" % (all_pipelines[1], diffs_pipelines[0])