In [1]:
pd.options.display.max_columns = 100

In [2]:
l1adarkfnames = io.get_current_science_fnames('l1a', 'uvdark', env='production')

/maven_iuvs/production/products/level1a


In [3]:
len(l1adarkfnames)

14132

In [4]:
l1a = io.L1AReader(l1adarkfnames[3])
l1a.fname

'/maven_iuvs/production/products/level1a/mvn_iuv_l1a_APP1-orbit00084-mode0111-fuvdark_20141013T233556_v02_r01.fits.gz'

In [5]:
l1a.spabins.T

array([[103, 103, 103, 103, 103, 103, 103, 103, 103,  93]], dtype=int16)

In [6]:
l1a.Binning['SPABINWIDTH']

array([[  4, 103, 103, 103, 103, 103, 103, 103, 103, 103,  93]], dtype=int16)

In [7]:
l1a.Binning['SPABINTRANSMIT']

array([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=int16)

In [8]:
l1a.img.shape

(10, 256)

In [9]:
from IPython.html.widgets import interact, interactive, fixed
from IPython.html import widgets




In [10]:
def f(i):
    """Print dimension info for the i-th dark file (callback for ``interact``).

    Opens ``l1adarkfnames[i]`` with ``io.L1AReader`` and prints, in order:
    the reader's ``n_dims``, one entry of ``img.shape`` (index 1 for a
    3-dimensional image, index 0 otherwise) and the second dimension of the
    ``SPABINWIDTH`` binning array, so the two sizes can be compared by eye.
    """
    reader = io.L1AReader(l1adarkfnames[i])
    print('n_dims:', reader.n_dims)
    # Use a dedicated name for the axis instead of recycling the file index.
    axis = 1 if reader.n_dims == 3 else 0
    print(reader.img.shape[axis])
    print(reader.Binning['SPABINWIDTH'].shape[1])

In [388]:
interact(f, i=(0,len(l1adarkfnames)-1));

n_dims: 3
10
12


## Generate dataframe with filename stats

In [11]:
df = pd.DataFrame([io.ScienceFilename(str(i)).as_series() for i in l1adarkfnames])

In [12]:
df.columns

Index(['basename', 'channel', 'cycle_orbit', 'instrument', 'level', 'mission',
       'mode', 'obs_id', 'p', 'phase', 'revision', 'root', 'time', 'timestr',
       'tokens', 'version', 'version_string'],
      dtype='object')

In [13]:
df['phase'].value_counts()

IPH2            3181
apoapse         3123
periapse        1881
early           1754
outbound         785
IPH1             672
inbound          405
outboundhifi     372
ISON2            299
IPH3             226
ISON1            224
occultation      217
centroid         214
cruisecal2       146
outdisk          142
outlimb          134
APP1             112
APP1A             56
APP2              55
cruisecal1        50
outdiskhifi       36
comet             22
periapsehifi      20
checkout           4
                   2
Name: phase, dtype: int64

In [110]:
from IPython.parallel import Client
c = Client()

In [111]:
dview = c.direct_view()
lview = c.load_balanced_view()

In [16]:
def check_for_issues(p):
    """Collect binning diagnostics for one L1A file.

    Parameters
    ----------
    p : path-like
        Must provide ``.name``; ``str(p)`` is handed to ``io.L1AReader``.

    Returns
    -------
    dict
        Always contains ``'fname'`` (``p.name``). When the file opens, it
        also contains ``'kind'`` (first three characters of the ``BIN_TBL``
        header value), ``'n_unique_spabins'`` and ``'n_unique_spebins'``.
        When the reader raises ``DimensionsError`` the record instead
        carries ``'dims': False`` and none of the binning keys.
    """
    # Imported inside the function, presumably so it also resolves when the
    # function is shipped to the parallel engines via map_async.
    from iuvs import exceptions

    d = {'fname': p.name}
    try:
        l1a = io.L1AReader(str(p))
    except exceptions.DimensionsError:
        d['dims'] = False
        # Without this early return the code below touched the unbound
        # name `l1a` and died with UnboundLocalError.
        return d
    d['kind'] = l1a.img_header['BIN_TBL'][:3]
    d['n_unique_spabins'] = l1a.n_unique_spabins
    d['n_unique_spebins'] = l1a.n_unique_spebins
    return d

In [17]:
check_for_issues(df.p[4])

{'fname': 'mvn_iuv_l1a_APP1-orbit00084-mode0111-muvdark_20141013T232042_v02_r01.fits.gz',
 'kind': 'LIN',
 'n_unique_spabins': 2,
 'n_unique_spebins': 1}

In [18]:
doing = df.p
results = lview.map_async(check_for_issues, doing)

In [19]:
from iuvs.multitools import nb_progress_display
results.ready()

False

In [20]:
nb_progress_display(results, doing)

In [21]:
resultdf = pd.DataFrame(results.result)

In [22]:
# Show the value distribution (NaN included) of every diagnostic column;
# 'fname' is left out of the iteration.
for col in (c for c in resultdf.columns if c != 'fname'):
    print(col)
    print(resultdf[col].value_counts(dropna=False))

kind
LIN    13758
NON      374
Name: kind, dtype: int64
n_unique_spabins
1    13567
2      565
Name: n_unique_spabins, dtype: int64
n_unique_spebins
1    13171
2      775
9      186
Name: n_unique_spebins, dtype: int64


In [23]:
resultdf['phase'] = resultdf.fname.map(lambda x: io.ScienceFilename(x).phase)

In [24]:
subdf = resultdf[(resultdf.n_unique_spabins==2) | (resultdf.n_unique_spebins==2)]

In [40]:
subdf[subdf.kind=='LIN'].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 645 entries, 2 to 4301
Data columns (total 5 columns):
fname               645 non-null object
kind                645 non-null object
n_unique_spabins    645 non-null int64
n_unique_spebins    645 non-null int64
phase               645 non-null object
dtypes: int64(2), object(3)
memory usage: 30.2+ KB


## Scanning code

In [271]:
def process_fname(fname):
    """Summarise one L1A dark file and store its per-integration table.

    The file is opened with ``io.L1AReader``. Files that cannot be
    summarised are reported through an ``'error'`` entry instead of an
    exception, so that one problematic file does not abort a mapped run.

    Parameters
    ----------
    fname : str or path-like
        Path of the L1A file; passed to ``io.L1AReader`` and
        ``io.save_to_hdf`` unchanged.

    Returns
    -------
    dict
        Always contains ``'fname'``. On success it also contains
        ``'success': True`` and the table has been written with
        ``io.save_to_hdf(..., 'l1a_dark_scans')``. Otherwise ``'error'`` is
        one of ``'dims'`` (reader raised ``DimensionsError``),
        ``'nonlinear'`` (``BIN_TBL`` starts with ``'NON LINEAR'``),
        ``'badbins'`` (more than one unique spatial or spectral bin width)
        or ``'axes'`` (``NAXIS`` is neither 2 nor 3), and nothing is saved.
    """
    # Local imports, presumably so they resolve on the parallel engines too.
    from iuvs import exceptions
    import numpy as np

    d = dict(fname=fname)
    # The reader is opened only here: a second, unguarded open before the
    # try block made this handler unreachable and read every file twice.
    try:
        l1a = io.L1AReader(fname)
    except exceptions.DimensionsError:
        d['error'] = 'dims'
        return d
    if l1a.img_header['BIN_TBL'].startswith('NON LINEAR'):
        d['error'] = 'nonlinear'
        return d
    if l1a.n_unique_spabins > 1 or l1a.n_unique_spebins > 1:
        d['error'] = 'badbins'
        return d

    main_header = io.get_header_df(l1a.hdulist[0])
    integration = l1a.Integration
    naxis = main_header.loc[0, 'NAXIS']
    if naxis == 2:
        # Give 2-D files a NAXIS3 column so all saved tables share columns.
        main_header.loc[0, 'NAXIS3'] = np.nan
        avgtuple = None  # reduce over the whole image
    elif naxis == 3:
        avgtuple = (1, 2)  # reduce each image of the stack separately
    else:
        d['error'] = 'axes'
        return d

    try:
        img = l1a.primary_img_dn_s
        integration['mean'] = img.mean(axis=avgtuple)
        integration['median'] = np.median(img, axis=avgtuple)
        integration['std'] = img.std(axis=avgtuple)
    except KeyError:
        # NOTE(review): presumably raised by the reader when the data needed
        # for the DN/s image is missing; the statistics are then left as NaN.
        integration['mean'] = np.nan
        integration['median'] = np.nan
        integration['std'] = np.nan

    # Repeat the single header row once per integration so the two tables
    # line up row by row. pd.concat replaces DataFrame.append, which is no
    # longer available in current pandas.
    lenint = len(integration)
    if lenint > 1:
        main_header = pd.concat([main_header] * lenint, ignore_index=True)
    joined = pd.concat([integration, main_header], axis=1)

    # Broadcast the observation-level values (all but the last three fields)
    # onto every row.
    for col in l1a.Observation.names[:-3]:
        val = l1a.Observation[col][0]
        if col == 'COLLECTION_ID':
            val = val[0]
        joined[col] = val

    io.save_to_hdf(joined.sort_index(axis=1), fname, 'l1a_dark_scans')
    d['success'] = True
    return d

In [272]:
process_fname(l1adarkfnames[2000])

{'fname': PosixPath('/maven_iuvs/production/products/level1a/mvn_iuv_l1a_IPH2-cycle00075-mode060-muvdark_20140513T205057_v02_r01.fits.gz'),
 'success': True}

In [273]:
doing = l1adarkfnames
results = lview.map_async(process_fname, doing)

In [274]:
nb_progress_display(results, doing)

KeyboardInterrupt: 

In [281]:
results_df = pd.DataFrame(results.result)

In [61]:
results_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18826 entries, 0 to 18825
Data columns (total 53 columns):
ANC_SVN_REVISION         18826 non-null object
BIN_PATTERN_INDEX        18826 non-null object
BIN_TBL                  18826 non-null object
BITPIX                   18826 non-null int64
BLANK                    18826 non-null int64
BUNDLE_ID                18826 non-null object
CADENCE                  18826 non-null float64
CAPTURE                  18826 non-null object
CASE_TEMP                18826 non-null float64
CHANNEL                  18826 non-null object
CODE_SVN_REVISION        18826 non-null object
COLLECTION_ID            18826 non-null object
DET_TEMP                 18826 non-null float64
DUTY_CYCLE               18826 non-null float64
ET                       18826 non-null float64
EXTEND                   18826 non-null bool
FILENAME                 18826 non-null object
FOV_DEG                  18826 non-null float64
GRATING_SELECT           18826 non-null obj

In [62]:
results_df.loc[results_df.ANC_SVN_REVISION == '', 'ANC_SVN_REVISION'] = 0

In [63]:
# Try to turn object-typed columns into numeric dtypes.
# NOTE(review): DataFrame.convert_objects is deprecated and, as far as the
# pandas release notes indicate, no longer exists in pandas >= 1.0 -- confirm,
# and port to pd.to_numeric per column before running on a current pandas.
results_df = results_df.convert_objects(convert_numeric=True)

In [64]:
results_df.to_hdf('/home/klay6683/output/l1a_dark_scans/results_df.h5', 'df')

## Merge temporary h5 files to database

In [277]:
import glob
h5fnames = glob.glob("/home/klay6683/output/l1a_dark_scans/*.h5")

In [278]:
len(h5fnames)

13107

In [279]:
def chunker(seq, size):
    """Return a generator over consecutive slices of ``seq``.

    Each slice holds ``size`` items except possibly the last, which holds
    whatever remains. ``seq`` must support ``len()`` and slicing.
    """
    # Build the range up front so an invalid ``size`` fails at call time.
    offsets = range(0, len(seq), size)

    def _slices():
        for start in offsets:
            yield seq[start:start + size]

    return _slices()

In [280]:
# Read the per-file HDF tables in batches of 200 files and concatenate each
# batch on its own; `dfs` ends up holding one DataFrame per batch.
dfs = []
for i,chunk in enumerate(chunker(h5fnames, 200)):
    print("Chunk {}".format(i))  # progress indicator, one line per batch
    frames = []
    for fname in chunk:
        # every file is read from the key 'df'
        frames.append(pd.read_hdf(fname, 'df'))
    # ignore_index=True: the batch gets a fresh 0..n-1 row index
    dfs.append(pd.concat(frames, ignore_index=True))

Chunk 0
Chunk 1
Chunk 2
Chunk 3
Chunk 4
Chunk 5
Chunk 6
Chunk 7
Chunk 8
Chunk 9
Chunk 10
Chunk 11
Chunk 12
Chunk 13
Chunk 14
Chunk 15
Chunk 16
Chunk 17
Chunk 18
Chunk 19
Chunk 20
Chunk 21
Chunk 22
Chunk 23
Chunk 24
Chunk 25
Chunk 26
Chunk 27
Chunk 28
Chunk 29
Chunk 30
Chunk 31
Chunk 32
Chunk 33
Chunk 34
Chunk 35
Chunk 36
Chunk 37
Chunk 38
Chunk 39
Chunk 40
Chunk 41
Chunk 42
Chunk 43
Chunk 44
Chunk 45
Chunk 46
Chunk 47
Chunk 48
Chunk 49
Chunk 50
Chunk 51
Chunk 52
Chunk 53
Chunk 54
Chunk 55
Chunk 56
Chunk 57
Chunk 58
Chunk 59
Chunk 60
Chunk 61
Chunk 62
Chunk 63
Chunk 64
Chunk 65


In [282]:
superdf = pd.concat(dfs, ignore_index=True)

In [283]:
superdf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 51306 entries, 0 to 51305
Data columns (total 52 columns):
ANC_SVN_REVISION         51306 non-null object
BIN_PATTERN_INDEX        51306 non-null object
BIN_TBL                  51306 non-null object
BITPIX                   51306 non-null int64
BLANK                    51306 non-null int64
BUNDLE_ID                51306 non-null object
CADENCE                  51306 non-null float64
CAPTURE                  51306 non-null object
CASE_TEMP                51306 non-null float64
CHANNEL                  51306 non-null object
CODE_SVN_REVISION        51306 non-null object
COLLECTION_ID            51306 non-null object
DET_TEMP                 51306 non-null float64
DUTY_CYCLE               51306 non-null float64
ET                       51306 non-null float64
EXTEND                   51306 non-null bool
FILENAME                 51306 non-null object
FOV_DEG                  51306 non-null float32
GRATING_SELECT           51306 non-null obj

In [284]:
from iuvs import calib

In [285]:
# Replace both temperature columns with the output of the iuvs.calib converters.
# NOTE(review): this overwrites the input columns, so the cell is not
# idempotent -- running it a second time pushes already-converted values
# through the converters again. Rebuild `superdf` before re-running.
superdf.DET_TEMP = superdf.DET_TEMP.map(calib.convert_det_temp_to_C)
superdf.CASE_TEMP = superdf.CASE_TEMP.map(calib.convert_case_temp_to_C)

In [286]:
superdf.to_hdf('/home/klay6683/to_keep/l1a_dark_scan.h5','df')

In [287]:
from iuvs import meta