In [162]:
import datetime as dt
import numpy as np
import pandas as pds
import time

import pysat

def print_range(inst):
    """Print the first and last timestamps of the currently loaded data.

    Parameters
    ----------
    inst : pysat.Instrument
        Object exposing an ``index`` attribute that supports ``len()`` and
        positional indexing (the time index of the loaded data).

    Notes
    -----
    When nothing is loaded the index is empty; a short notice is printed
    instead of raising ``IndexError`` from ``inst.index[0]``.
    """
    print('Loaded Data Range: ')
    if len(inst.index) == 0:
        # Nothing to report; avoid indexing into an empty index.
        print('No data loaded', '\n')
        return
    first_time, last_time = inst.index[0], inst.index[-1]
    print(first_time, ' --- ', last_time, '\n')

# Each of the following cells sets up one test condition. Run one of them, then execute the 'Testing Load Behaviors' cell below.


In [147]:
# Baseline case: plain test instrument without any data padding,
# so pad verification is left off in the load calls.
verify = False
inst = pysat.Instrument('pysat', 'testing')

In [6]:
# Padded case: request 5 minutes of data padding and enable
# pad verification in the load calls.
verify = True
inst = pysat.Instrument('pysat', 'testing', pad=dict(minutes=5))

In [2]:
# Long-pad case: padding of 1 day plus 5 minutes, with pad
# verification enabled in the load calls.
verify = True
inst = pysat.Instrument('pysat', 'testing', pad=dict(minutes=5, days=1))

In [8]:
# multi_file_day case: instrument created with multi_file_day=True,
# with pad verification enabled in the load calls.
verify = True
inst = pysat.Instrument('pysat', 'testing', multi_file_day=True)

# Testing Load Behaviors

In [151]:
# Exercise every supported way of calling inst.load() on the instrument
# created by one of the setup cells above (`inst` and `verify` must exist).
# Single-day/single-file loads are wrapped in try/except ValueError and the
# message is printed rather than raised.
# NOTE(review): presumably these loads are rejected when the configured pad is
# longer than the load window - confirm against the pysat load documentation.

# load by yr, doy
try:
    print('load command via yr, doy: ', '2009, 1')
    inst.load(2009, 1, verifyPad=verify)
    print_range(inst)
except ValueError as err:
    print(err, '\n')

# inclusive/exclusive syntax for loading over a range of dates:
# the start (2009, 1) is included, the end (2009, 3) is excluded, so the
# recorded output ends on 2009-01-02
print('load command via yr, doy pairs: ', '2009, 1, 2009, 3')
inst.load(2009, 1, 2009, 3, verifyPad=verify)
print_range(inst)

# load by date
try:
    date = dt.datetime(2009, 1, 1)
    print('load command via date: ', date)
    inst.load(date=date, verifyPad=verify)
    print_range(inst)
except ValueError as err:
    print(err, '\n')


# load by range of dates
date = dt.datetime(2009, 1, 1)
end_date = dt.datetime(2009, 1, 3)
print('load command via dates: ', date, ', ', end_date)
# inclusive/exclusive syntax for loading over a range of dates
# (end_date itself is not loaded; see the recorded output)
inst.load(date=date, end_date=end_date, verifyPad=verify)
print_range(inst)

# load by filename
# Index 366 is the file for 2009-01-01 in the recorded run; the index is tied
# to the test instrument's file list starting on 2008-01-01.
try:
    fname = inst.files[366]
    print('Load via filename: ', fname)
    inst.load(fname=fname, verifyPad=verify)
    print_range(inst)
except ValueError as err:
    print(err, '\n')


# load by filenames
try:
    fname = inst.files[366]
    stop_fname = inst.files[367]
    # inclusive syntax for filenames: both the start and the stop file are loaded
    print('Load via filenames: ', fname, ', ', stop_fname)
    inst.load(fname=fname, stop_fname=stop_fname, verifyPad=verify)
    print_range(inst)
except ValueError as err:
    print(err, '\n')


# inst.date reflects whichever load call above succeeded last
print('Note that inst.date refers to the earliest day loaded, excluding padding')
print('inst.date: ', inst.date)

load command via yr, doy:  2009, 1
Loaded Data Range: 
2009-01-01 00:00:00  ---  2009-01-01 23:59:59 

load command via yr, doy pairs:  2009, 1, 2009, 3
Loaded Data Range: 
2009-01-01 00:00:00  ---  2009-01-02 23:59:59 

load command via date:  2009-01-01 00:00:00
Loaded Data Range: 
2009-01-01 00:00:00  ---  2009-01-01 23:59:59 

load command via dates:  2009-01-01 00:00:00 ,  2009-01-03 00:00:00
Loaded Data Range: 
2009-01-01 00:00:00  ---  2009-01-02 23:59:59 

Load via filename:  2009-01-01.nofile
Loaded Data Range: 
2009-01-01 00:00:00  ---  2009-01-01 23:59:59 

Load via filenames:  2009-01-01.nofile ,  2009-01-02.nofile
Loaded Data Range: 
2009-01-01 00:00:00  ---  2009-01-02 23:59:59 

Note that inst.date refers to the earliest day loaded, excluding padding
inst.date:  2009-01-01 00:00:00


In [152]:
# Calling load() with no arguments loads everything, from the first file
# through the last. A fresh Instrument is created with num_daily_samples=10.
inst = pysat.Instrument(
    'pysat',
    'testing',
    num_daily_samples=10,
)
inst.load()
print_range(inst)

Loaded Data Range: 
2008-01-01 00:00:00  ---  2010-12-31 00:00:09 



# Test out iteration behaviours

# Run one of the cells below to set the iteration bounds, either by date or by file

In [153]:
# Instrument iteration is limited by bounds, expressed as dates or as files.
# Layout of the bounds tuple:
#     bounds = (start dates/files, stop dates/files, stepsize, width of loaded data)
# The start and stop limits apply to inst.date, i.e. the earliest day loaded in the object.
# With a data width greater than one day, some samples loaded on the last iteration may
# extend past the supplied bounds, depending on the step size and data width chosen.

# Date bounds: step size of 2 days, data width of 2 days.
# Tuple layout: (dt.datetime or list of, dt.datetime or list of,
#                pandas frequency string, pandas DateOffset)
# The frequency string '2D' may also be given as a DateOffset.
date = inst.files.start_date
# Adding a DateOffset yields a Timestamp; convert it back to datetime so
# both limits have a consistent type.
date2 = (date + pds.DateOffset(days=10)).to_pydatetime()
bounds = (
    date,
    date2,
    '2D',
    pds.DateOffset(days=2),
)
inst.bounds = bounds
print(inst.bounds)

([datetime.datetime(2008, 1, 1, 0, 0)], [datetime.datetime(2008, 1, 11, 0, 0)], '2D', <DateOffset: days=2>)


In [2]:
# Date bounds: step size of 2 days, data width of 1 day.
# Tuple layout: (dt.datetime or list of, dt.datetime or list of,
#                pandas frequency string, pandas DateOffset)
# The frequency string '2D' may also be given as a DateOffset.
date = inst.files.start_date
# Adding a DateOffset yields a Timestamp; convert it back to datetime so
# both limits have a consistent type.
date2 = (date + pds.DateOffset(days=10)).to_pydatetime()
bounds = (
    date,
    date2,
    '2D',
    pds.DateOffset(days=1),
)
inst.bounds = bounds
print(inst.bounds)

([datetime.datetime(2008, 1, 1, 0, 0)], [datetime.datetime(2008, 1, 11, 0, 0)], '2D', <DateOffset: days=1>)


In [41]:
# Multiple date ranges: step size of 3 days, data width of 4 days.
# Two (start, stop) pairs are defined: (date, date2) and (date3, date4).
# Each sum with a DateOffset yields a Timestamp, so every limit is converted
# back to datetime to keep the types consistent.
date = inst.files.start_date
date2 = (date + pds.DateOffset(days=10)).to_pydatetime()
date3 = (date2 + pds.DateOffset(days=20)).to_pydatetime()
date4 = (date3 + pds.DateOffset(days=10)).to_pydatetime()

# Tuple layout: (list of start datetimes, list of stop datetimes,
#                pandas frequency string, pandas DateOffset)
# The frequency string '3D' may also be given as a DateOffset.
starts = (date, date3)
stops = (date2, date4)
bounds = (starts, stops, '3D', pds.DateOffset(days=4))
inst.bounds = bounds
print(inst.bounds)

([datetime.datetime(2008, 1, 1, 0, 0), datetime.datetime(2008, 1, 31, 0, 0)], [datetime.datetime(2008, 1, 11, 0, 0), datetime.datetime(2008, 2, 10, 0, 0)], '3D', <DateOffset: days=4>)


In [2]:
# File bounds with a step greater than the width:
# step by 3 files, and load 2 files at a time.
file, file2 = inst.files[0], inst.files[10]
bounds = (
    file,
    file2,
    3,
    2,
)
inst.bounds = bounds
print(inst.bounds)

(array(['2008-01-01.nofile'], dtype='<U17'), array(['2008-01-11.nofile'], dtype='<U17'), 3, 2)


In [5]:
# File bounds with a width greater than the step increment:
# step by 2 files, and load 4 files at a time.
file, file2 = inst.files[0], inst.files[10]
bounds = (
    file,
    file2,
    2,
    4,
)
inst.bounds = bounds
print(inst.bounds)

(array(['2008-01-01.nofile'], dtype='<U17'), array(['2008-01-11.nofile'], dtype='<U17'), 2, 4)


In [154]:
# File bounds with a width of one and a step greater than one:
# step by 2 files, and load 1 file at a time.
file, file2 = inst.files[0], inst.files[10]
bounds = (
    file,
    file2,
    2,
    1,
)
inst.bounds = bounds
print(inst.bounds)

(array(['2008-01-01.nofile'], dtype='<U17'), array(['2008-01-11.nofile'], dtype='<U17'), 2, 1)


# Test Iteration

In [159]:
def check_bounds(inst):
    """Check that the Instrument's current position is within inst.bounds.

    Parameters
    ----------
    inst : pysat.Instrument
        Instrument with bounds assigned. For date iteration ``inst.date`` is
        checked through ``inst.check_date_in_bounds``; otherwise the current
        file index ``inst._fid`` is compared with the file bounds.

    Raises
    ------
    AssertionError
        If the current date, or the current file index, is outside every
        configured (start, stop) range.
    """
    if inst._iter_type == 'date':
        assert inst.check_date_in_bounds(inst.date), \
            'inst.date {} is outside of inst.bounds'.format(inst.date)
    else:
        # inst.bounds[0] and inst.bounds[1] hold parallel sequences of start
        # and stop files. Accept the position if it falls inside ANY of the
        # pairs, not only the first one.
        fid = inst._fid
        ranges = [(inst.files.get_index(start), inst.files.get_index(stop))
                  for start, stop in zip(inst.bounds[0], inst.bounds[1])]
        assert any(low <= fid <= high for low, high in ranges), \
            'file index {} is outside of file bounds {}'.format(fid, ranges)

# Iteration checks: walk the bounds forwards with .next(), backwards with
# .prev(), and with the built-in iterator, printing each loaded range and
# asserting that every position is within bounds.
# `bounds` must already exist: run one of the bounds-setting cells above first.

# instrument setup
# We need to start with a new Instrument to ensure we test .next() starting from nothing
inst = pysat.Instrument('pysat', 'testing')
inst.bounds = bounds
print('Instrument bounds: ')
for start, stop in zip(inst.bounds[0], inst.bounds[1]):
    print('Start: ', start, ' Stop : ', stop)
# Step size and width are read from the user-supplied `bounds` tuple
print('Step Size : ', bounds[2])
print('Data Width : ', bounds[3])
print('\n')


print('Checking iteration via next(). Starting from new object.')
# Keep stepping forward until the instrument signals the end with StopIteration
while True:
    try:
        inst.next()
        print_range(inst)
        check_bounds(inst)
    except StopIteration:
        break

# We need to start with a new Instrument to ensure we test .prev() starting from nothing
inst = pysat.Instrument('pysat', 'testing')
inst.bounds = bounds
print('Checking iteration via prev(). Starting from new object.')
# Keep stepping backward until the instrument signals the end with StopIteration
while True:
    try:
        inst.prev()
        print_range(inst)
        check_bounds(inst)
    except StopIteration:
        break


# We need to start with a new Instrument to ensure we test iteration starting from nothing
inst = pysat.Instrument('pysat', 'testing')
inst.bounds = bounds
print('Checking iteration via built-in iteration. Starting from new object.')
# The loop target rebinds `inst`, so after the loop `inst` is the last object
# yielded; the final .prev() check below relies on that.
for inst in inst:
    print_range(inst)
    check_bounds(inst)


# Instrument currently on last day
print('One last quick check on .prev(), continuing from last object.')
inst.prev()
print_range(inst)

Instrument bounds: 
Start:  2008-01-01.nofile  Stop :  2008-01-11.nofile
Step Size :  2
Data Width :  1


Checking iteration via next(). Starting from new object.
Loaded Data Range: 
2008-01-01 00:00:00  ---  2008-01-01 23:59:59 

Loaded Data Range: 
2008-01-03 00:00:00  ---  2008-01-03 23:59:59 

Loaded Data Range: 
2008-01-05 00:00:00  ---  2008-01-05 23:59:59 

Loaded Data Range: 
2008-01-07 00:00:00  ---  2008-01-07 23:59:59 

Loaded Data Range: 
2008-01-09 00:00:00  ---  2008-01-09 23:59:59 

Loaded Data Range: 
2008-01-11 00:00:00  ---  2008-01-11 23:59:59 

Checking iteration via prev(). Starting from new object.
Loaded Data Range: 
2008-01-11 00:00:00  ---  2008-01-11 23:59:59 

Loaded Data Range: 
2008-01-09 00:00:00  ---  2008-01-09 23:59:59 

Loaded Data Range: 
2008-01-07 00:00:00  ---  2008-01-07 23:59:59 

Loaded Data Range: 
2008-01-05 00:00:00  ---  2008-01-05 23:59:59 

Loaded Data Range: 
2008-01-03 00:00:00  ---  2008-01-03 23:59:59 

Loaded Data Range: 
2008-01-01 0

# Test out orbit behaviors

In [184]:
# Prerequisite: `bounds` must have been defined by one of the bounds-setting
# cells in the iteration testing prep area above.
# orbit_info selects orbit kind 'lt' with 'mlt' as the index variable.
orbit_info = dict(kind='lt', index='mlt')
inst = pysat.Instrument(
    'pysat',
    'testing',
    orbit_info=orbit_info,
)
inst.bounds = bounds


In [174]:
# Step through ten orbits with inst.orbits.next(), printing the time span
# of the data loaded for each one.
for orbit_step in np.arange(10):
    inst.orbits.next()
    print_range(inst)


Loaded Data Range: 
2008-01-01 00:00:00  ---  2008-01-01 00:38:59 

Loaded Data Range: 
2008-01-01 00:39:00  ---  2008-01-01 02:15:59 

Loaded Data Range: 
2008-01-01 02:16:00  ---  2008-01-01 03:52:59 

Loaded Data Range: 
2008-01-01 03:53:00  ---  2008-01-01 05:29:59 

Loaded Data Range: 
2008-01-01 05:30:00  ---  2008-01-01 07:06:59 

Loaded Data Range: 
2008-01-01 07:07:00  ---  2008-01-01 08:43:59 

Loaded Data Range: 
2008-01-01 08:44:00  ---  2008-01-01 10:20:59 

Loaded Data Range: 
2008-01-01 10:21:00  ---  2008-01-01 11:57:59 

Loaded Data Range: 
2008-01-01 11:58:00  ---  2008-01-01 13:34:59 

Loaded Data Range: 
2008-01-01 13:35:00  ---  2008-01-01 15:11:59 



In [185]:
# Iterate over orbits with the built-in iterator, printing the time span of
# each. The loop stops after the orbit at position 16, so at most 17 orbits
# are printed.
for orbit_idx, inst in enumerate(inst.orbits):
    print_range(inst)
    if orbit_idx >= 16:
        break



Loaded Data Range: 
2008-01-01 00:00:00  ---  2008-01-01 00:38:59 

Loaded Data Range: 
2008-01-01 00:39:00  ---  2008-01-01 02:15:59 

Loaded Data Range: 
2008-01-01 02:16:00  ---  2008-01-01 03:52:59 

Loaded Data Range: 
2008-01-01 03:53:00  ---  2008-01-01 05:29:59 

Loaded Data Range: 
2008-01-01 05:30:00  ---  2008-01-01 07:06:59 

Loaded Data Range: 
2008-01-01 07:07:00  ---  2008-01-01 08:43:59 

Loaded Data Range: 
2008-01-01 08:44:00  ---  2008-01-01 10:20:59 

Loaded Data Range: 
2008-01-01 10:21:00  ---  2008-01-01 11:57:59 

Loaded Data Range: 
2008-01-01 11:58:00  ---  2008-01-01 13:34:59 

Loaded Data Range: 
2008-01-01 13:35:00  ---  2008-01-01 15:11:59 

Loaded Data Range: 
2008-01-01 15:12:00  ---  2008-01-01 16:48:59 

Loaded Data Range: 
2008-01-01 16:49:00  ---  2008-01-01 18:25:59 

Loaded Data Range: 
2008-01-01 18:26:00  ---  2008-01-01 20:02:59 

Loaded Data Range: 
2008-01-01 20:03:00  ---  2008-01-01 21:39:59 

Loaded Data Range: 
2008-01-01 21:40:00  ---  20