In [103]:
'''
# author: Mark R. Yoder, Ph.D.
# email: myoder@landauer.com
#		mark.yoder@gmail.com
#
# Code property of: Landauer Inc.
#
# datetime handler worksheet.
#
# summary:
# we get dates and datetimes in a bunch of different formats; we want to make a generic handler for all of them.
# expected types are:
# - datetime strings in various formats:
#    - date delimiters: '/', '-'
#    - date-time delimiter: \t, ' ', ??
#    - time delimiters: ':' ??
#    - milliseconds or fractional seconds?
# - datetime.date, datetime.datetime (it should be easy to interchange these)
# - numpy.datetime64 (or something like that): these tend to come from matplotlib.dates
#    - this is easily converted to a datetime.datetime via list(x)... i think.
# - let's maybe also add a handler that assumes UTC for "naive" date types.
# - use whatever it takes, including matplotlib.dates and just require these libraries.
'''
%load_ext autoreload
%autoreload 2

import numpy
import matplotlib.dates as mpd
import datetime as dtm
import pytz
#
#import datetime_handler as dhp
from data_type_wranglers import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [104]:
def datetime_handler_unit_tests(tol_seconds=1.0e-3):
    '''
    Run datetime_handler() over a table of inputs and report which ones resolve to the reference instant.

    Each test case is a pair [date_entry, equals_dtm0]: date_entry is handed to
    datetime_handler(date_entry, tz_out=tz_utc), the result is converted to a matplotlib
    date number, and that number is compared with the date number of the reference
    datetime dtm0 (2016-05-10 11:25:10.77). A case passes when the observed
    (in)equality matches equals_dtm0.

    datetime_handler and tz_utc are not defined in this cell; presumably they come from
    the star-import of data_type_wranglers -- TODO confirm.

    Parameters:
        tol_seconds: largest absolute difference, in seconds, at which a processed value
            is still treated as equal to dtm0. matplotlib date numbers are floats, and
            the processed values can differ from the target by a few microseconds, so an
            exact float comparison is not a reliable equality test.

    Returns:
        list of [index, date_entry, equals_dtm0] for every failed case (empty if all passed).
        A per-case line and a summary are also printed.
    '''
    #
    # TODO: figure out how to get the local timezone so we can properly evaluate whether or not the timezone
    # conversions work. right now, i'm just making them work from CDT.
    #
    # lay out some scenarios and the expected equality to dtm0 ([[date_entry, equals_dtm0], ...])
    dtm0 = dtm.datetime(2016,5,10,11,25,10,int(.77*1e6))
    f_dtm0 = mpd.date2num(dtm0)
    #
    # matplotlib date numbers are in units of days, so express the tolerance in days as well.
    tol_days = abs(tol_seconds)/86400.
    #
    # string format variations:
    dtstrs = [['2016-05-10 11:25:10.77', True], ['2016/05/10 11:25:10.77', True], ['2016-05-10 11:25:10.77', True],
              ['2016/05/10 11:25:10.77', True], ['5-10-2016  11:25:10.77', True], ['5/10/2016  11:25:10.77', True],
              ['05-10-2016  11:25:10.77', True]]
    # now, timezone information and variations:
    dtstrs += [['5-10-2016  11:25:10.77+00:00', True], ['5-10-2016  11:25:10.77+01:00', False],
              ['5-10-2016  11:25:10.77+02:00', False], ['5-10-2016  7:25:10.77-04:00', True]]
    #
    # some datetime objects, including with tz variability.
    # ... but these are tricky because, apparently, US/Eastern timezone is not -5, but -4:56
    # and Central is not -6, but -5:51... which totally makes sense.
    # ... but some versions of pytz appear to use the integer values, so this will be a screwy test to diagnose.
    # NOTE(review): the odd offsets look like the local-mean-time values pytz yields when a zone is passed
    # straight to the datetime constructor; pytz documents tz.localize(naive_dt) as the supported way to
    # attach a zone. The hour/minute values below are tuned to the odd offsets, so they are left as-is --
    # confirm before switching these cases to localize().
    dtstrs += [[dtm.datetime(2016,5,10,11,25,10,int(.77*1e6)), True],
               [dtm.datetime(2016,5,10,11,25,10,int(.77*1e6), pytz.timezone('UTC')), True],
               [dtm.datetime(2016,5,10,6,29,10,int(.77*1e6), pytz.timezone('US/Eastern')), True],
               [dtm.datetime(2016,5,10,5,34,10,int(.77*1e6), pytz.timezone('US/Central')), True],
               [dtm.datetime(2016,5,10,11,25,10,int(.77*1e6), pytz.timezone('US/Central')), False]
              ]
    #
    # ... and how 'bout those stupid numpy.datetime64 objects. ugh...
    # note some syntax: these are rec-array types, so they have an expected length. month, day, etc. entries must
    # be len=2, so "-05", not "-5". also note that when we give it a time-zone, it will return something in the local
    # time but with the timezone info included.
    dtstrs += [[numpy.datetime64('2016-05-10 11:25:10.77'), False],
              [numpy.datetime64('2016-05-10 11:25:10.77+04:00'), False],
              [numpy.datetime64('2016-05-10 11:25:10.77-03:00'), False],
              [numpy.datetime64('2016-05-10 11:25:10.77-00:00'), True]
              ]
    #
    failed_tests = []
    #
    print("target datetime: {}".format(dtm0))
    for j, (dt, t_f) in enumerate(dtstrs):
        dtm_processed = datetime_handler(dt, tz_out=tz_utc)
        #
        # we want to check equality, but we can get small differences in microseconds.
        # we can't subtract (not)-naive types, so convert to numbers and compare within the tolerance.
        delta_t = f_dtm0 - mpd.date2num(dtm_processed)
        is_equal = bool(abs(delta_t) <= tol_days)
        did_pass = (is_equal==t_f)
        if not did_pass:
            failed_tests.append([j,dt,t_f])
        print('({}) dt_{}[{}::({})]: {} :: {}'.format('pass' if did_pass else 'fail', j, is_equal, t_f, dt, dtm_processed))
        #
    #
    n_failed = len(failed_tests)
    n_total  = len(dtstrs)
    print('\n\n{}% ({} of {}) of tests passed.'.format(100.*(n_total-n_failed)/n_total, (n_total-n_failed), n_total))

    if n_failed>0:
        print("[{}/{}] failed tests: ".format(n_failed, n_total))
        for rw in failed_tests:
            print('{}'.format(rw))
    else:
        print('All tests passed!')
    #
    # hand the failures back so the caller can inspect them programmatically.
    return failed_tests
#





In [105]:
# Run the datetime_handler checks: one pass/fail line is printed per case, followed by a summary.
aa=datetime_handler_unit_tests()

target datetime: 2016-05-10 11:25:10.770000
(pass) dt_0[True::(True)]: 2016-05-10 11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_1[True::(True)]: 2016/05/10 11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_2[True::(True)]: 2016-05-10 11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_3[True::(True)]: 2016/05/10 11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_4[True::(True)]: 5-10-2016  11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_5[True::(True)]: 5/10/2016  11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_6[True::(True)]: 05-10-2016  11:25:10.77 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_7[True::(True)]: 5-10-2016  11:25:10.77+00:00 :: 2016-05-10 11:25:10.770002+00:00
(pass) dt_8[False::(False)]: 5-10-2016  11:25:10.77+01:00 :: 2016-05-10 10:25:10.769995+00:00
(pass) dt_9[False::(False)]: 5-10-2016  11:25:10.77+02:00 :: 2016-05-10 09:25:10.769999+00:00
(pass) dt_10[True::(True)]: 5-10-2016  7:25:10.77-04:00 :: 2016-05-10 11: