In [1]:
import sys
import os
import glob
import struct
import binascii
import datetime

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

# import dateutil
# from scipy import __version__ as scipy_version
# import scipy.interpolate
# import scipy.stats
# from statsmodels import __version__ as statsmodels_version
# import statsmodels.formula.api as smformula

from collections import OrderedDict
from IPython.display import display, HTML

print(sys.version)
# Report the version of each third-party package used by this notebook,
# one "name version" line per package.
for _pkg_name, _pkg in (('numpy', np), ('matplotlib', mpl), ('pandas', pd)):
    print(_pkg_name, _pkg.__version__)
# print('scipy', scipy_version)
# print('statsmodels', statsmodels_version)

3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 18:10:19) 
[GCC 7.2.0]
numpy 1.14.3
matplotlib 2.2.2
pandas 0.22.0


In [2]:
# Setup plot style
%matplotlib notebook
plt.style.use('seaborn-paper')
mpl.rcParams['figure.facecolor'] = (0.8, 0.8, 0.8, 1)

In [3]:
# Misc utility functions
def tempF2C(x):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius."""
    offset_from_freezing = x - 32.0
    return offset_from_freezing * 5.0 / 9.0
def tempC2F(x):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
    scaled = x * 9.0 / 5.0
    return scaled + 32.0

In [4]:
def msbin2ieee(msbin):
    """
    Convert 4 bytes holding a Microsoft Binary Format (MBF) single-precision
    number into a Python float (rounded to IEEE 754 single precision).
    adapted from: https://github.com/choonkeat/ms2txt/blob/master/metastock/utils.py

    The bytes are interpreted as little-endian regardless of the host:
        byte 3            : exponent byte; 0 means the value is exactly 0.0
        byte 2, bit 7     : sign (1 = negative)
        remaining 23 bits : mantissa with an implied leading 1

    The equivalent IEEE exponent is the MBF exponent minus 2.  MBF exponents
    of 1 and 2 fall below the normal IEEE single range, so the value is
    computed arithmetically and rounded to single precision instead of
    patching bit fields (which would underflow into the sign bit).

    Parameters
    ----------
    msbin : bytes
        Exactly 4 bytes.

    Returns
    -------
    float

    Raises
    ------
    struct.error
        If `msbin` is not exactly 4 bytes long.
    """
    (raw,) = struct.unpack("<I", msbin)
    exponent = raw >> 24
    if exponent == 0:
        # MBF encodes zero via the exponent byte alone; mantissa bits are ignored.
        return 0.0
    sign = -1.0 if raw & 0x00800000 else 1.0
    # Restore the implied leading 1 to get a 24-bit integer significand.
    significand = 0x00800000 | (raw & 0x007FFFFF)
    # value = 1.m * 2**(exponent - 129) = significand * 2**(exponent - 129 - 23)
    value = sign * significand * 2.0 ** (exponent - 152)
    # Round-trip through a packed single so tiny values are rounded exactly as
    # an IEEE single would store them (a no-op for values in the normal range).
    return struct.unpack("<f", struct.pack("<f", value))[0]

# Spot-check the converter against two known MBF encodings.
display(2 * msbin2ieee(b'\xDB\x0F\x49\x81'))  # encodes pi/2, so the doubled value should ~= pi
display(msbin2ieee(b'\xCD\xCC\x70\x86'))      # should ~= 60.2

3.1415927410125732

60.20000076293945

In [5]:
# Path (relative to the notebook's working directory) of the binary
# temperature-record file that the loader cell opens with open(FN, 'rb').
FN = '../../cdfa_dd/LAAR17'
# Declares that the file stores temperatures in Fahrenheit.
# NOTE(review): the loader cell currently has its F->C conversion switched off,
# so this flag has no effect on the loaded Tmin/Tmax values -- confirm intent.
INPUT_TEMPS_IN_F = True # output should be C

In [6]:
# Record layout, taken from the FIELD statement of the old BASIC program:
# 3 AS N$, 2 AS D$, 2 AS M$, 2 AS Y$, 5 AS M2$, 5 AS M3$, 16 AS X$
#
# Each record sits in a fixed 128-byte slot; only the first 35 bytes are used:
#   bytes  0- 8 : day-of-year (int16) + 1 unused byte, day, month, 2-digit year (int16 each)
#   bytes  9-13 : Tmin as a 4-byte MBF float followed by a space (0x20)
#   bytes 14-18 : Tmax as a 4-byte MBF float followed by a space (0x20)
#   bytes 19-34 : X$ -- specified in the old BASIC code but not used; meaning unknown
RECORD_SIZE = 128
# "<" pins little-endian with standard sizes so parsing does not depend on the host CPU.
date_fmt = r"<hchhh" # 5 5 16"
date_size = struct.calcsize(date_fmt)
FIELDS_SIZE = date_size + 5 + 5 + 16  # bytes of a record that actually carry fields (35)
current_year = datetime.date.today().year

# The Fahrenheit -> Celsius conversion is switched off here, so Tmin/Tmax keep
# the units stored in the file even when INPUT_TEMPS_IN_F is True.
CONVERT_F_TO_C = False

records = []
with open(FN, 'rb') as fh:
    while True:
        record = fh.read(RECORD_SIZE)
        if len(record) < FIELDS_SIZE:
            # End of file, or a trailing fragment too short to hold all fields.
            if record:
                print("WARNING: Some data left in file")
            break
        (n, _unused, d, m, y) = struct.unpack_from(date_fmt, record, 0)
        tmin_field = record[date_size:date_size + 5]
        tmax_field = record[date_size + 5:date_size + 10]
        # Each temperature must be followed by a space (0x20); anything else
        # means the record layout assumption is wrong for this file.
        for label, field in (('Tmin', tmin_field), ('Tmax', tmax_field)):
            if field[4:5] != b' ':
                raise ValueError(
                    "record {}: expected a space after {}, found {!r}".format(
                        len(records), label, field[4:5]))
        tmin = msbin2ieee(tmin_field[:4])
        tmax = msbin2ieee(tmax_field[:4])
        # mark projections more properly
        is_projection = (y == 0)
        # F to C if needed
        if CONVERT_F_TO_C and INPUT_TEMPS_IN_F:
            tmin = tempF2C(tmin)
            tmax = tempF2C(tmax)
        # convert 2 digit date to 4 digits... This will break in 2100
        if is_projection:
            fully = current_year
        else:
            fully = y + 2000
            if fully > current_year:
                fully -= 100
        date_str = "{:04d}-{:02d}-{:02d}".format(fully, m, d)
        records.append([n, d, m, y, tmin, tmax, date_str, is_projection])

dat = pd.DataFrame(records, columns=['jday', 'day', 'month', 'year', 'Tmin', 'Tmax', 'date_str', 'is_projection'])

# make date col of datetime objects
dat['date'] = pd.to_datetime(dat['date_str'], format='%Y-%m-%d')
dat = dat.drop('date_str', axis=1) # don't need the strings anymore

display(dat)

Unnamed: 0,jday,day,month,year,Tmin,Tmax,is_projection,date
0,1,1,1,17,42.700001,60.200001,False,2017-01-01
1,2,2,1,17,52.000000,58.299999,False,2017-01-02
2,3,3,1,17,49.099998,59.599998,False,2017-01-03
3,4,4,1,17,48.200001,61.900002,False,2017-01-04
4,5,5,1,17,49.299999,58.099998,False,2017-01-05
5,6,6,1,17,47.400002,56.700001,False,2017-01-06
6,7,7,1,17,46.299999,58.000000,False,2017-01-07
7,8,8,1,17,45.000000,63.299999,False,2017-01-08
8,9,9,1,17,46.299999,60.700001,False,2017-01-09
9,10,10,1,17,52.200001,63.000000,False,2017-01-10


In [13]:
# Build an independent, date-indexed copy so that `dat` itself is left untouched.
t = dat.copy(deep=True).set_index('date')
# Blank out every column for the rows labelled 2017-01-04 through 2017-01-07.
t.loc['2017-01-04':'2017-01-07'] = np.nan
t.head(10)

Unnamed: 0_level_0,jday,day,month,year,Tmin,Tmax,is_projection
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-01-01,1.0,1.0,1.0,17.0,42.700001,60.200001,0.0
2017-01-02,2.0,2.0,1.0,17.0,52.0,58.299999,0.0
2017-01-03,3.0,3.0,1.0,17.0,49.099998,59.599998,0.0
2017-01-04,,,,,,,
2017-01-05,,,,,,,
2017-01-06,,,,,,,
2017-01-07,,,,,,,
2017-01-08,8.0,8.0,1.0,17.0,45.0,63.299999,0.0
2017-01-09,9.0,9.0,1.0,17.0,46.299999,60.700001,0.0
2017-01-10,10.0,10.0,1.0,17.0,52.200001,63.0,0.0
