#### Development Goals

Rename things so that their names make sense.

Add a function that makes accessing and editing entries easier.

Add cross-checking for errors and more robust error handling.
I want `ship` and `receive` to ask for confirmation before updating the local variable.



In [1]:
import pprint
from collections import Counter
from datetime import datetime

import numpy as np
import xarray as xr




In [2]:
### Independent functions and inputs
# Location tags. Each tag is also the name of the directory that holds the
# tag's .nc file (the loaders below open '<tag>/<tag>.nc').
locs = [
    'ALT', 'AMY', 'BRW', 'CGO', 'HFM',
    'KUM', 'LEF', 'MHD', 'MLO', 'NWR',
    'SMO', 'SPO', 'THD', 'C14', 'UCI',
]

def time_tag():
    '''Return the current local date as a "YYYY-MM-DD" string.

    Only the date is included (no time of day); the original note says this
    is to avoid saving so many files.
    '''
    return datetime.now().strftime("%Y-%m-%d")

def lookup(da, lookup_by, value):
    '''Look up the entries of a DataArray whose `lookup_by` row equals `value`.

    Parameters
    ----------
    da : DataArray
        Array with a ``dt`` coordinate; must support ``sel`` and ``where``.
    lookup_by : str
        Label on the ``dt`` coordinate to compare against (passed to
        ``da.sel(dt=lookup_by)``).
    value : scalar, list or tuple
        A single value to match, or a list/tuple of values to match one by one.

    Returns
    -------
    The matching selection for a single value, or a list with one selection
    per element when `value` is a list or tuple. The result is also
    pretty-printed.
    '''
    def _match(target):
        # keep only the entries where the chosen row equals `target`
        return da.where(da.sel(dt=lookup_by) == target, drop=True)

    # Anything that is not a list/tuple is treated as one value, so numeric
    # flask numbers work as well as strings (previously they left `p` unbound).
    if isinstance(value, (list, tuple)):
        p = [_match(v) for v in value]
    else:
        p = _match(value)
    pprint.pp(p)
    return p


In [7]:
from collections import UserDict
class stationdict(UserDict):
    '''Rework of dalist that inherits dictionary properties to clean it up.

    Maps each location tag in ``locs`` to the DataArray read from
    ``<key>/<key><tag>.nc``.

    Attributes
    ----------
    locs : list of str
        Location tags; also the dictionary keys, in load order.
    tag : str
        Optional suffix inserted before ``.nc`` in the filename that is read.
    original : stationdict
        Copy of the mapping taken right after loading, holding independent
        copies of the arrays.
    '''
    def __init__(self, locs, tag=''):
        super().__init__()
        self.locs = locs
        self.tag = tag
        self._load()  # loads files fully into memory, careful.
        # Snapshot with independent array copies: save() rewrites 'itime' in
        # place, and a shallow copy would share (and so alter) those arrays.
        self.original = self.copy()
        for key, da in self.items():
            self.original[key] = da.copy()

    def _load(self):
        '''Load files corresponding to each location tag "locs" into a dict.'''
        for key in self.locs:
            path = key+'/'+key+self.tag+'.nc'
            # Load inside the context manager so the file is closed even if
            # reading fails part-way.
            with xr.open_dataarray(path) as value:
                self[key] = value.load()

    def view(self, name=None):
        '''Printout of all DataArrays

        Prints only station ``name`` when it is one of ``locs``; otherwise
        prints every station in the mapping.
        '''
        if name in self.locs:
            # view one station if specified
            print(self[name].T)
            print('')
        else:
            # otherwise view them all (this class stores them as dict values;
            # it has no `da_list` attribute)
            for da in self.values():
                print(da.T)
                print('')

    def save(self):
        '''Ask for confirmation, then write every station to disk.

        Each DataArray is written twice: to a dated file
        ``./<name>/<name>.<YYYY-MM-DD>.nc`` and to ``./<name>/<name>.nc``,
        where ``<name>`` is the DataArray's ``name``. Nothing is written
        unless the answer to the prompt is exactly ``'y'``.
        '''
        q = input('Save? (y/n)') # Check that the user really wants to save
        if q == 'y':
            for key in self:
                da = self[key]
                da['itime'] = np.arange(len(da.itime)) # ensure that itime is integers
                # this is a band-aid
                filename = da.name+'.'+time_tag()+'.nc'
                path = './'+da.name+'/'+filename
                recent_path = './'+da.name+'/'+da.name+'.nc'
                da = da.astype(str)  # everything is stored as strings
                print('saving',path)
                da.to_netcdf(path)#, mode='w') # specific dated instance
                print('saving', recent_path)
                da.to_netcdf(recent_path)#, mode='w') # generic "current" file overwrite
            print('All Saved Successfully.')
    
# Load every location in locs into a stationdict, then print the CGO table as a check.
stations = stationdict(locs)
stations.view('CGO')


<xarray.DataArray 'CGO' (itime: 11, dt: 5)>
array([['0.0', '4655', 'nan', '2019-11-04', 'nan'],
       ['0.0', '5550', 'nan', '2019-12-16', 'nan'],
       ['0.0', '4646', 'nan', '2019-12-16', 'nan'],
       ['0.0', '5547', 'nan', '2020-02-10', 'nan'],
       ['0.0', '5595', 'nan', '2020-02-10', 'nan'],
       ['0.0', '5133', 'nan', '2020-03-09', 'nan'],
       ['0.0', '5031', 'nan', '2020-03-09', 'nan'],
       ['0.0', '5161', 'nan', '2020-03-09', 'nan'],
       ['None', '5631',
        'one of 5631 and 5635 was sent to MHD, the other to CGO',
        '2020-03-20', 'False'],
       ['None', '5635',
        'one of 5631 and 5635 was sent to MHD, the other to CGO',
        '2020-03-20', 'False'],
       ['4506', '4405', '', '2020-03-31', 'False']], dtype=object)
Coordinates:
  * dt       (dt) object 'high' 'low' 'notes' 'outdate' 'returned'
  * itime    (itime) int64 0 1 2 3 4 5 6 7 8 9 10



In [None]:
### External Functions
def time_tag():
    '''Build the date tag used in saved filenames.

    Returns the current local date formatted as "YYYY-MM-DD"; no time of day
    is included (the original note: to not save so many files).
    '''
    return f"{datetime.now():%Y-%m-%d}"  # a string

def external_save(da_list):
    '''Stand-alone version of the save procedure.

    Lets the save procedure be changed after a da_list has been instantiated
    and edited. Prompts for confirmation; unless the answer is exactly 'y'
    nothing is written. Otherwise every DataArray in ``da_list.da_list`` is
    converted with ``astype(str)`` and written to a dated file and to the
    generic "current" file, printing each path once it has been written.
    '''
    if input('Save? (y/n)') != 'y':
        return
    for station in da_list.da_list:
        stem = './'+station.name+'/'+station.name
        targets = (
            stem+'.'+time_tag()+'.nc',  # specific dated instance
            stem+'.nc',                 # generic "current" file overwrite
        )
        as_text = station.astype(str)
        for target in targets:
            as_text.to_netcdf(target)#, mode='w')
            print(target, 'saved')

def pprocess(N):
    '''Processes numerical values for more aesthetic printing.

    Truncates `N` to an integer (via ``int``) and returns it as a string,
    zero-padded to at least two characters so single-digit values line up
    with two-digit ones in printed columns.

    Zero is padded as well ('00'); previously it was the only single-digit
    value returned unpadded. Negative values and values of two or more
    digits are returned unchanged.

    Parameters
    ----------
    N : number or numeric string
        Anything accepted by ``int``.

    Returns
    -------
    str
    '''
    return str(int(N)).zfill(2)

### Classes

        
        
class dalist():
    '''List-backed collection holding one DataArray per station.

    Each array built by this class has dims ``('dt', 'itime')``; the ``dt``
    labels are 'low', 'high', 'outdate', 'notes' and 'returned', and
    ``itime`` numbers the entries.

    Attributes
    ----------
    locs : list of str
        Station tags; ``da_list[i]`` belongs to ``locs[i]``.
    tag : str
        Optional suffix inserted before ``.nc`` in the filename that is read.
    da_list : list
        The working DataArrays, loaded fully into memory.
    original : list
        Independent copies of the arrays as loaded, to aid in recovery.
    '''
    def __init__(self, locs, tag=''):
        self.locs = locs
        self.tag = tag
        self.da_list = self.load()
        # To aid in recovery if needed. These are real copies: a plain
        # list.copy() would share the arrays, and save() edits 'itime' in
        # place, which would silently change the backup too.
        self.original = [da.copy() for da in self.da_list]

    def load(self):
        '''Load files corresponding to each location tag "locs" into a list.'''
        da_list = []
        for s in self.locs:
            path = s+'/'+s+self.tag+'.nc'
            # Load inside the context manager so the file is closed even if
            # reading fails part-way.
            with xr.open_dataarray(path) as f:
                da_list.append(f.load())
        return da_list

    def index(self, name):
        '''Return the position of station `name` in ``locs``.

        Raises ValueError if `name` is not a known station tag.
        '''
        return self.locs.index(name)

    def view(self, name=None):
        '''Formatted printout of all dataarrays

        Prints only station `name` when it is one of ``locs``; otherwise
        prints every station.
        '''
        if name in self.locs:
            print(self.da_list[self.index(name)].T)
            print('')
        else:
            for da in self.da_list:
                print(da.T)
                print('')

    def save(self):
        '''Ask for confirmation, then write every station to disk.

        Each DataArray is written twice: to a dated file
        ``./<name>/<name>.<YYYY-MM-DD>.nc`` and to ``./<name>/<name>.nc``.
        Nothing is written unless the answer to the prompt is exactly 'y'.
        '''
        q = input('Save? (y/n)')
        if q == 'y':
            for da in self.da_list:
                # renumber entries 0..N-1 (in place, on the working array)
                da['itime'] = np.arange(len(da.itime))
                filename = da.name+'.'+time_tag()+'.nc'
                path = './'+da.name+'/'+filename
                recent_path = './'+da.name+'/'+da.name+'.nc'
                da = da.astype(str)  # everything is stored as strings
                print('saving',path)
                da.to_netcdf(path)#, mode='w') # specific dated instance
                print('saving', recent_path)
                da.to_netcdf(recent_path)#, mode='w') # generic "current" file overwrite

    ## Make new dataarray for a location
    def new_da(self, name, lows, out_dates, highs, notes=None):
        '''Build a DataArray named `name` with one entry per element of `lows`.

        Parameters
        ----------
        name : str
            Name given to the DataArray.
        lows, out_dates, highs, notes : sequences of equal length
            Row values. ``highs=None`` is replaced by zeros and
            ``notes=None`` by empty strings.

        Returns
        -------
        DataArray with dims ``('dt', 'itime')``; 'returned' is 'False' for
        every entry.
        '''
        # doesn't actually use self, but need it to prevent name from being self
        N = len(lows)
        # `is None` rather than `== None`: comparing an ndarray with `==` is
        # element-wise and cannot be used as a condition.
        if highs is None:
            highs = np.zeros(N)
        if notes is None:
            notes = np.array(['']*N)
        returned = np.array(['False']*N)
        da = xr.DataArray(np.array([lows,
                                    highs,
                                    out_dates,
                                    notes,
                                    returned]), name=name, dims=['dt', 'itime'],
                          coords=[['low', 'high', 'outdate', 'notes','returned'],np.arange(N)])
        return da

    def ship(self, name, low, date, high=None, notes=None):
        '''Add an entry to da_list for a new flask pair that was shipped

        If flask `low` is already listed at station `name`, prints a message
        and the station table and changes nothing. Otherwise appends the
        entry, prints the updated table and calls ``save()`` (which prompts).

        Note: `high` and `notes` are passed on inside one-element lists, so a
        value of None is stored as None rather than replaced by new_da's
        defaults.

        Raises ValueError if `name` is not in ``locs``.
        '''
        low = str(low) # change type for consistency
        i = self.locs.index(name)
        da = self.da_list[i]
        # Check the working array, not the load-time snapshot: the snapshot
        # misses flasks shipped earlier in this session and still lists
        # flasks that have since been received.
        if low in [str(flask) for flask in da.sel(dt='low').values]:
            print('Flask number {} is already at {}'.format(low, da.name))
            print('')
            print(da.T)
            return

        new_entry = self.new_da(name, [low], [date], [high], notes=[notes])
        da1 = xr.concat([da, new_entry], dim='itime')
        self.da_list[i] = da1
        print(da1.T)
        self.save() # ask if you want to save

    def receive(self, name, low, notes=None):
        '''Remove the entry for flask `low` from station `name`.

        If the flask is not listed there, prints a message and the station
        table and changes nothing. Otherwise drops the matching entries,
        prints the before/after tables and calls ``save()`` (which prompts).
        `notes` is accepted but not used.

        Raises ValueError if `name` is not in ``locs``.
        '''
        low = str(low) # change type for consistency
        i = self.locs.index(name)
        da = self.da_list[i]
        lows = da.sel(dt='low')
        if low not in lows:
            print('------------Flask {} is not currently at {}------------'.format(low, name))
            print('')
            print(da.T)
            print('')
            return
        BL = lows != low  # True for every entry that stays
        remaining = da.where(BL, drop=True)
        print(BL.T)
        print(da.T)
        print(remaining.T)
        print('')
        self.da_list[i] = remaining
        # save
        self.save()

    def _ask(self, prompts):
        '''Prompt for each item in turn, stopping at the first 'abort'.

        Returns ``(answers, aborted)``; when aborted, `answers` holds only
        the replies given before 'abort' was typed.
        '''
        answers = []
        for prompt in prompts:
            reply = input(prompt)
            if reply == 'abort':
                return answers, True
            answers.append(reply)
        return answers, False

    def ask_receive(self):
        '''Interactively collect the arguments for ``receive`` and call it.

        Typing 'abort' at any prompt stops immediately (no further prompts),
        prints the station entered so far (or all stations) and 'aborted'.
        '''
        answers, aborted = self._ask(['station tag: ',
                                      'low flask number: ',
                                      'notes: '])
        if aborted:
            self.view(answers[0] if answers else None)
            print('aborted')
            return
        name, low, notes = answers
        self.receive(name, low, notes)

    def ask_ship(self):
        '''Interactively collect the arguments for ``ship`` and call it.

        Typing 'abort' at any prompt stops immediately (no further prompts),
        prints the station entered so far (or all stations) and 'aborted'.
        '''
        answers, aborted = self._ask(['station tag: ',
                                      'low flask number: ',
                                      'high flask number: ',
                                      'Date Shipped: ',
                                      'notes: '])
        if aborted:
            self.view(answers[0] if answers else None)
            print('aborted')
            return
        name, low, high, date, notes = answers
        self.ship(name, low, date, high, notes)

    def search_all_stations(self, n):
        '''Search all stations for a low-flask number.'''
        n = str(n)
        for da in self.da_list:
            lows = da.sel(dt='low')
            if n in lows:
                print(da)

    def overview(self, verbose=False):
        '''Print one summary line per station.

        Columns: station name, number of entries, weeks since the newest
        'outdate', and flags. With ``verbose=True`` the oldest and newest
        low flask numbers and their dates are printed as well.

        Flags: 'no flasks' (empty station), 'bad outdate' (newest outdate is
        not a YYYY-MM-DD date), 'under 3' (fewer than three entries) and
        'N < weeks'. Stations without a usable date are given weeks = 99.
        '''
        if verbose == True:
            print('Station  ', 'pairs  ', 'oldest  ----------', 'newest  ----------','weeks  ', 'flags')
        else:
            print('Station  ', 'pairs  ', 'weeks  ', 'flags')
        now = datetime.now()  # one reference time for the whole table
        for da in self.da_list:
            N = len(da.T)
            if N > 0:
                oldest_flask = str(da.sel(dt='low')[0].values)
                oldest_day = str(da.sel(dt='outdate')[0].values)
                newest_flask = str(da.sel(dt='low')[-1].values)
                newest_day = str(da.sel(dt='outdate')[-1].values)
                flag = ' '
                try:
                    date = datetime.strptime(newest_day, '%Y-%m-%d')
                except ValueError:
                    # one unparsable date should not abort the whole table
                    weeks = 99
                    flag += 'bad outdate, '
                else:
                    weeks = (now-date).days/7.0
            else:
                oldest_flask = ' n/a '
                oldest_day = ' n/a '
                newest_flask = ' n/a '
                newest_day = ' n/a '
                weeks = 99
                flag = 'no flasks, '
            if N < 3:
                flag += 'under 3, '
            if N < weeks:
                flag += 'N < weeks, '
            N_print = pprocess(N)
            weeks_print = pprocess(weeks)
            if verbose==True:
                print(da.name,'     ', N_print,'    ', oldest_flask, oldest_day, '   ',
                      newest_flask, newest_day, '   ', weeks_print, '   ', flag)
            else:
                print(da.name,'     ', N_print,'    ', weeks_print, '   ', flag)
        if verbose == True:
            print(
                  '''Flask numbers are all low-valued members of a pair.
                     Weeks = weeks since a box was last shipped there.
                     ''')

    def dalist(self):
        '''An alias for overview()'''
        self.overview()




In [None]:
# Build the dalist for every tag in locs; the constructor loads each station's file into memory.
da_list = dalist(locs)


In [None]:
# Print the KUM table.
# The commented-out lines below are a manual, in-place edit of KUM's newest 'outdate' entry.
da_list.view('KUM')
# i = da_list.index('KUM')
# da_list.da_list[i].sel(dt='outdate')[-1] = '2020-03-30'

In [None]:

# for n in [4653,4400]:
#     da_list.ship('NWR', n, '2020-03-26', notes='')

    


In [None]:
# Remove flasks 5599 and 5013 from BRW. Each receive() call that finds its
# flask ends with the interactive 'Save? (y/n)' prompt.
for n in [5599,5013]:
    da_list.receive('BRW', n)

In [None]:
# Prompt for station tag, low flask number and notes, then hand them to receive().
da_list.ask_receive()


In [None]:
%%bash 
# List, then delete, every file matching ./???/???.nc
# (a three-character directory containing a three-character .nc file name).
# NOTE(review): these look like the generic "current" files that save() writes
# and load() reads back by default - confirm the deletion is intended, and that
# dated copies exist, before running this cell.
ls ./???/???.nc 
rm ./???/???.nc


In [None]:
# Ask for confirmation, then write every station to its dated file and its "current" file.
da_list.save()

In [None]:
def external_save(da_list):
    '''Prompt, then write every DataArray in ``da_list.da_list`` to disk.

    Nothing happens unless the reply to the prompt is exactly 'y'. Each
    array is converted with ``astype(str)`` and written to a dated file and
    to the generic "current" file; each path is printed after it is written.
    '''
    confirmed = input('Save? (y/n)') == 'y'
    if confirmed:
        for da in da_list.da_list:
            base = './'+da.name+'/'+da.name
            dated_path = base+'.'+time_tag()+'.nc'
            current_path = base+'.nc'
            text_da = da.astype(str)
            text_da.to_netcdf(dated_path)#, mode='w') # specific dated instance
            print(dated_path, 'saved')
            text_da.to_netcdf(current_path)#, mode='w') # generic "current" file overwrite
            print(current_path, 'saved')
# Run the stand-alone save procedure on the loaded stations (prompts before writing).
external_save(da_list)

In [None]:
# Print the DataArray of every station whose 'low' row contains flask 5131.
da_list.search_all_stations('5131')

In [None]:
### search for duplicates at all stations

# build list of all low numbers
all_low = []
print('stat','low #','multiplicity')
for da in da_list.da_list:
    all_low.extend(da.sel(dt='low').values)
# Tally every low number once, instead of re-scanning the whole list with
# list.count() for each flask (which is quadratic in the number of flasks).
low_counts = Counter(all_low)
# check each station, check if there are duplicates anywhere and print results
# (a duplicated number is printed once for every place it occurs)
for da in da_list.da_list:
    lows = da.sel(dt='low').values
    for low in lows:
        count = low_counts[low]
        if count != 1:
            print(da.name, '', low, '',count)

In [None]:
# Scratch calculation: weeks elapsed between now and the date in date_str
# (the same arithmetic overview() applies to a station's newest outdate).
# NOTE(review): date_str is not assigned in any cell visible here - set it to a
# 'YYYY-MM-DD' string first, otherwise this cell fails on a fresh kernel.
now = datetime.now()
date = datetime.strptime(date_str, '%Y-%m-%d')
diff = now-date
weeks = diff.days/7.0  # whole days divided by 7, so fractional weeks


In [None]:
# Drop every entry of station number i whose 'notes' value is 'remove_me'.
# NOTE(review): i is only assigned in a commented-out line in the cells visible
# here - set it (e.g. i = da_list.index('KUM')) before running this cell.
cond = da_list.da_list[i].sel(dt='notes') != 'remove_me'  # True for entries to keep
da_list.da_list[i] = da_list.da_list[i].where(cond, drop=True)