## Tables
Module to simplify handling of input and output tables (as .csv files).  For now, this assumes that all files are
archived in a directory 'xyz.d', where 'xyz' is the 'data set name'.  Eventually, we will have a way of
archiving sets of files in .zip files.

In [1]:
import pandas as pd
import os.path
import hashlib

In [2]:
class Table(object):
    """A named table stored as a .csv file inside a data set directory.

    A table called ``table_name`` belonging to data set ``ds_name`` lives in
    the file ``<ds_name>.d/<table_name>.csv``, or
    ``<ds_name>.d/<prefix>-<table_name>.csv`` when a prefix is used.

    Attributes:
        ds_name:    data set name (falls back to the class attribute DSNAME).
        table_name: name of the table within the data set.
        prefix:     prefix used the last time a file name was constructed.
        file_name:  file name used by the most recent read() or write().
        columns:    columns to load (passed to pandas as ``usecols``).
        index_col:  index column (passed to pandas as ``index_col``).
        data:       the pandas DataFrame holding the table contents.
    """

    DSNAME = None     # default data set name
    DSTYPE = 'dir'    # someday we will allow 'zip' for zip archives

    def __init__(self, table_name, ds_name=None, columns=None, index_col=None):
        """Describe a table; no file is touched until read() or write()."""
        if ds_name is None and self.DSNAME is not None:
            ds_name = self.DSNAME
        self.ds_name = ds_name
        self.table_name = table_name
        self.prefix = None
        self.file_name = None
        self.columns = columns
        self.index_col = index_col
        self.data = pd.DataFrame()

    def _file_name(self, prefix=None):
        """Return the path of this table's .csv file and remember `prefix`.

        Raises:
            ValueError: if no data set name was given and DSNAME was unset
                when the table was created.
        """
        if self.ds_name is None:
            # Without this check the string concatenation below fails with
            # an uninformative TypeError.
            raise ValueError("no data set name for table '{}': pass ds_name "
                             "or set Table.DSNAME first".format(self.table_name))
        self.prefix = prefix
        if prefix:
            n = prefix + '-' + self.table_name
        else:
            n = self.table_name
        return self.ds_name + '.d/' + n + '.csv'

    def read(self, file_name=None):
        """Load the table from `file_name` (default: the un-prefixed file).

        The loaded DataFrame is stored in ``self.data`` and also returned.

        Raises:
            ValueError: if a requested column or the index column is not
                present in the file (the message names the file), or for
                any other ValueError raised by pandas.
        """
        if not file_name:
            file_name = self._file_name()
        self.file_name = file_name
        try:
            self.data = pd.read_csv(file_name, usecols=self.columns,
                                    index_col=self.index_col)
        except ValueError as err:
            # pandas reports missing columns only through the message text,
            # and that text differs between pandas versions.
            msg = str(err)
            if msg.endswith('is not in list'):
                # Older pandas: "'COL' is not in list".
                parts = msg.split("'")
                if len(parts) > 1:
                    c = parts[1]
                else:
                    # Unquoted (non-string) column label.
                    c = msg[:-len('is not in list')].strip()
                raise ValueError("'{}' is not in the set of columns in file '{}'".format(c, file_name))
            if msg.startswith('Usecols do not match columns'):
                # Newer pandas: the message already lists the missing
                # columns, so only the file name needs to be added.
                raise ValueError("{} in file '{}'".format(msg, file_name))
            if msg.startswith('Index') and msg.endswith('invalid'):
                raise ValueError("Index column '{}' is not in the set of columns in file '{}'".format(self.index_col, file_name))
            raise
        return self.data

    def write(self, file_name=None, precision=None, index=False, prefix=None):
        """Write ``self.data`` to a .csv file.

        The target is chosen in this order: an explicit `file_name`; if no
        `prefix` is given, the file name last used by read() or write();
        otherwise the name built from the data set, `prefix` and table name.

        Args:
            file_name: explicit path to write to.
            precision: number of significant digits for floats (formatted
                with '%.<precision>g'); None keeps the pandas default.
            index:     whether to write the DataFrame index.
            prefix:    prefix for a constructed file name.
        """
        if not file_name and prefix is None:
            file_name = self.file_name
        if not file_name:
            file_name = self._file_name(prefix=prefix)
        self.file_name = file_name
        float_format = None
        if precision is not None:
            float_format = '%.{:d}g'.format(precision)
        self.data.to_csv(file_name, index=index, float_format=float_format)

    def basename(self, file_name=None):
        """Return the final path component of `file_name`.

        Defaults to the file name last used by read() or write().

        Raises:
            ValueError: if no file name is given and none has been used yet.
        """
        if file_name is None:
            file_name = self.file_name
        if file_name is None:
            raise ValueError("table '{}' has not been read or written yet, "
                             "so it has no file name".format(self.table_name))
        return os.path.basename(file_name)

    def signature(self):
        """Return ``(base file name, SHA-256 hex digest)`` of the last file used."""
        # basename() validates that a file name is known before hashing.
        base = self.basename()
        # Inside a method body the bare name `signature` resolves to the
        # module-level function, not to this method.
        return (base, signature(self.file_name))
    
def signature(file_name):
    """Return the SHA-256 hex digest of the contents of `file_name`.

    The file is read in binary mode, in fixed-size chunks, so large files
    are not loaded into memory all at once.
    """
    m = hashlib.sha256()
    # `with` guarantees the file is closed even if a read fails.
    with open(file_name, mode='rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            m.update(chunk)
    return m.hexdigest()

In [3]:
# Set the default data set name used by tables created without ds_name,
# then describe the 'nodes' table, limited to three columns.
Table.DSNAME = 'frame-6'
t = Table('nodes',columns=['NODEID','X','Y'])

In [4]:
# Load frame-6.d/nodes.csv into t.data; the DataFrame is also returned.
t.read()

Unnamed: 0,NODEID,X,Y
0,A,0,0
1,B,0,4000
2,C,8000,4000
3,D,8000,0


In [5]:
# Divide the X and Y columns by 3 in place, then display the table.
t.data.loc[:,['X','Y']] /= 3.
t.data

Unnamed: 0,NODEID,X,Y
0,A,0.0,0.0
1,B,0.0,1333.333333
2,C,2666.666667,1333.333333
3,D,2666.666667,0.0


In [6]:
# Write the table to the prefixed file frame-6.d/out-nodes.csv, formatting
# floats with 7 significant digits.
t.write(precision=7,prefix='out')

In [7]:
# (base file name, SHA-256 hex digest) of the file last read or written.
t.signature()

('out-nodes.csv',
 '71080f20c6f926bb9ef71cfe01103ed5e3ba618bb305f713e26194198220ecce')

In [8]:
# Inspect the instance state: file_name and prefix reflect the last write.
vars(t)

{'columns': ['NODEID', 'X', 'Y'], 'data':   NODEID            X            Y
 0      A     0.000000     0.000000
 1      B     0.000000  1333.333333
 2      C  2666.666667  1333.333333
 3      D  2666.666667     0.000000, 'ds_name': 'frame-6', 'file_name': 'frame-6.d/out-nodes.csv', 'index_col': None, 'prefix': 'out', 'table_name': 'nodes'}

In [9]:
# read() without arguments always uses the un-prefixed file name, so this
# reloads frame-6.d/nodes.csv rather than the 'out-' file.
t.read()

Unnamed: 0,NODEID,X,Y
0,A,0,0
1,B,0,4000
2,C,8000,4000
3,D,8000,0


In [10]:
# file_name and prefix are back to the un-prefixed values after the read.
vars(t)

{'columns': ['NODEID', 'X', 'Y'], 'data':   NODEID     X     Y
 0      A     0     0
 1      B     0  4000
 2      C  8000  4000
 3      D  8000     0, 'ds_name': 'frame-6', 'file_name': 'frame-6.d/nodes.csv', 'index_col': None, 'prefix': None, 'table_name': 'nodes'}