# basic `dstruct` usage

---

### 1. sift through a data set for relevant information:

In [1]:
from dstruct import DataStruct, DataField, datafield

In [2]:
# Sample input: "a" sits at the top level, while "c" and "d" are nested under "b".
raw_data = {"a": 1, "b": {"c": 2, "d": 3}}

In [3]:
class A(DataStruct):
    """Flat view of a nested mapping, with one `DataField` per value of interest."""
    
    # No key path given: judging by the displayed result, the value is looked
    # up under the attribute's own name ("a").
    a = DataField(int)
    # The arguments after the type spell out the key path into the nested
    # data, i.e. raw_data['b']['c'] and raw_data['b']['d'].
    c = DataField(int, 'b', 'c')
    d = DataField(int, 'b', 'd')

In [4]:
# Build the struct from the raw dict; the result shows only the declared fields.
A(raw_data)

{"a": 1, "c": 2, "d": 3}

### 2. import data sets from JSON files:

In [5]:
from dstruct import DataStructFromJSON, DataField

In [6]:
def _mask_account_number(number):
    """Replace all but the last four characters of `number` with 'X'."""
    hidden, visible = number[:-4], number[-4:]
    return 'X' * len(hidden) + visible


class AccountSummaryFromJSON(DataStructFromJSON):
    """Summary of a single bank account, read from a JSON document."""

    user = DataField(str)
    type = DataField(str, 'account', 'account-type')
    # The "ballance" spelling is kept on purpose: it is both the output field
    # name and part of the key looked up in the data.
    ballance = DataField(float, 'account', 'account-ballance')
    # `dtype` may be any callable, which allows simple data parsing or type
    # coercion; here it masks the account number.
    account_number = DataField(_mask_account_number, 'account', 'account-number')

In [7]:
# Build the summary from the JSON file at this relative path.
AccountSummaryFromJSON('data_files/bank_data.json')

{"ballance": 1234.56, "type": "checking", "account_number": "XXXXX6789", "user": "John F. Doe"}

### 3. import data sets from CSV files:

+ The `datafield` decorator wraps complex parsers as data fields: the decorated method receives the data and its return value becomes the field's value.

In [8]:
from dstruct import DataStructFromCSV, datafield

In [9]:
def _column_mean(data, column):
    """Average `column` over every row of `data`, rounded to one decimal place.

    `data` is iterated by key, and each `data[key][column]` is parsed with
    `int` before being summed. An empty `data` raises `ZeroDivisionError`.
    """
    total = sum(int(data[name][column]) for name in data)
    # `float(...)` keeps this a true division on Python 2 as well.
    return round(float(total) / len(data), 1)


class AverageUserFromCSV(DataStructFromCSV):
    """Per-column averages computed across all rows of a CSV file."""

    # NOTE(review): `path=None` appears to hand the parser the whole data set
    # rather than a single nested value -- confirm against the dstruct docs.
    @datafield(path=None)
    def age(self, data):
        """Mean of the 'Age' column."""
        return _column_mean(data, 'Age')

    @datafield(path=None)
    def weight(self, data):
        """Mean of the 'Weight' column."""
        return _column_mean(data, 'Weight')

In [10]:
# Compute the averages from the wide-form CSV file.
AverageUserFromCSV('data_files/wide.csv')

{"age": 40.0, "weight": 174.3}

#### + `DataStructFromCSV` understands both wide- and narrow-form data representations

In [11]:
# The same struct built from either layout of the data compares equal.
AverageUserFromCSV('data_files/wide.csv') == AverageUserFromCSV('data_files/narrow.csv')

True