# sdata usage

In [1]:
# Notebook setup: reload edited modules automatically before each cell runs,
# render matplotlib figures inline and switch off the periodic autosave.
%load_ext autoreload
%autoreload 2
%matplotlib inline
%autosave 0
    
import logging

# Log line layout: "<time> <LEVEL>:<message>".
LOG_FORMAT = '%(asctime)s %(levelname)s:%(message)s'
# 24-hour clock (%H). The 12-hour code %I without an AM/PM marker makes
# morning and afternoon timestamps indistinguishable.
LOG_DATEFMT = '%H:%M:%S'

# DEBUG level so that the messages emitted by sdata during IO are shown below.
logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG, datefmt=LOG_DATEFMT)

import os
import sys
import numpy as np
import pandas as pd
import sdata
import uuid

Autosave disabled


03:22:24 DEBUG:Loaded backend module://ipykernel.pylab.backend_inline version unknown.


## create a Data object

In [2]:
# A Data object with a fixed uuid (so the example is reproducible) and two
# metadata attributes, each carrying a value, a unit and a description.
data = sdata.Data(uuid="38b26864e7794f5182d38459bab85841", name="data")
for key, value, unit, description in (
    ("my_key", 123, "m^3", "a volume"),
    ("force", 1.234, "kN", "x force"),
):
    data.metadata.add(key, value, unit=unit, description=description)
data

(Data 'data':38b26864e7794f5182d38459bab85841)

In [3]:
# Show the metadata as a table: one row per attribute, including the entries
# sdata adds on its own (sdata_version, name, uuid).
data.metadata.df

Unnamed: 0_level_0,name,value,dtype,unit,description
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sdata_version,sdata_version,0.8.0,str,-,
name,name,data,str,-,
uuid,uuid,38b26864e7794f5182d38459bab85841,str,-,
my_key,my_key,123,int,m^3,a volume
force,force,1.234,float,kN,x force


In [4]:
# The uuid is exposed as a plain hex string.
data.uuid

'38b26864e7794f5182d38459bab85841'

In [5]:
# Uncomment the next line to print the built-in help for the Data object.
#help(data)

## create a hierarchy of objects

In [6]:
# The parent object that will hold the child Data objects created below.
group1 = sdata.Data(
    uuid="dbc894745fb04f7e87a990bdd4ba97c4",
    name="group1",
)
print(group1)

(Data 'group1':dbc894745fb04f7e87a990bdd4ba97c4)


In [7]:
# Three children. The uuid is given in three different forms: a plain hex
# string, a dashed string and a uuid.UUID instance.
data1 = sdata.Data(uuid="38b26864e7794f5182d38459bab8584f", name="data1")
data2 = sdata.Data(uuid="b1fd2643-af55-4b33-b044-22070a0dc7c7", name="data2")
data3 = sdata.Data(uuid=uuid.UUID("664577c2d3134b598bc4d6c13f20b71a"), name="data3")

# Attach the children to the group in creation order, then show the mapping.
for child in (data1, data2, data3):
    group1.add_data(child)
print(group1.group)

OrderedDict([('38b26864e7794f5182d38459bab8584f', (Data 'data1':38b26864e7794f5182d38459bab8584f)), ('b1fd2643-af55-4b33-b044-22070a0dc7c7', (Data 'data2':b1fd2643-af55-4b33-b044-22070a0dc7c7)), ('664577c2d3134b598bc4d6c13f20b71a', (Data 'data3':664577c2d3134b598bc4d6c13f20b71a))])


In [8]:
# The children are keyed by their uuid, in the order they were added.
group1.keys()

['38b26864e7794f5182d38459bab8584f',
 'b1fd2643-af55-4b33-b044-22070a0dc7c7',
 '664577c2d3134b598bc4d6c13f20b71a']

In [9]:
# The child objects themselves, in the same order as keys().
group1.values()

[(Data 'data1':38b26864e7794f5182d38459bab8584f),
 (Data 'data2':b1fd2643-af55-4b33-b044-22070a0dc7c7),
 (Data 'data3':664577c2d3134b598bc4d6c13f20b71a)]

## working with Data

# Children can be fetched from the group by their uuid string.
uid_data1 = "38b26864e7794f5182d38459bab8584f"
data1a = group1.get_data_by_uuid(uid=uid_data1)
assert (data1a.name, data1a.uuid) == ("data1", uid_data1)

# data3 was created from a uuid.UUID instance; it is looked up by its hex string.
uid_data3 = "664577c2d3134b598bc4d6c13f20b71a"
data3a = group1.get_data_by_uuid(uid=uid_data3)
assert (data3a.name, data3a.uuid) == ("data3", uid_data3)

In [10]:
# Lookup by name instead of uuid returns the same child.
data1b = group1.get_data_by_name("data1")
assert (data1b.name, data1b.uuid) == ("data1", "38b26864e7794f5182d38459bab8584f")
data1b

(Data 'data1':38b26864e7794f5182d38459bab8584f)

## Data IO

### Folder IO

In [11]:
# Export group1 and its children below /tmp/b with dtype="xlsx". The recorded
# output shows one .xlsx file per object and that existing data-* subfolders
# are removed first (clear_folder log lines).
# NOTE: the hard-coded /tmp path assumes a Unix-like system.
group1.to_folder(path="/tmp/b/", dtype="xlsx")

03:22:25 DEBUG:clear_folder: rm /tmp/b/data-data1
03:22:25 DEBUG:clear_folder: rm /tmp/b/data-data2
03:22:25 DEBUG:clear_folder: rm /tmp/b/data-data3


'/tmp/b/'

In [12]:
# Print the directory tree of the xlsx export.
group1.tree_folder("/tmp/b")

 └─b
   ├─data-data1
   | └─data1.xlsx
   ├─data-data2
   | └─data2.xlsx
   ├─data-data3
   | └─data3.xlsx
   └─group1.xlsx


In [13]:
# Same export with dtype="csv". The log shows a metadata.csv being written for
# the group itself and for every child folder.
group1.to_folder(path="/tmp/a/", dtype="csv")

03:22:25 DEBUG:clear_folder: rm /tmp/a/data-data1
03:22:25 DEBUG:clear_folder: rm /tmp/a/data-data2
03:22:25 DEBUG:clear_folder: rm /tmp/a/data-data3
03:22:25 DEBUG:export meta csv '/tmp/a/metadata.csv'
03:22:25 DEBUG:export meta csv '/tmp/a/data-data1/metadata.csv'
03:22:25 DEBUG:export meta csv '/tmp/a/data-data2/metadata.csv'
03:22:25 DEBUG:export meta csv '/tmp/a/data-data3/metadata.csv'


'/tmp/a/'

In [14]:
# Print the directory tree of the csv export.
# NOTE(review): the recorded tree also lists data.xlsx, which the csv export
# log does not mention. It is possibly a leftover from an earlier run; confirm.
group1.tree_folder("/tmp/a")

 └─a
   ├─data-data1
   | └─metadata.csv
   ├─data-data2
   | └─metadata.csv
   ├─data-data3
   | └─metadata.csv
   ├─data.xlsx
   └─metadata.csv


In [15]:
# Read the csv export back and check that the identity of the group survived
# the round trip (same name, same uuid).
group1_folder = sdata.Data.from_folder(path="/tmp/a")
print("!", group1_folder)
assert (group1.name, group1.uuid) == (group1_folder.name, group1_folder.uuid)
# dir() should list the three children again.
print(group1_folder.dir())
group1_folder

read table /tmp/a/data.xlsx
! (Data 'group1':dbc894745fb04f7e87a990bdd4ba97c4)
[('data1', []), ('data2', []), ('data3', [])]


(Data 'group1':dbc894745fb04f7e87a990bdd4ba97c4)

### Excel IO (work in progress)

In [16]:
# Round trip through a single Excel file: write `data`, read it back and
# compare the identity of both objects.
data.to_xlsx(filepath="/tmp/data1.xlsx")
data_xlsx = sdata.Data.from_xlsx(filepath="/tmp/data1.xlsx")
print(data.name, data_xlsx.name)
print(data.uuid, data_xlsx.uuid)
# Fail loudly if the reloaded object lost its identity, instead of relying on
# the reader to compare the printed values by eye.
assert data.name == data_xlsx.name
assert data.uuid == data_xlsx.uuid
data_xlsx

data data
38b26864e7794f5182d38459bab85841 38b26864e7794f5182d38459bab85841


(Data 'data':38b26864e7794f5182d38459bab85841)

## inspect Data

In [17]:
# dir() lists the children of the group as (name, list) pairs. The list is
# presumably the child's own children (empty for these leaves); confirm.
group1.dir()

[('data1', []), ('data2', []), ('data3', [])]

In [18]:
# Public API of a Data object: every attribute whose name has no leading underscore.
[attr_name for attr_name in dir(data) if attr_name[:1] != "_"]

['ATTR_NAMES',
 'add_data',
 'clear_folder',
 'clear_group',
 'describe',
 'description',
 'df',
 'dir',
 'filename',
 'from_csv',
 'from_folder',
 'from_json',
 'from_xlsx',
 'get_data_by_name',
 'get_data_by_uuid',
 'get_download_link',
 'get_group',
 'group',
 'items',
 'keys',
 'metadata',
 'name',
 'osname',
 'prefix',
 'sha3_256',
 'table',
 'to_csv',
 'to_folder',
 'to_json',
 'to_xlsx',
 'to_xlsx_base64',
 'to_xlsx_byteio',
 'tree_folder',
 'uuid',
 'values',
 'verify_attributes']

### Data with comments

In [19]:
# A free-text description can be passed straight to the constructor.
datac = sdata.Data(
    name="data",
    uuid="38b26864e7794f5182d38459bab85842",
    description="this is remarkable",
)
datac

(Data 'data':38b26864e7794f5182d38459bab85842)

In [20]:
# Append more text to the description.
# NOTE: this cell is not idempotent; running it again appends the text again.
datac.description += """\n\nhere is your comment\nwhat's next?"""

In [21]:
# print() so that the line breaks in the description are rendered.
print(datac.description)

this is remarkable

here is your comment
what's next?


In [22]:
# Store the object, including its multi-line description, in an Excel file.
datac.to_xlsx("/tmp/data_with_description.xlsx")

In [23]:
# Reload the file and print the description to show that the multi-line text
# survived the Excel round trip.
datac2 = sdata.Data.from_xlsx("/tmp/data_with_description.xlsx")
print(datac2.description)

this is remarkable

here is your comment
what's next?
