In [1]:
import pandas as pd

from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import *

# Dataset


In [2]:
# Wrap a small pandas frame in a hypex Dataset. `roles` is keyed by role
# class, with the column that role applies to as the value.
df = pd.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]})
column_roles = {TargetRole: 'a', PreTargetRole: 'b'}
ds = Dataset(df, roles=column_roles)
ds

   a  b
0  1  4
1  2  5
2  3  6

In [3]:
# Inspect the stored roles: the Dataset reports them keyed by column name,
# i.e. the inverse of the {role: column} mapping passed to the constructor.
ds.roles

{'a': hypex.dataset.roles.TargetRole, 'b': hypex.dataset.roles.PreTargetRole}

In [4]:
# A string key selects a single column by name.
ds['a']

0    1
1    2
2    3
Name: a, dtype: int64

In [5]:
# An integer key selects a row instead of a column; here it returns the
# values of 'a' and 'b' for row 1.
ds[1]

a    2
b    5
Name: 1, dtype: int64

In [6]:
# Chain the two lookups (column, then row) to read a single cell.
ds['a'][1]

2

In [7]:
# add_column(values, name, role) attaches a new column to `ds` itself
# (the call's return value is not used; `ds` shows 'c' afterwards).
ds.add_column([7, 8, 9], 'c', TargetRole)
ds

   a  b  c
0  1  4  7
1  2  5  8
2  3  6  9

In [8]:
# Row-wise apply (axis=1): each row is handed to the function, and the
# results come back as a single column labelled 0.
ds.apply(
    func=lambda row: row['a'] + row['c'] + 7,
    axis=1,
)

    0
0  15
1  17
2  19

In [9]:
# Compute the derived values first, then store them on `ds` as column 'd'
# with the target role.
derived_values = ds.apply(func=lambda row: row['a'] + row['c'] + 7, axis=1)
ds.add_column(derived_values, 'd', TargetRole)
ds

   a  b  c   d
0  1  4  7  15
1  2  5  8  17
2  3  6  9  19

In [10]:
# The roles passed to add_column are now registered for 'c' and 'd'
# alongside the roles given at construction time.
ds.roles

{'a': hypex.dataset.roles.TargetRole,
 'b': hypex.dataset.roles.PreTargetRole,
 'c': hypex.dataset.roles.TargetRole,
 'd': hypex.dataset.roles.TargetRole}

In [11]:
# Label-based selection: all rows, column 'c'. The result is displayed as a
# one-column table rather than in the Series-style form that ds['a'] gives.
ds.loc[:, 'c']

   c
0  7
1  8
2  9

In [12]:
# Item assignment with a new name creates a column, as an alternative to
# add_column.
# NOTE(review): no role is passed here; which role (if any) 'e' ends up with
# is not shown in this notebook - confirm.
ds['e'] = [1, 2, 3]
ds



   a  b  c   d  e
0  1  4  7  15  1
1  2  5  8  17  2
2  3  6  9  19  3

In [13]:
# Overwrite a single cell (column 'a', row 1). The displayed `ds` confirms
# the write reached the dataset in this run.
# NOTE(review): this is chained indexing - it only works if ds['a'] hands
# back a view of the underlying data rather than a copy. Prefer a
# single-step setter if Dataset exposes one - confirm.
ds['a'][1] = 1
ds

   a  b  c   d  e
0  1  4  7  15  1
1  1  5  8  17  2
2  3  6  9  19  3

In [14]:
# Element-wise membership test: True wherever a cell equals 3 or 8.
ds.isin([3, 8])

       a      b      c      d      e
0  False  False  False  False  False
1  False  False   True  False  False
2   True  False  False  False   True

In [15]:
# groupby() is consumed with next() and hands back (key, result) pairs.
# With func='mean', each remaining column is reduced to its mean within
# the group.
groups_func = ds.groupby('a', func='mean')
first_mean_group = next(groups_func)
print(first_mean_group)

(1,       0
b   4.5
c   7.5
d  16.0
e   1.5)


In [16]:
# Without `func`, each pair carries the group's rows themselves; the
# grouping column 'a' is not among the columns of the returned rows.
groups = ds.groupby('a')
first_group = next(groups)
print(first_group)

(1,    b  c   d  e
0  4  7  15  1
1  5  8  17  2)


In [17]:
# `fields_list` limits the aggregation to the named columns, so only
# 'd' and 'e' are summed for each group.
groups_func_fields = ds.groupby('a', func='sum', fields_list=['d', 'e'])
first_sum_group = next(groups_func_fields)
print(first_sum_group)

(1,     0
d  32
e   3)


In [18]:
# Build a Dataset around an empty DataFrame and take the result of
# _create_empty() on it.
# NOTE(review): _create_empty is underscore-prefixed (private by
# convention); check whether a public way to get an empty Dataset exists.
ds_empty = Dataset(pd.DataFrame())._create_empty()

In [19]:
# The empty dataset has no columns.
ds_empty.columns

Index([], dtype='object')

In [20]:
# Peek at the object stored in the private `_backend` attribute; for the
# empty dataset it displays as an empty DataFrame.
ds_empty._backend

Empty DataFrame
Columns: []
Index: []

In [21]:
# Build a Dataset from a plain dict of column name -> list of values.
# Data can also be supplied as a backend object, e.g. `dataset._backend`.
ds_from_dict = Dataset().from_dict({'a': [1, 2], 'b': [3, 4]})

In [22]:
# Display the dataset built from the dict: two rows, columns 'a' and 'b'.
ds_from_dict

   a  b
0  1  3
1  2  4

In [26]:
# Stack the rows of `ds` underneath those of `ds_from_dict`. In the
# displayed result each source keeps its own index labels (0, 1 then
# 0, 1, 2), and the columns that only `ds` has ('c', 'd', 'e') are NaN for
# the `ds_from_dict` rows, so those columns display as floats.
# NOTE(review): the result is not assigned; whether append() also modifies
# `ds_from_dict` in place is not shown here - confirm.
ds_from_dict.append(ds)

   a  b    c     d    e
0  1  3  NaN   NaN  NaN
1  2  4  NaN   NaN  NaN
0  1  4  7.0  15.0  1.0
1  1  5  8.0  17.0  2.0
2  3  6  9.0  19.0  3.0

# Experiment Data

In [23]:
# Build an ExperimentData around an empty DataFrame and take the result of
# _create_empty() on it.
# NOTE(review): _create_empty is underscore-prefixed (private by
# convention); check whether a public constructor path exists.
ed = ExperimentData(pd.DataFrame())._create_empty()

In [24]:
# `additional_fields` is empty (no rows, no columns) on a freshly created
# empty ExperimentData.
ed.additional_fields

Empty DataFrame
Columns: []
Index: []

In [25]:
# `additional_fields` supports the same .loc indexing as a Dataset; selecting
# everything from the empty container yields an empty result.
ed.additional_fields.loc[:, :]

Empty DataFrame
Columns: []
Index: []