In [2]:
import pandas as pd

from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import *

# Dataset



In [3]:
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

ds = Dataset(df, roles={'a': TargetRole(), 'b': PreTargetRole()})
ds

   a  b
0  1  4
1  2  5
2  3  6

In [4]:
ExperimentData(None).from_dict({'a': [1, 2, 3]})



   a
0  1
1  2
2  3

In [5]:
ds.roles

{'a': <hypex.dataset.roles.TargetRole at 0x183b6eee560>,
 'b': <hypex.dataset.roles.PreTargetRole at 0x183b6eed300>}

In [6]:
ds['a']

   a
0  1
1  2
2  3

In [7]:
ds[1]

   1
a  2
b  5

In [8]:
ds['a'][1]

   1
a  2

In [9]:
ds.add_column([7, 8, 9], 'c', TargetRole)
ds

   a  b  c
0  1  4  7
1  2  5  8
2  3  6  9

In [10]:
ds.apply(func=lambda x: x['a'] + x['c'] + 7, axis=1)



    0
0  15
1  17
2  19

In [11]:
ds.add_column(ds.apply(func=lambda x: x['a'] + x['c'] + 7, axis=1), 'd', TargetRole)
ds



   a  b  c   d
0  1  4  7  15
1  2  5  8  17
2  3  6  9  19

In [12]:
ds.roles

{'a': <hypex.dataset.roles.TargetRole at 0x183b6eee560>,
 'b': <hypex.dataset.roles.PreTargetRole at 0x183b6eed300>,
 'c': hypex.dataset.roles.TargetRole,
 'd': hypex.dataset.roles.TargetRole}

In [13]:
ds.add_column([7, 8, 9], 'f', TargetRole, index=[2, 0, 1])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value


In [14]:
ds.loc[:, 'c']



   c
0  7
1  8
2  9

In [15]:
ds['e'] = [1, 2, 3]
ds



   a  b  c   d  f  e
0  1  4  7  15  8  1
1  2  5  8  17  9  2
2  3  6  9  19  7  3

In [16]:
ds['a'][1] = 1
ds

   a  b  c   d  f  e
0  1  4  7  15  8  1
1  2  5  8  17  9  2
2  3  6  9  19  7  3

In [17]:
ds.isin([3, 8])



       a      b      c      d      f      e
0  False  False  False  False   True  False
1  False  False   True  False  False  False
2   True  False  False  False  False   True

In [18]:
groups_func = ds.groupby('a', func='mean')
print(next(groups_func))

(1,       0
a   1.0
b   4.0
c   7.0
d  15.0
f   8.0
e   1.0)


  groups = self.data.groupby(by, axis, **kwargs)


In [19]:
groups = ds.groupby('a')
print(next(groups))

(1,    a  b  c   d  f  e
0  1  4  7  15  8  1)


  groups = self.data.groupby(by, axis, **kwargs)


In [20]:
groups_func_fields = ds.groupby('a', func='sum', fields_list=['d', 'e'])
print(next(groups_func_fields))

(1,     0
d  15
e   1)


  groups = self.data.groupby(by, axis, **kwargs)


In [21]:
ds.mean()



      0
a   2.0
b   5.0
c   8.0
d  17.0
f   8.0
e   2.0

In [22]:
ds_empty = Dataset(pd.DataFrame())._create_empty()



In [23]:
ds_empty.columns

Index([], dtype='object')

In [24]:
ds_empty._backend

Empty DataFrame
Columns: []
Index: []

In [25]:
# you can add backend as data like this: dataset._backend
ds_from_dict = Dataset().from_dict({'a': [1, 2], 'b': [3, 4]})



In [26]:
ds_from_dict

   a  b
0  1  3
1  2  4

In [27]:
ds_from_dict.append(ds)



   a  b    c     d    f    e
0  1  3  NaN   NaN  NaN  NaN
1  2  4  NaN   NaN  NaN  NaN
0  1  4  7.0  15.0    8  1.0
1  2  5  8.0  17.0    9  2.0
2  3  6  9.0  19.0    7  3.0

# Eperiment Data

In [28]:
ed = ExperimentData(pd.DataFrame())._create_empty()



In [29]:
ed.additional_fields

Empty DataFrame
Columns: []
Index: []

In [30]:
ed.additional_fields.loc[:, :]



Empty DataFrame
Columns: []
Index: []