In [1]:
import pandas as pd

from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import *

# Dataset



In [2]:
# Build a small pandas frame and wrap it in a Dataset, mapping each column
# name to a role. Both columns are targets; 'b' is declared with float and is
# displayed as 4.0/5.0/6.0 even though the source values are ints.
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

ds = Dataset({'a': TargetRole(), 'b': TargetRole(float)}, data=df)
ds

   a    b
0  1  4.0
1  2  5.0
2  3  6.0

In [3]:
# Inspect the column -> role mapping. The role created as TargetRole() (no
# type argument) is shown with <class 'int'> for column 'a'.
ds.roles

{'a': Target(<class 'int'>), 'b': Target(<class 'float'>)}

In [4]:
# Look up column names by role, restricted to the given types; with
# search_types=[int] only 'a' is returned (the float target 'b' is excluded).
ds.search_columns(TargetRole(), search_types=[int])

['a']

In [5]:
# Select a single column by name; the result is displayed as a one-column table.
ds['a']

   a
0  1
1  2
2  3

In [6]:
# Column-wise minimum; the result is a single row labelled 'min'.
ds.min()

     a    b
min  1  4.0

In [7]:
# An integer key selects row 1 (label and position coincide in this data).
# The row is displayed transposed: column names become the index, and 'a'
# is shown as 2.0, i.e. the values appear as floats in this view.
ds[1]

     1
a  2.0
b  5.0

In [8]:
# Chained selection: column 'a' first, then row 1. With only the int column
# involved, the value is displayed as 2 rather than 2.0.
ds['a'][1]

   1
a  2

In [9]:
# Boolean-mask through ds.data (presumably the wrapped pandas DataFrame —
# verify): cells that are not equal to 4 come back empty/NaN, so only b[0] remains.
ds.data[ds.data[['a', 'b']] == 4]

Unnamed: 0,a,b
0,,4.0
1,,
2,,


In [10]:
# Same mask as the preceding cell, but expressed through the Dataset's own
# indexing and comparison; the displayed values match (only b[0] == 4 survives).
ds[ds[['a', 'b']] == 4]

    a    b
0 NaN  4.0
1 NaN  NaN
2 NaN  NaN

In [11]:
# Add column 'c' from a plain list together with its role mapping. The call's
# result is not assigned, and ds shows the new column, so ds is modified in place.
ds.add_column([7, 8, 9], {'c': TargetRole(int)})
ds

   a    b  c
0  1  4.0  7
1  2  5.0  8
2  3  6.0  9

In [12]:
# Element-wise map over column 'a' (doubling each value). The result is only
# displayed; later displays of ds still show a = 1, 2, 3.
ds['a'].map(lambda x: x * 2)

   a
0  2
1  4
2  6

In [13]:
# Unique values per column, returned as a dict of column name -> numpy array.
ds.unique()

{'a': array([1, 2, 3], dtype=int64),
 'b': array([4., 5., 6.]),
 'c': array([7, 8, 9], dtype=int64)}

In [14]:
# Row-wise apply (axis=1): computes a + c + 7 per row. The key of the `role`
# mapping ("new") becomes the name of the resulting column. The result is only
# displayed here, not stored on ds; values are shown as floats.
ds.apply(func=lambda x: x['a'] + x['c'] + 7, role={"new": InfoRole()}, axis=1)

    new
0  15.0
1  17.0
2  19.0

In [15]:
# Check the roles after the apply call: still only a, b, c — no 'new' entry,
# so apply did not change ds.
print(ds.roles)

{'a': Target(<class 'int'>), 'b': Target(<class 'float'>), 'c': Target(<class 'int'>)}


In [16]:
# Recompute the same row-wise result and attach it to ds. No role mapping is
# passed to add_column here; the roles displayed afterwards include
# 'new': Info, so the role comes along with the object returned by apply.
ds.add_column(ds.apply(func=lambda x: x['a'] + x['c'] + 7, role={"new": InfoRole()}, axis=1))
ds

   a    b  c   new
0  1  4.0  7  15.0
1  2  5.0  8  17.0
2  3  6.0  9  19.0

In [17]:
# Roles now include 'new' as an Info role with float type.
ds.roles

{'a': Target(<class 'int'>),
 'b': Target(<class 'float'>),
 'c': Target(<class 'int'>),
 'new': Info(<class 'float'>)}

In [18]:
# add_column with an explicit index: the values [7, 8, 9] are labelled
# [2, 0, 1] and end up aligned to ds's rows by label — the following display
# shows f = 8, 9, 7 for rows 0, 1, 2.
ds.add_column([7, 8, 9], {'f': TargetRole(int)}, index=[2, 0, 1])

In [19]:
# Display ds after adding column 'f'.
ds

   a    b  c   new  f
0  1  4.0  7  15.0  8
1  2  5.0  8  17.0  9
2  3  6.0  9  19.0  7

In [20]:
# Item assignment with a plain list creates column 'e'. No role is supplied;
# a later ds.roles display lists it as Info(None).
ds['e'] = [1, 2, 3]
ds



   a    b  c   new  f  e
0  1  4.0  7  15.0  8  1
1  2  5.0  8  17.0  9  2
2  3  6.0  9  19.0  7  3

In [21]:
# NOTE(review): chained indexing — ds['a'] is evaluated first and the
# assignment targets that intermediate result. The display below shows ds
# unchanged (row 1 of 'a' is still 2), so this write does not reach ds.
# Confirm whether this no-op is the intended demonstration.
ds['a'][1] = 1
ds

   a    b  c   new  f  e
0  1  4.0  7  15.0  8  1
1  2  5.0  8  17.0  9  2
2  3  6.0  9  19.0  7  3

In [22]:
# Element-wise membership test against [3, 8]; the result is a boolean table
# with the same rows and columns as ds.
ds.isin([3, 8])

       a      b      c    new      f      e
0  False  False  False  False   True  False
1  False  False   True  False  False  False
2   True  False  False  False  False   True

In [23]:
# Roles after all additions: 'f' is Target(int); 'e', created through item
# assignment without a role, is shown as Info(None).
ds.roles

{'a': Target(<class 'int'>),
 'b': Target(<class 'float'>),
 'c': Target(<class 'int'>),
 'new': Info(<class 'float'>),
 'f': Target(<class 'int'>),
 'e': Info(None)}

In [24]:
# Group by 'a' and aggregate each group with 'mean'. The printed result is a
# list of (group key, aggregated table) pairs, one per distinct value of 'a';
# each table has a single row labelled 'mean'.
groups_func = ds.groupby('a', func='mean')
print(groups_func)

[(1,         a    b    c   new    f    e
mean  1.0  4.0  7.0  15.0  8.0  1.0), (2,         a    b    c   new    f    e
mean  2.0  5.0  8.0  17.0  9.0  2.0), (3,         a    b    c   new    f    e
mean  3.0  6.0  9.0  19.0  7.0  3.0)]


In [25]:
# Group by 'a' without an aggregation function: the printed result is a list
# of (group key, sub-table) pairs holding the original rows of each group.
groups = ds.groupby('a')
print(groups)

[(1,    a    b  c   new  f  e
0  1  4.0  7  15.0  8  1), (2,    a    b  c   new  f  e
1  2  5.0  8  17.0  9  2), (3,    a    b  c   new  f  e
2  3  6.0  9  19.0  7  3)]


In [26]:
# Group by 'a', keep only field 'e', and apply several aggregations at once;
# each group's table has one row per function. Every group contains a single
# row in this data, which is presumably why 'var' comes out as NaN.
groups_func_fields = ds.groupby('a', func=['mean', 'var'], fields_list='e')
print(groups_func_fields)

[(1,         e
mean  1.0
var   NaN), (2,         e
mean  2.0
var   NaN), (3,         e
mean  3.0
var   NaN)]


In [27]:
# Aggregate the whole Dataset with a list of functions; the result has one
# row per function ('sum', 'mean') and all values are displayed as floats.
ds.agg(['sum', 'mean'])

        a     b     c   new     f    e
sum   6.0  15.0  24.0  51.0  24.0  6.0
mean  2.0   5.0   8.0  17.0   8.0  2.0

In [28]:
# Column-wise mean; the single result row is labelled 'mean' and matches the
# 'mean' row of the agg(['sum', 'mean']) call in the preceding cell.
ds.mean()

        a    b    c   new    f    e
mean  2.0  5.0  8.0  17.0  8.0  2.0

In [29]:
# Create a Dataset via the create_empty constructor (called with no arguments);
# the following cells show it has no columns and no rows.
ds_empty = Dataset.create_empty()

In [30]:
# The empty Dataset exposes an empty column index.
ds_empty.columns

Index([], dtype='object')

In [31]:
# Display the backend object of the empty Dataset; its repr is that of an
# empty DataFrame (no columns, no index entries).
ds_empty.backend

Empty DataFrame
Columns: []
Index: []

In [32]:
# Build a Dataset from a dict of column name -> values plus a roles mapping.
ds_from_dict = Dataset.from_dict({'a': [1, 2], 'b': [3, 4]}, {'a': TargetRole(), 'b': InfoRole()})

In [33]:
# Display the Dataset created from the dict.
ds_from_dict

   a  b
0  1  3
1  2  4

In [34]:
# Roles of the dict-built Dataset: both are shown with type None, whereas the
# roles of `ds` (built with Dataset(..., data=df)) display concrete types.
ds_from_dict.roles

{'a': Target(None), 'b': Info(None)}

In [35]:
# Display the current state of ds before appending.
ds

   a    b  c   new  f  e
0  1  4.0  7  15.0  8  1
1  2  5.0  8  17.0  9  2
2  3  6.0  9  19.0  7  3

In [36]:
# Append ds to itself: the displayed result has the rows twice and keeps the
# original index labels (0, 1, 2, 0, 1, 2) rather than renumbering them.
# The result is not assigned — presumably ds itself is left as is; verify.
ds.append(ds)

   a    b  c   new  f  e
0  1  4.0  7  15.0  8  1
1  2  5.0  8  17.0  9  2
2  3  6.0  9  19.0  7  3
0  1  4.0  7  15.0  8  1
1  2  5.0  8  17.0  9  2
2  3  6.0  9  19.0  7  3

In [37]:
# Show the roles before renaming, then rename columns 'b' -> 'B' and
# 'a' -> 'V'. Only the renamed table is displayed; the roles after the
# rename are not shown in this cell.
display(ds_from_dict.roles)
ds_from_dict.rename({'b': 'B', 'a': 'V'})

{'a': Target(None), 'b': Info(None)}

   V  B
0  1  3
1  2  4

# Experiment Data

In [38]:
# Wrap the existing Dataset in an ExperimentData container.
ed = ExperimentData(ds)

In [39]:
# additional_fields of a fresh ExperimentData has no columns, but its index
# is [0, 1, 2] — the same row labels as ds.
ed.additional_fields

Empty DataFrame
Columns: []
Index: [0, 1, 2]

In [40]:
# Full-range .loc slice (all rows, all columns) over additional_fields; the
# output is identical to displaying the attribute directly.
ed.additional_fields.loc[:, :]

Empty DataFrame
Columns: []
Index: [0, 1, 2]