In [1]:
import pandas as pd

from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import *

# Dataset



In [34]:
# Build a 3-row pandas frame and wrap it in a Dataset, giving each column a role
# ('a' -> TargetRole, 'b' -> PreTargetRole). The trailing bare `ds` displays it.
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

ds = Dataset({'a': TargetRole(), 'b': PreTargetRole()}, data=df)
ds

   a  b
0  1  4
1  2  5
2  3  6

In [35]:
# Alternative constructor: column data as a dict, then the roles mapping, then 'pandas'
# (presumably the backend name — confirm against ExperimentData.from_dict).
ExperimentData.from_dict({'a': [1, 2, 3]}, {'a': TargetRole()}, 'pandas')

   a
0  1
1  2
2  3

In [36]:
# Show the column -> role mapping stored on the dataset.
ds.roles

{'a': Target, 'b': PreTarget}

In [37]:
# A string key selects a single column; the result is still displayed as a table.
ds['a']

   a
0  1
1  2
2  3

In [38]:
# An integer key selects the row labelled 1; the recorded output shows it transposed
# (the column names a, b become the index and 1 becomes the column header).
ds[1]

   1
a  2
b  5

In [39]:
# Selectors can be chained: pick column 'a', then row 1.
ds['a'][1]

   1
a  2

In [40]:
# Add a column from a plain list; the dict supplies the new column's name ('c') and role.
# add_column changes ds itself — the display on the next line already includes 'c'.
ds.add_column([7, 8, 9], {'c': TargetRole()})
ds

   a  b  c
0  1  4  7
1  2  5  8
2  3  6  9

In [41]:
# Row-wise apply (axis=1): a + c + 7 for every row. The result comes back as a
# one-column table named 'new' (role: InfoRole); ds itself is not changed by this call.
ds.apply(
    func=lambda row: row['a'] + row['c'] + 7,
    role={"new": InfoRole()},
    axis=1,
)

   new
0   15
1   17
2   19

In [42]:
# The roles mapping has no 'new' entry: the apply() call above did not modify ds.
print(ds.roles)

{'a': Target, 'b': PreTarget, 'c': Target}


In [43]:
# Compute the derived column (a + c + 7 per row), then attach it to ds.
# add_column receives no roles argument here; the column arrives with the name and
# role given to apply().
new_column = ds.apply(
    func=lambda row: row['a'] + row['c'] + 7,
    role={"new": InfoRole()},
    axis=1,
)
ds.add_column(new_column)
ds

   a  b  c  new
0  1  4  7   15
1  2  5  8   17
2  3  6  9   19

In [44]:
# 'new' now appears in the mapping with the Info role.
ds.roles

{'a': Target, 'b': PreTarget, 'c': Target, 'new': Info}

In [45]:
# Add column 'f' with an explicit index: each value lands on the row with the matching
# label, i.e. 7 -> row 2, 8 -> row 0, 9 -> row 1 (later displays of ds show f = 8, 9, 7).
# NOTE(review): the recorded run emits pandas' "set on a copy of a slice" warning from
# `self.data[name][i] = value` inside the library — chained assignment worth fixing upstream.
ds.add_column([7, 8, 9], {'f': TargetRole()}, index=[2, 0, 1])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value


In [46]:
# Label-based slicing via .loc: all rows, columns 'b' through 'new' (both ends included).
ds.loc[:, 'b':'new']

   b  c  new
0  4  7   15
1  5  8   17
2  6  9   19

In [15]:
# Item assignment with a new key adds column 'e'; no role is specified explicitly here.
ds['e'] = [1, 2, 3]
ds



   a  b  c  new  f  e
0  1  4  7   15  8  1
1  2  5  8   17  9  2
2  3  6  9   19  7  3

In [16]:
# NOTE(review): chained assignment — in the recorded output below, row 1 of 'a' is
# still 2, so this write never reached ds. If Dataset.loc supports assignment,
# `ds.loc[1, 'a'] = 1` would be the direct form — TODO confirm against the Dataset API.
ds['a'][1] = 1
ds

   a  b  c  new  f  e
0  1  4  7   15  8  1
1  2  5  8   17  9  2
2  3  6  9   19  7  3

In [17]:
# Element-wise membership test against the values [3, 8].
# NOTE(review): the recorded output mixes 0/1 (columns a, b, new) with True/False
# (columns c, f, e) — looks like inconsistent dtype handling; confirm in Dataset.isin.
ds.isin([3, 8])

   a  b      c  new      f      e
0  0  0  False    0   True  False
1  0  0   True    0  False  False
2  1  0  False    0  False   True

In [18]:
# Group by column 'a' and aggregate each group with 'mean'; the printed result is a
# list of (group key, aggregated table) pairs.
# NOTE(review): in the recorded output c, f and e print as True instead of their
# numeric values — looks like an unintended cast to bool somewhere upstream; confirm.
groups_func = ds.groupby('a', func='mean')
print(groups_func)

[(1,       a  b     c  new     f     e
mean  1  4  True   15  True  True), (2,       a  b     c  new     f     e
mean  2  5  True   17  True  True), (3,       a  b     c  new     f     e
mean  3  6  True   19  True  True)]


In [19]:
# Without func, groupby yields the un-aggregated groups as (group key, sub-table) pairs.
groups = ds.groupby('a')
print(groups)

[(1,    a  b     c  new     f     e
0  1  4  True   15  True  True), (2,    a  b     c  new     f     e
1  2  5  True   17  True  True), (3,    a  b     c  new     f     e
2  3  6  True   19  True  True)]


In [20]:
# Several aggregations at once ('sum' and 'min'), restricted to the columns named in
# fields_list; each group's table has one row per aggregation.
# NOTE(review): the recorded run emits repeated "set on a copy of a slice" warnings from
# `self.data[column_name] = self.data[column_name].astype(type_name)` inside the library.
groups_func_fields = ds.groupby('a', func=['sum', 'min'], fields_list=['new', 'e'])
print(groups_func_fields)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column_name] = self.data[column_name].astype(type_name)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column_name] = self.data[column_name].astype(type_name)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column_name] = self.data[column_name].astype(type_name)
A value

[(1,      new     e
sum   15  True
min   15  True), (2,      new     e
sum   17  True
min   17  True), (3,      new     e
sum   19  True
min   19  True)]


In [21]:
# Whole-dataset aggregation: one output row per requested function.
ds.agg(['sum', 'mean'])

      a   b     c  new     f     e
sum   6  15  True   51  True  True
mean  2   5  True   17  True  True

In [22]:
# Single-aggregation shorthand; the recorded output matches the 'mean' row of agg().
ds.mean()

      a  b     c  new     f     e
mean  2  5  True   17  True  True

In [23]:
# NOTE(review): the DataFrame is passed positionally as the first argument here, whereas
# the Dataset built at the top of this notebook passes the roles dict first and the frame
# via data= — confirm this call is intended. _create_empty is a private
# (underscore-prefixed) method.
ds_empty = Dataset(pd.DataFrame())._create_empty()

In [24]:
# The empty dataset has no columns.
ds_empty.columns

Index([], dtype='object')

In [25]:
# Peek at the underlying backend object (private attribute); it displays as an empty DataFrame.
ds_empty._backend

Empty DataFrame
Columns: []
Index: []

In [26]:
# Build a Dataset from a dict of columns plus a roles mapping; the third argument
# 'pandas' is presumably the backend name — confirm against Dataset.from_dict.
ds_from_dict = Dataset.from_dict({'a': [1, 2], 'b': [3, 4]}, {'a': TargetRole(), 'b': InfoRole()}, 'pandas')

In [27]:
# Display the dataset built from the dict.
ds_from_dict

   a  b
0  1  3
1  2  4

In [28]:
# The roles passed to from_dict are reported back unchanged.
ds_from_dict.roles

{'a': Target, 'b': Info}

In [29]:
# Append ds to ds_from_dict. The recorded run raised IntCastingNaNError here —
# presumably because ds carries columns (c, new, f, e) that ds_from_dict lacks, leaving
# NaNs that cannot be cast to an integer dtype; confirm in Dataset.append.
ds_from_dict.append(ds)

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [ ]:
# Inspect the roles after the append attempt (this cell was not executed in the saved run).
ds_from_dict.roles

# Experiment Data

In [ ]:
# Wrap the existing Dataset in ExperimentData, then call the private _create_empty on it.
# This cell was not executed in the saved run, so no output is recorded.
ed = ExperimentData(ds)._create_empty()

In [ ]:
# Inspect the additional_fields attribute of the experiment data.
ed.additional_fields

In [ ]:
# Select everything (all rows, all columns) from additional_fields through .loc.
ed.additional_fields.loc[:, :]