In [37]:
import pandas as pd

from hypex.dataset.dataset import Dataset, ExperimentData
from hypex.dataset.roles import *

# Dataset



In [38]:
# Build a small two-column pandas frame and wrap it in a hypex Dataset,
# giving column 'a' a TargetRole and column 'b' a PreTargetRole.
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

ds = Dataset({'a': TargetRole(), 'b': PreTargetRole()}, data=df)
# Bare last expression so the notebook displays the Dataset.
ds

   a  b
0  1  4
1  2  5
2  3  6

In [39]:
# Alternative constructor: build an ExperimentData from a column dict plus a roles dict.
ExperimentData.from_dict({'a': [1, 2, 3]}, {'a': TargetRole()})

   a
0  1
1  2
2  3

In [40]:
# Show the column -> role mapping exposed as `ds.roles`.
ds.roles

{'a': Target(<class 'int'>), 'b': PreTarget(<class 'int'>)}

In [41]:
# Select a column by name; the result is displayed as a one-column table.
ds['a']

   a
0  1
1  2
2  3

In [42]:
# Integer key: the recorded output shows the entry labelled 1 of every column
# (a=2, b=5), laid out with the column names as rows.
ds[1]

   1
a  2
b  5

In [43]:
# Column selection followed by an integer key: entry labelled 1 of column 'a' only.
ds['a'][1]

   1
a  2

In [44]:
# Add a new column 'c' from a plain list, with an int-typed TargetRole.
# add_column mutates ds in place: the display below already contains 'c'.
ds.add_column([7, 8, 9], {'c': TargetRole(int)})
ds

   a  b  c
0  1  4  7
1  2  5  8
2  3  6  9

In [45]:
# Element-wise transform of column 'a' (doubling each value).
# The result is only displayed; nothing is assigned back to ds.
ds['a'].map(lambda x: x * 2)

   a
0  2
1  4
2  6

In [46]:
# Distinct values per column; the recorded output is a list of (column name, array) pairs.
ds.unique()

[('a', array([1, 2, 3])),
 ('b', array([4, 5, 6])),
 ('c', array([7, 8, 9], dtype=int64))]

In [47]:
# Row-wise apply (axis=1): compute a + c + 7 for each row and label the
# resulting column 'new' with an InfoRole. The result is only displayed here;
# it is not assigned back to ds.
ds.apply(func=lambda x: x['a'] + x['c'] + 7, role={"new": InfoRole()}, axis=1)

   new
0   15
1   17
2   19

In [48]:
# Roles of ds after the apply() call above (no 'new' entry is listed yet).
# Bare expression instead of print() so the mapping is rendered the same way
# as in the other `ds.roles` cells of this notebook.
ds.roles

{'a': Target(<class 'int'>), 'b': PreTarget(<class 'int'>), 'c': Target(<class 'int'>)}


In [49]:
# Attach the apply() result to ds as a real column: add_column is called with
# the returned object alone (no separate roles dict), and both the 'new'
# column and its Info role appear in ds afterwards.
ds.add_column(ds.apply(func=lambda x: x['a'] + x['c'] + 7, role={"new": InfoRole()}, axis=1))
ds

   a  b  c  new
0  1  4  7   15
1  2  5  8   17
2  3  6  9   19

In [50]:
# Roles after add_column: the mapping now includes 'new'.
ds.roles

{'a': Target(<class 'int'>),
 'b': PreTarget(<class 'int'>),
 'c': Target(<class 'int'>),
 'new': Info(<class 'int'>)}

In [51]:
# Add column 'f' with an explicit index order [2, 0, 1]; a later display of ds
# shows f as 8, 9, 7 for rows 0, 1, 2, i.e. the values are placed by index label.
# NOTE(review): the recorded stderr shows pandas SettingWithCopyWarning raised
# from `self.data[name][i] = value` (chained assignment) — confirm/fix in the
# code that implements add_column.
ds.add_column([7, 8, 9], {'f': TargetRole(int)}, index=[2, 0, 1])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[name][i] = value


In [52]:
# Label-based slicing through .loc: all rows, columns 'b' through 'new' inclusive.
ds.loc[:, 'b':'new']

   b  c  new
0  4  7   15
1  5  8   17
2  6  9   19

In [53]:
# Column assignment by key from a plain list; no role is supplied for 'e' here.
ds['e'] = [1, 2, 3]
ds



   a  b  c  new  f  e
0  1  4  7   15  8  1
1  2  5  8   17  9  2
2  3  6  9   19  7  3

In [54]:
# NOTE(review): chained assignment. `ds['a']` is evaluated first and the item
# assignment is applied to that intermediate object; the display of ds below
# still shows a[1] == 2, so ds itself is not modified by this statement.
# If an in-place update is intended, use a single-step setter on ds —
# TODO confirm which setter the Dataset API provides.
ds['a'][1] = 1
ds

   a  b  c  new  f  e
0  1  4  7   15  8  1
1  2  5  8   17  9  2
2  3  6  9   19  7  3

In [55]:
# Membership test of every cell against the values [3, 8].
# NOTE(review): the recorded output shows 0/1 for most columns but False/True
# for 'e' — confirm whether this mixed representation is intended.
ds.isin([3, 8])

   a  b  c  new  f      e
0  0  0  0    0  1  False
1  0  0  1    0  0  False
2  1  0  0    0  0   True

In [56]:
# Group by column 'a' and aggregate every group with 'mean'. The result is a
# list of (group key, aggregated table) pairs, one per distinct value of 'a'.
groups_func = ds.groupby('a', func='mean')
# Bare last expression instead of print(): lets the notebook render the value
# as the cell's Out[] result.
groups_func

[(1,       a  b  c  new  f     e
mean  1  4  7   15  8  True), (2,       a  b  c  new  f     e
mean  2  5  8   17  9  True), (3,       a  b  c  new  f     e
mean  3  6  9   19  7  True)]


In [72]:
# Group by column 'a' without an aggregation function: the result is a list of
# (group key, sub-table) pairs holding the original rows of each group.
groups = ds.groupby('a')
# Bare last expression instead of print(): lets the notebook render the value
# as the cell's Out[] result.
groups

[(1,    a  b  c  new  f     e
0  1  4  7   15  8  True), (2,    a  b  c  new  f     e
1  2  5  8   17  9  True), (3,    a  b  c  new  f     e
2  3  6  9   19  7  True)]


In [58]:
# Group by 'a', restrict the output to the columns in fields_list and apply
# several aggregations; each group's table has one row per function name.
groups_func_fields = ds.groupby('a', func=['sum', 'min'], fields_list=['new', 'e'])
# Bare last expression instead of print(): lets the notebook render the value
# as the cell's Out[] result.
groups_func_fields

[(1,      new     e
sum   15  True
min   15  True), (2,      new     e
sum   17  True
min   17  True), (3,      new     e
sum   19  True
min   19  True)]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column_name] = self.data[column_name].astype(type_name)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column_name] = self.data[column_name].astype(type_name)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[column_name] = self.data[column_name].astype(type_name)
A value

In [59]:
# Several whole-table aggregations at once; one output row per function name.
ds.agg(['sum', 'mean'])

      a   b   c  new   f     e
sum   6  15  24   51  24  True
mean  2   5   8   17   8  True

In [60]:
# Column means, displayed as a single row labelled 'mean'.
ds.mean()

      a  b  c  new  f     e
mean  2  5  8   17  8  True

In [61]:
# Build an empty Dataset through the private `_create_empty()` helper.
# NOTE(review): a DataFrame is passed as the first positional argument here,
# whereas the constructor call earlier in this notebook passes a roles dict
# first and the frame as `data=` — confirm the intended signature.
# NOTE(review): relies on an underscore-prefixed (private) method.
ds_empty = Dataset(pd.DataFrame())._create_empty()

In [62]:
# The empty Dataset reports no columns.
ds_empty.columns

Index([], dtype='object')

In [63]:
# Peek at the private `_backend` attribute; it displays as an empty DataFrame.
ds_empty._backend

Empty DataFrame
Columns: []
Index: []

In [73]:
# Dataset.from_dict: column dict plus roles dict; the roles are created
# without an explicit type argument.
ds_from_dict = Dataset.from_dict({'a': [1, 2], 'b': [3, 4]}, {'a': TargetRole(), 'b': InfoRole()})

In [74]:
# Display the Dataset built from the dict.
ds_from_dict

   a  b
0  1  3
1  2  4

In [75]:
# Roles created without a type argument display as Target(None) / Info(None).
ds_from_dict.roles

{'a': Target(None), 'b': Info(None)}

In [77]:
# Current state of ds, shown for comparison with the append() result that follows.
ds

   a  b  c  new  f  e
0  1  4  7   15  8  1
1  2  5  8   17  9  2
2  3  6  9   19  7  3

In [78]:
# Append ds to itself; the displayed result repeats index labels 0..2
# (the index is not renumbered).
# NOTE(review): column 'e' is shown as True in this result while the display
# of ds just above shows 1, 2, 3 — confirm the type handling in append().
ds.append(ds)

   a  b  c  new  f     e
0  1  4  7   15  8  True
1  2  5  8   17  9  True
2  3  6  9   19  7  True
0  1  4  7   15  8  True
1  2  5  8   17  9  True
2  3  6  9   19  7  True

In [79]:
# NOTE(review): the recorded output for this cell lists six roles ('a' .. 'e'),
# which matches the columns of `ds`, not the two-role mapping recorded for
# this same expression earlier in the notebook. Either the output is stale
# (the notebook's execution counts are non-sequential) or roles are being
# shared/mutated between Dataset instances — re-run on a fresh kernel to confirm.
ds_from_dict.roles

{'a': Target(<class 'int'>),
 'b': PreTarget(<class 'int'>),
 'c': Target(<class 'int'>),
 'new': Info(<class 'int'>),
 'f': Target(<class 'int'>),
 'e': Info(<class 'bool'>)}

# Experiment Data

In [80]:
# Wrap ds in an ExperimentData and take its empty counterpart via the
# private `_create_empty()` helper.
# NOTE(review): relies on an underscore-prefixed (private) method.
ed = ExperimentData(ds)._create_empty()

In [81]:
# `additional_fields` of the empty ExperimentData displays as an empty DataFrame.
ed.additional_fields

Empty DataFrame
Columns: []
Index: []

In [82]:
# Full `.loc` slice (all rows, all columns) on the empty additional_fields;
# the result is still displayed as an empty DataFrame.
ed.additional_fields.loc[:, :]

Empty DataFrame
Columns: []
Index: []