In [1]:
from vivid.featureset import AbstractAtom
import pandas as pd



In [2]:
import optuna

In [3]:
# Toy input frame: three rows with integer `id`, `price`, `company_id` and a string `name`.
df_input = pd.DataFrame({
    'id': [1, 2, 3],
    'price': [2, 2, 1],
    'company_id': [1, 1, 2],
    'name': ['foo', 'hoge', 'bar'],
})

In [4]:
# Show the sample input frame that every atom below is run against.
df_input

Unnamed: 0,id,price,company_id,name
0,1,2,1,foo
1,2,2,1,hoge
2,3,1,2,bar


## Simple Atom

実装の際には, 入力のうち使用するカラムを `use_columns` に指定し, 加工処理の本体を `call` に実装します。

In [5]:
class PricePlusIdAtom(AbstractAtom):
    """Atom producing one feature: the row-wise sum of `price` and `id`."""

    # Input columns this atom declares that it uses.
    use_columns = ('price', 'id')

    def call(self, df_input, y=None):
        """Build the feature frame for this atom.

        Args:
            df_input: frame providing the `price` and `id` columns.
            y: accepted for interface compatibility; not used here.

        Returns:
            A DataFrame with the single column `price_plus_id`, indexed like
            the summed input columns.
        """
        price_plus_id = df_input['price'] + df_input['id']
        return price_plus_id.to_frame(name='price_plus_id')

In [6]:
# Instantiate the atom, then run it on the sample frame; the result is displayed below.
price_plus_id_atom = PricePlusIdAtom()
price_plus_id_atom.generate(df_input)

Unnamed: 0,price_plus_id
0,3
1,4
2,4


### String Contains

In [7]:
from vivid.featureset import StringContainsAtom

In [8]:
class NameContainsAtom(StringContainsAtom):
    """String-contains features for the `name` column.

    On the sample input of this notebook the atom yields the 0/1 columns
    `name_foo`, `name_o` and `name_b`.
    """

    # Maps an input column name to the substrings searched for in that column.
    queryset = {'name': ['foo', 'o', 'b']}

In [9]:
# Instantiate the atom, then run it on the sample frame; the result is displayed below.
name_contains_atom = NameContainsAtom()
name_contains_atom.generate(df_input)

Unnamed: 0,name_foo,name_o,name_b
0,1,1,0
1,0,1,0
2,0,0,1


## Merge 

外部データとのマージによって作成する特徴量です。

In [10]:
from vivid.featureset import AbstractMergeAtom

In [11]:
# External lookup table: one `value` per `company_id`.
df_outer = pd.DataFrame({
    'company_id': [1, 2],
    'value': [100, 200],
})

In [12]:
# Show the external table that the merge atom below reads.
df_outer

Unnamed: 0,company_id,value
0,1,100
1,2,200


In [13]:
class CompanyValueAtom(AbstractMergeAtom):
    """Merge atom exposing the `value` column of the external table.

    On the sample input each row receives the `value` that belongs to its
    `company_id`.
    """

    # Key column used to merge the input with the external data.
    merge_key = 'company_id'

    def read_outer_dataframe(self):
        """Return the external table, i.e. the notebook-level `df_outer`."""
        return df_outer

    def generate_outer_feature(self):
        """Return the external feature columns as a one-column DataFrame (`value`).

        NOTE(review): `self.df_outer` is presumably provided by
        `AbstractMergeAtom` from `read_outer_dataframe` - confirm in vivid.
        """
        feature_columns = ['value']
        return self.df_outer[feature_columns]

In [14]:
# Instantiate the merge atom, then run it on the sample frame; the result is displayed below.
company_value_atom = CompanyValueAtom()
company_value_atom.generate(df_input)

Unnamed: 0,value
0,100
1,100
2,200


すべて合わせた特徴量を作りたいときは `pandas.concat` でまとめましょう

In [15]:
# One instance of each atom defined above.
atoms = [PricePlusIdAtom(), NameContainsAtom(), CompanyValueAtom()]

# Run every atom on the same input, then join the resulting frames column-wise.
feature_frames = [atom.generate(df_input) for atom in atoms]
pd.concat(feature_frames, axis=1)

Unnamed: 0,price_plus_id,name_foo,name_o,name_b,value
0,3,1,1,0,100
1,4,0,1,0,100
2,4,0,0,1,200


もしくは `Molecule` を使っても同じ結果が得られます。

In [16]:
from vivid.featureset.molecules import Molecule

In [17]:
# Wrap the atom list in a Molecule and generate all of its features in one call.
sample_molecule = Molecule(atoms, name='sample')
sample_molecule.generate(df_input)

Unnamed: 0,price_plus_id,name_foo,name_o,name_b,value
0,3,1,1,0,100
1,4,0,1,0,100
2,4,0,0,1,200


In [18]:
from vivid.featureset import create_molecule, find_molecule

In [19]:
# Build a Molecule named 'hoge1' from the atom list; the call returns the Molecule instance.
create_molecule(atoms, name='hoge1')

<vivid.featureset.molecules.Molecule at 0x7f19312d0a58>

In [20]:
# Look molecules up by name; the result is a list, here holding the Molecule created as 'hoge1'.
find_molecule(name='hoge1')

[<vivid.featureset.molecules.Molecule at 0x7f19312d0a58>]