In [1]:
from pkg.datasets import Datasets
from pkg.analysis import Analyst
from pkg.model import Method

import pandas as pd
import numpy as np

In [2]:
# Build a 9x4 DataFrame of random floats in [0, 1) with columns A-D;
# it is added to a Datasets object in a later cell.
data_1 = pd.DataFrame(
    np.random.random((9, 4)),
    columns=list('ABCD'),
)
data_1

Unnamed: 0,A,B,C,D
0,0.414321,0.14498,0.777902,0.029038
1,0.461181,0.065903,0.259346,0.149122
2,0.942654,0.069991,0.497414,0.438539
3,0.017955,0.943862,0.721331,0.967361
4,0.346891,0.08721,0.158094,0.935174
5,0.400353,0.017249,0.417507,0.617182
6,0.970523,0.387487,0.104292,0.558032
7,0.753903,0.07687,0.696372,0.030047
8,0.481776,0.688745,0.412401,0.473904


In [3]:
# Random integer multipliers in {0, 1, 2, 3}, laid out as 9 rows x 4 columns (A-D).
data_2 = pd.DataFrame(
    np.random.randint(0, 4, size=(9, 4)),
    columns=list('ABCD'),
)
data_2

Unnamed: 0,A,B,C,D
0,3,1,0,3
1,1,3,1,2
2,2,0,3,0
3,1,0,0,2
4,1,2,2,0
5,0,2,3,3
6,2,1,2,1
7,3,2,2,1
8,2,3,2,1


In [4]:
# Data will be saved under the 'test' folder (the dataset name).
test_data = Datasets(name='test')

# Register the float frame as 'basic' and the integer frame as 'multiplier'.
test_data.add_item_from_dataframe(
    data=data_1,
    name="basic",
    description="A random generated dataset for multiply.",
    save=True,
)
test_data.add_item_from_dataframe(
    data=data_2,
    name='multiplier',
    category='source',
    description="A random generated multiplier.",
)

test_data.report();

+------------+--------------------------------+---------------------+----------+
|    Name    |          Description           |         Path        | Category |
+------------+--------------------------------+---------------------+----------+
|   basic    | A random generated dataset for |    test/basic.csv   |  assets  |
|            |           multiply.            |                     |          |
| multiplier | A random generated multiplier. | test/multiplier.csv |  source  |
+------------+--------------------------------+---------------------+----------+


In [5]:
def multiply_one_by_one(dataset):
    """Multiply the 'basic' item element-wise by the 'multiplier' item.

    Parameters
    ----------
    dataset
        Object whose ``get_item(name).obj`` returns the stored data; the
        items named 'basic' and 'multiplier' are read.

    Returns
    -------
    The product ``basic * multiplier``.

    Raises
    ------
    ValueError
        If the two objects differ in shape, or if both expose ``columns``
        and their sets of column labels differ.
    """
    # Retrieve data from dataset
    data = dataset.get_item('basic').obj
    multiplier = dataset.get_item('multiplier').obj

    if data.shape != multiplier.shape:
        raise ValueError("Not suitable multiplier")

    # Labelled operands are multiplied after aligning on labels, so frames of
    # equal shape but different column names would silently yield NaN columns.
    # Only compare labels when both operands actually carry them.
    data_columns = getattr(data, 'columns', None)
    multiplier_columns = getattr(multiplier, 'columns', None)
    if data_columns is not None and multiplier_columns is not None:
        if set(data_columns) != set(multiplier_columns):
            raise ValueError("Not suitable multiplier")

    return data * multiplier

def find_extreme_multiplier(dataset, how):
    """Scale the 'basic' item by a statistic of the 'multiplier' item.

    ``how`` selects the NumPy reduction ('max', 'min', 'mean' or 'median')
    applied to the multiplier along axis 0; the 'basic' data is then
    multiplied by that reduced value. Any other ``how`` raises ``ValueError``.
    """
    # Both items are fetched before `how` is validated.
    base = dataset.get_item('basic').obj
    factors = dataset.get_item('multiplier').obj

    supported = ('max', 'min', 'mean', 'median')
    if how in supported:
        reduce_fn = getattr(np, how)  # e.g. np.max, np.median
        return base * reduce_fn(factors, axis=0)
    raise ValueError("Not correct statistic")

methods = Method(unit_base='test', name='multiply')

# No explicit name here: the default name is just the function name.
methods.add_function_item(
    description='Multiply every element of the data and corresponding multiplier.',
    function=multiply_one_by_one,
)

# Registered under a custom name, with a description for its `how` parameter.
methods.add_function_item(
    parameters={'how': 'Choose a statistical way to sort multiplier data.'},
    description='Find a statistic of multiplier data and apply multiply.',
    function=find_extreme_multiplier,
    name='statistical_multiply',
)

methods.report();

+----------------------+--------------------------------+-------------------------+----------+
|         Name         |          Description           |           Func          |  Params  |
+----------------------+--------------------------------+-------------------------+----------+
| multiply_one_by_one  | Multiply every element of the  |   multiply_one_by_one   |          |
|                      |     data and corresponding     |                         |          |
|                      |          multiplier.           |                         |          |
| statistical_multiply | Find a statistic of multiplier | find_extreme_multiplier | (1) how. |
|                      |    data and apply multiply.    |                         |          |
+----------------------+--------------------------------+-------------------------+----------+


In [6]:
# Show the parameter descriptions stored on the 'statistical_multiply' method item.
methods.statistical_multiply.parameters

{'how': 'Choose a statistical way to sort multiplier data.'}

In [7]:
# Two analysts on the same unit base; only `analysis` is populated in this cell.
analysis = Analyst(unit_base='test', name='multiply')
compare = Analyst(unit_base='test', name='compare_multiply')

analysis.add_analyst_item(
    name='simple_multiply',
    description='Simplest approach: multiply one by one.',
    data_item=test_data,  # Using Datasets object as input here.
    method_item=methods.multiply_one_by_one  # Using MethodItem object as input.
)

# One analyst item per supported statistic, all sharing the same method item
# and differing only in the `how` parameter.
# TODO: parameters are currently varied by iterating; this is expected to be
# replaced by a function.
for how in ('max', 'min', 'mean', 'median'):
    method = methods.statistical_multiply
    analysis.add_analyst_item(
        name=f'{how}_multiply',
        description=f'Statistical multiply: {how}.',
        data_item=test_data,  # Using Datasets object as input here.
        method_item=method,  # Use the item looked up above (was assigned but unused).
        parameters={'how': how}
    )

analysis.dump_metadata()  # save metadata
analysis.report();

+-----------------+--------------------------------+------+----------------------+--------+
|       Name      |          Description           | Data |        Method        | Check? |
+-----------------+--------------------------------+------+----------------------+--------+
| simple_multiply |  Simplest approach: multiply   | test | multiply_one_by_one  |  None  |
|                 |          one by one.           |      |                      |        |
|   max_multiply  |   Statistical multiply: max.   | test | statistical_multiply |  None  |
|   min_multiply  |   Statistical multiply: min.   | test | statistical_multiply |  None  |
|  mean_multiply  |  Statistical multiply: mean.   | test | statistical_multiply |  None  |
| median_multiply | Statistical multiply: median.  | test | statistical_multiply |  None  |
+-----------------+--------------------------------+------+----------------------+--------+


In [8]:
# Process every registered analyst item (presumably runs each item's method on
# its data; confirm in pkg.analysis). The cell output is the returned list of item names.
analysis.do_all()

['simple_multiply',
 'max_multiply',
 'min_multiply',
 'mean_multiply',
 'median_multiply']

In [9]:
# Display the `results` attribute of the 'max_multiply' analyst item, which was
# registered with the statistical_multiply method and how='max'.
analysis.max_multiply.results

Unnamed: 0,A,B,C,D
0,1.242964,0.43494,2.333707,0.087114
1,1.383542,0.19771,0.778037,0.447366
2,2.827963,0.209974,1.492241,1.315618
3,0.053864,2.831585,2.163992,2.902084
4,1.040674,0.26163,0.474282,2.805521
5,1.201059,0.051748,1.252522,1.851545
6,2.91157,1.162462,0.312877,1.674097
7,2.26171,0.230609,2.089115,0.09014
8,1.445329,2.066235,1.237202,1.421712
