In [45]:
from pprint import pprint

import numpy as np
import pandas as pd
from pyjackson import serialize
from sklearn.metrics import roc_auc_score

import algolink


def get_data():
    """Build the tiny toy dataset used by this demo.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a 2x2
        ``pandas.DataFrame`` with columns ``'a'`` and ``'b'`` and ``labels``
        is a ``numpy.ndarray`` holding ``[1, 0]``.
    """
    features = pd.DataFrame({'a': [1, 0], 'b': [0, 1]})
    labels = np.array([1, 0])
    return features, labels


def constant(data):
    """Baseline "model" that predicts ``0`` for every input row.

    Args:
        data: Any sized collection of samples (e.g. a ``pandas.DataFrame``);
            only ``len(data)`` is used.

    Returns:
        numpy.ndarray: Integer array of zeros with ``len(data)`` entries.
    """
    # np.zeros with an explicit integer dtype keeps the result integer-typed
    # even for empty input (np.array([]) silently becomes float64) and avoids
    # building an intermediate Python list.
    return np.zeros(len(data), dtype=int)


def truth(data: pd.DataFrame):
    """"Oracle" model: return the first column of ``data`` as the prediction.

    For the toy frame built by ``get_data`` the first column equals the target,
    so this model reproduces the labels exactly.

    Args:
        data: Input frame with at least one column.

    Returns:
        numpy.ndarray: A copy of the values of the first column, in row order,
        keeping that column's own dtype.

    Raises:
        IndexError: If ``data`` has no columns.
    """
    # Select the first column by position with .iloc. A per-row ``r[0]`` lookup
    # relies on integer keys falling back to positional access on a
    # label-indexed Series, which pandas has deprecated, and it forces a slow
    # Python-level iterrows() loop.
    return np.array(data.iloc[:, 0])


def my_custom_metric(y_true, y_score):
    """Custom metric: summed scores as a percentage of the summed true values.

    Args:
        y_true: Ground-truth values; must expose ``.sum()`` (e.g. ``numpy.ndarray``).
        y_score: Predicted values; must expose ``.sum()``.

    Returns:
        ``y_score.sum() / y_true.sum() * 100``.
    """
    score_total = y_score.sum()
    true_total = y_true.sum()
    ratio = score_total / true_total
    return ratio * 100.


def main():
    """Run the local demo end to end.

    Creates a local AlgoLink client (with ``clear=True``), registers two metrics
    and one evaluation set on a task, pushes the ``constant`` and ``truth``
    models, evaluates everything, then binds, saves and prints the task.
    """
    client = algolink.AlgoLink.local(clear=True)

    features, labels = get_data()
    # Design note: datasets (and metrics) should be creatable from arbitrary
    # objects with little ceremony, either through the task as done here or
    # through a client-level create_dataset helper. The two are equivalent for
    # now; going through the client would matter once datasets are managed in
    # the metadata/artifact repositories (create first, push afterwards, as is
    # done for models).
    # dataset = client.create_dataset(features, labels)

    # Task input/output types are deliberately left unset at creation time to
    # keep task creation simple.
    task = client.get_or_create_task('my_project', 'regression_is_my_profession')
    print(roc_auc_score,"roc_auc_score")
    for metric_name, metric_fn in (('auc', roc_auc_score), ('custom', my_custom_metric)):
        task.add_metric(metric_name, metric_fn)
    task.add_evaluation('train', features, labels, ['auc', 'custom'])

    # Show what the task tracks after registration.
    for registry_name in ('evaluation_sets', 'datasets', 'metrics'):
        pprint(getattr(task, registry_name))

    # Create and push one model per plain function; the feature frame is
    # passed as the second argument of each call.
    pushed_models = [
        task.create_and_push_model(predict_fn, features, model_name=label)
        for predict_fn, label in ((constant, 'constant'), (truth, 'truth'))
    ]

    for pushed in pushed_models:
        pprint(pushed.wrapper.methods)

    # Open question: should results be stored on the models? A way to evaluate
    # only a subset ("not all") is also still missing.
    evaluation = task.evaluate_all()

    print(evaluation)
    client._bind(task)
    task.save()
    pprint(serialize(task))


# Entry point: run the demo only when this code executes as the main module.
if __name__ == '__main__':
    main()

<function roc_auc_score at 0x7fd493019320> roc_auc_score
{'train': <algolink.core.objects.core.EvaluationSet object at 0x7fd4959e3dd0>}
{'train_input': <algolink.core.objects.dataset_source.InMemoryDatasetSource object at 0x7fd495a199d0>,
 'train_output': <algolink.core.objects.dataset_source.InMemoryDatasetSource object at 0x7fd4959e3a90>}
{'auc': <algolink.core.objects.metric.LibFunctionMetric object at 0x7fd4957cd9d0>,
 'custom': <algolink.core.objects.metric.CallableMetric object at 0x7fd49599ec50>}
{'predict': ('__call__',
             <class 'pyjackson.generics.DataFrameType[columns=['a', 'b'],dtypes=['int64', 'int64'],index_cols=[]]'>,
             <class 'pyjackson.generics.NumpyNdarrayDatasetType[shape=(2,),dtype=int64]'>)}
{'predict': ('__call__',
             <class 'pyjackson.generics.DataFrameType[columns=['a', 'b'],dtypes=['int64', 'int64'],index_cols=[]]'>,
             <class 'pyjackson.generics.NumpyNdarrayDatasetType[shape=(2,),dtype=int64]'>)}
{'constant': Evaluation

In [12]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from pyjackson import serialize
from sklearn.metrics import roc_auc_score

import algolink
from algolink import AlgoLink

def get_data():
    """Build the tiny toy dataset used by this demo.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a 2x2
        ``pandas.DataFrame`` with columns ``'a'`` and ``'b'`` and ``labels``
        is a ``numpy.ndarray`` holding ``[1, 0]``.
    """
    features = pd.DataFrame({'a': [1, 0], 'b': [0, 1]})
    labels = np.array([1, 0])
    return features, labels


def constant(data):
    """Baseline "model" that predicts ``0`` for every input row.

    Args:
        data: Any sized collection of samples (e.g. a ``pandas.DataFrame``);
            only ``len(data)`` is used.

    Returns:
        numpy.ndarray: Integer array of zeros with ``len(data)`` entries.
    """
    # np.zeros with an explicit integer dtype keeps the result integer-typed
    # even for empty input (np.array([]) silently becomes float64) and avoids
    # building an intermediate Python list.
    return np.zeros(len(data), dtype=int)


def truth(data: pd.DataFrame):
    """"Oracle" model: return the first column of ``data`` as the prediction.

    For the toy frame built by ``get_data`` the first column equals the target,
    so this model reproduces the labels exactly.

    Args:
        data: Input frame with at least one column.

    Returns:
        numpy.ndarray: A copy of the values of the first column, in row order,
        keeping that column's own dtype.

    Raises:
        IndexError: If ``data`` has no columns.
    """
    # Select the first column by position with .iloc. A per-row ``r[0]`` lookup
    # relies on integer keys falling back to positional access on a
    # label-indexed Series, which pandas has deprecated, and it forces a slow
    # Python-level iterrows() loop.
    return np.array(data.iloc[:, 0])


def my_custom_metric(y_true, y_score):
    """Custom metric: summed scores as a percentage of the summed true values.

    Args:
        y_true: Ground-truth values; must expose ``.sum()`` (e.g. ``numpy.ndarray``).
        y_score: Predicted values; must expose ``.sum()``.

    Returns:
        ``y_score.sum() / y_true.sum() * 100``.
    """
    score_total = y_score.sum()
    true_total = y_true.sum()
    ratio = score_total / true_total
    return ratio * 100.




def add_one(data):
    """Toy "model": shift ``data`` up by one.

    Args:
        data: Any object supporting ``data + 1`` (the notebook passes a
            ``numpy.ndarray`` sample).

    Returns:
        The result of ``data + 1``.
    """
    incremented = data + 1
    return incremented

from algolink.core.objects.dataset_source import Dataset
from algolink.repository import DatasetRepository


def test_save(dataset_repo: DatasetRepository, data: Dataset):
    """Smoke-check that a dataset can be saved to a repository and read back.

    Saves ``data`` under the name ``'a'`` and calls ``read()`` on the returned
    source. Nothing is asserted or returned; the check passes when neither
    call raises.
    """
    stored_source = dataset_repo.save('a', data)
    # The read-back value is not compared with ``data``.
    reloaded = stored_source.read()

# Module-level setup. These assignments create notebook globals: `data` and
# `target` are read by main() below, and `task3` is used by a later cell.
# NOTE(review): "./dataset" is presumably the storage path for the local client —
# confirm against AlgoLink.local's signature.
ebnt = AlgoLink.local("./dataset")
data, target = get_data()
task3 = ebnt.get_or_create_task('my_project', 'my_task2')
# NOTE(review): `dataset` is not referenced again in this cell.
dataset = ebnt.create_dataset(data, target)
task3.save()
# Disabled experiment kept for reference: registering metrics and an
# evaluation set on task3.
#task3.add_metric('auc', roc_auc_score)
    
#task3.add_metric('custom', my_custom_metric)
#task3.add_evaluation('train', data, target, ['auc', 'custom'])
#task3.save()
#ebnt.create_metric(1)
def main():
    """Register metrics, an evaluation set and a toy model on a task, then persist them.

    Uses an AlgoLink client built with the ``'sqlalchemy'`` metadata backend
    (SQLite file ``sql.db``) and the ``'local'`` artifact backend rooted at
    ``./``. Reads the notebook-level globals ``data`` and ``target``.
    """
    # Single source of truth for the model name. The clean-up step used to look
    # for 'add_one_model' while the model was pushed as 'churn_model', so a
    # model left over from an earlier run was never removed before re-pushing.
    model_name = 'churn_model'

    #  create a custom client: metadata goes to SQLite through sqlalchemy,
    #  artifacts go to the local filesystem
    alink = AlgoLink.custom_client('sqlalchemy', 'local',
                                 meta_kwargs={'db_uri': 'sqlite:///sql.db'},
                                 artifact_kwargs={'path': './'})
    # save client configuration for later use
    alink.save_client_config('client_config.json')

    #  obtain Task
    task = alink.get_or_create_task('my_project', 'my_task_churn_model')
    print(task.name)
    #  remove the model if it already exists (for demo purposes)
    if task.models.contains(model_name):
        model = task.models(model_name)
        task.delete_model(model)

    task.add_metric('auc', roc_auc_score)

    task.add_metric('custom', my_custom_metric)
    task.add_evaluation('train', data, target, ['auc', 'custom'])
    pprint(task.evaluation_sets)
    #task.evaluate_all()
    #  create model from function add_one and numpy array as data sample
    model = algolink.create_model(add_one, np.array([0]), model_name)
    #model.evaluate_set('test_bool', method_name='predict1', raise_on_error=True)
    #  persist model
    task.push_model(model)
    task.push_datasets()
    # NOTE(review): the next two attribute accesses discard their values;
    # kept as-is in case the accesses themselves matter — confirm.
    model.artifact_any
    model.artifact_req_persisted
    #model.evaluate_set(['d'],"sd","roc")
    # NOTE(review): `_bind` is a private client method — confirm it is the
    # intended way to attach the task before saving.
    alink._bind(task)
    task.save()

# Entry point: run the demo only when this code executes as the main module.
if __name__ == '__main__':
    main()

my_task_churn_model
{'train': <algolink.core.objects.core.EvaluationSet object at 0x7ff5dfd544d0>}


In [65]:
# List every entry, hidden ones included, in the current working directory.
!ls -a

[34m.[m[m                    algolink重构.ipynb   model_tracking.py
[34m..[m[m                   client_config.json   sql.db
[34m.algolink[m[m            [34mdatasets[m[m             test.py
[34m.ipynb_checkpoints[m[m   [34mmodel[m[m                tests.ipynb
[34m__pycache__[m[m          model-track.ipynb


In [2]:
# Print the current working directory.
# NOTE(review): the recorded output exposes an absolute local path; consider
# clearing it before sharing the notebook.
!pwd

/Users/leepand/Downloads/MLOps/AlgoLink/tests


In [2]:
# List the non-hidden entries of the current working directory.
!ls

algolink重构.ipynb   [34mmodel[m[m                test.py
client_config.json   sql.db               tests.ipynb


In [33]:
# NOTE(review): this imports DatasetRepository from `ebonite`, while the other
# cells of this notebook import it from `algolink.repository` — confirm which
# package is intended.
from ebonite.repository import DatasetRepository
# NOTE(review): save() is called on a freshly constructed DatasetRepository with
# the name 'a' and the plain string "data" as payload, whereas test_save
# annotates that argument as `Dataset` — presumably an exploratory call; verify
# it is expected to work.
DatasetRepository().save('a', "data")

In [43]:
# List every entry, hidden ones included, under .algolink/artifacts/datasets/0.
!ls -a .algolink/artifacts/datasets/0

[34m.[m[m            [34m..[m[m           [34mtrain_input[m[m  [34mtrain_output[m[m


In [40]:
# DESTRUCTIVE: recursively and forcibly deletes the `.algolink` directory in the
# current working directory, without confirmation.
!rm -rf .algolink

In [79]:
# Depends on the `task3` global created by a previously executed cell; this
# fails with NameError on a kernel where that cell has not run.
task3.get()

Task(id=0,name=my_task2)

In [8]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from pyjackson import serialize
from sklearn.metrics import roc_auc_score

import algolink
from algolink import AlgoLink

def get_data():
    """Build the tiny toy dataset used by this demo.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a 2x2
        ``pandas.DataFrame`` with columns ``'a'`` and ``'b'`` and ``labels``
        is a ``numpy.ndarray`` holding ``[1, 0]``.
    """
    features = pd.DataFrame({'a': [1, 0], 'b': [0, 1]})
    labels = np.array([1, 0])
    return features, labels


def constant(data):
    """Baseline "model" that predicts ``0`` for every input row.

    Args:
        data: Any sized collection of samples (e.g. a ``pandas.DataFrame``);
            only ``len(data)`` is used.

    Returns:
        numpy.ndarray: Integer array of zeros with ``len(data)`` entries.
    """
    # np.zeros with an explicit integer dtype keeps the result integer-typed
    # even for empty input (np.array([]) silently becomes float64) and avoids
    # building an intermediate Python list.
    return np.zeros(len(data), dtype=int)


def truth(data: pd.DataFrame):
    """"Oracle" model: return the first column of ``data`` as the prediction.

    For the toy frame built by ``get_data`` the first column equals the target,
    so this model reproduces the labels exactly.

    Args:
        data: Input frame with at least one column.

    Returns:
        numpy.ndarray: A copy of the values of the first column, in row order,
        keeping that column's own dtype.

    Raises:
        IndexError: If ``data`` has no columns.
    """
    # Select the first column by position with .iloc. A per-row ``r[0]`` lookup
    # relies on integer keys falling back to positional access on a
    # label-indexed Series, which pandas has deprecated, and it forces a slow
    # Python-level iterrows() loop.
    return np.array(data.iloc[:, 0])


def my_custom_metric(y_true, y_score):
    """Custom metric: summed scores as a percentage of the summed true values.

    Args:
        y_true: Ground-truth values; must expose ``.sum()`` (e.g. ``numpy.ndarray``).
        y_score: Predicted values; must expose ``.sum()``.

    Returns:
        ``y_score.sum() / y_true.sum() * 100``.
    """
    score_total = y_score.sum()
    true_total = y_true.sum()
    ratio = score_total / true_total
    return ratio * 100.




def add_one(data):
    """Toy "model": shift ``data`` up by one.

    Args:
        data: Any object supporting ``data + 1`` (the notebook passes a
            ``numpy.ndarray`` sample).

    Returns:
        The result of ``data + 1``.
    """
    incremented = data + 1
    return incremented

from algolink.core.objects.dataset_source import Dataset
from algolink.repository import DatasetRepository


def test_save(dataset_repo: DatasetRepository, data: Dataset):
    """Smoke-check that a dataset can be saved to a repository and read back.

    Saves ``data`` under the name ``'a'`` and calls ``read()`` on the returned
    source. Nothing is asserted or returned; the check passes when neither
    call raises.
    """
    stored_source = dataset_repo.save('a', data)
    # The read-back value is not compared with ``data``.
    reloaded = stored_source.read()

#ebnt = AlgoLink.local("./dataset")
#data, target = get_data()
#task3 = ebnt.get_or_create_task('my_project', 'my_task2')
#dataset = ebnt.create_dataset(data, target)
#task3.save()
#task3.add_metric('auc', roc_auc_score)
    
#task3.add_metric('custom', my_custom_metric)
#task3.add_evaluation('train', data, target, ['auc', 'custom'])
#task3.save()
#ebnt.create_metric(1)
def main():
    """Register metrics, an evaluation set and a toy model on a task.

    Uses an AlgoLink client built with the ``'sqlalchemy'`` metadata backend
    (SQLite file ``sql.db``) and the ``'local'`` artifact backend rooted at
    ``./``. The toy dataset is built locally via ``get_data()``.
    """
    model_name = 'churn_model8'

    # Build the dataset here instead of reading `data` / `target` globals left
    # behind by another cell: their assignment is commented out in this cell,
    # so running the cell on its own raised NameError.
    data, target = get_data()

    #  create a custom client: metadata goes to SQLite through sqlalchemy,
    #  artifacts go to the local filesystem
    alink = AlgoLink.custom_client('sqlalchemy', 'local',
                                 meta_kwargs={'db_uri': 'sqlite:///sql.db'},
                                 artifact_kwargs={'path': './'})
    # save client configuration for later use
    alink.save_client_config('client_config.json')

    #  obtain Task
    task = alink.get_or_create_task('mlops_19', 'lpo_50')
    print(task.name)
    #  remove the model if it already exists (for demo purposes)
    if task.models.contains(model_name):
        model = task.models(model_name)
        task.delete_model(model)

    task.add_metric('auc', roc_auc_score)

    task.add_metric('custom', my_custom_metric)
    task.add_evaluation('train', data, target, ['auc', 'custom'])
    pprint(task.evaluation_sets)
    #task.evaluate_all()
    #  create model from function add_one and numpy array as data sample
    model = algolink.create_model(add_one, np.array([0]), model_name)
    #model.evaluate_set('test_bool', method_name='predict1', raise_on_error=True)
    #  persist model
    task.push_model(model)
    #task.push_datasets()
    #model.artifact_any
    #model.artifact_req_persisted
    #model.evaluate_set(['d'],"sd","roc")
    #alink._bind(task)
    #task.save()

# Entry point: run the demo only when this code executes as the main module.
if __name__ == '__main__':
    main()

lpo_50
{'train': <algolink.core.objects.core.EvaluationSet object at 0x7fe12bc63fd0>}


In [None]:
[distutils]
index-servers=pypi

[pypi]
repository = https://upload.pypi.org/legacy/
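# SECURITY: never store real credentials in a notebook. Supply a PyPI API token
# at upload time (e.g. via TWINE_USERNAME / TWINE_PASSWORD or a keyring), and
# rotate any password that was previously committed here.
username = __token__
password = <your-pypi-api-token>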