In [1]:
import pandas as pd
import repo.repo as repo #import repo
import repo.memory_handler as memory_handler
from repo.repo_objects import RepoInfoKey, MeasureConfiguration
from job_runner.job_runner import SimpleJobRunner, JobState
import logging as logging

logging.basicConfig(level=logging.DEBUG)

# Read housing data

In [2]:
data = pd.read_csv('housing.csv')

# Create a new repository

In [3]:
# setting up the repository
handler = memory_handler.RepoObjectMemoryStorage()
numpy_handler = memory_handler.NumpyMemoryStorage()
ml_repo = repo.MLRepo('test_user', handler, numpy_handler, handler, None)
job_runner = SimpleJobRunner(ml_repo)
ml_repo._job_runner = job_runner

## Adding data

In [4]:
ml_repo.add_data('boston_housing', data, input_variables=['RM', 'LSTAT', 'PTRATIO'], target_variables = ['MEDV'])

In [5]:
training_data = repo.DataSet('boston_housing', 0, 300, 
                            repo_info = {RepoInfoKey.NAME.value: 'training_data', RepoInfoKey.CATEGORY.value: repo.MLObjectType.TRAINING_DATA})
test_data = repo.DataSet('boston_housing', 301, -1, 
                            repo_info = {RepoInfoKey.NAME.value: 'test_data',  RepoInfoKey.CATEGORY.value: repo.MLObjectType.TEST_DATA})
version_info = ml_repo.add([training_data, test_data], 'add training and test data')

## Adding sklearn model

In [6]:
import externals.sklearn_interface as sklearn_interface
from sklearn.tree import DecisionTreeRegressor

In [7]:
sklearn_interface.add_model(ml_repo, DecisionTreeRegressor(), model_param={'max_depth': 5})

In [8]:
job_id =[ml_repo.run_training()]
#job_id = ml_repo.run_evaluation()
#len(job_id)

INFO:root:Training job added to jobrunner, job_id: 46fbedde-bc24-11e8-ba09-fc084a6691eb


In [9]:
print(job_id)
ml_repo.get_names(repo.MLObjectType.CALIBRATED_MODEL.value)
ml_repo.get_names(repo.MLObjectType.MODEL.value)

[UUID('46fbedde-bc24-11e8-ba09-fc084a6691eb')]


['DecisionTreeRegressor']

In [10]:
job_status = job_runner.get_status(job_id[-1])
if job_status == JobState.FAILED:
    print(job_runner.get_trace_back(job_id[-1]))
    job_runner.get_error_message(job_id[-1])

In [11]:
mm = ml_repo._get('DecisionTreeRegressor/model')
mm.repo_info[RepoInfoKey.MODIFICATION_INFO.value]['DecisionTreeRegressor']
mm.repo_info[RepoInfoKey.VERSION]

0

In [12]:
for k in ml_repo.get_commits():
    print(str(k))

time: 2018-09-19 17:54:20.966279, author: test_user, message: data boston_housing added to repository, objects: {'boston_housing': 0, 'repo_mapping': 0}
time: 2018-09-19 17:54:21.030747, author: test_user, message: add training and test data, objects: {'training_data': 0, 'test_data': 0, 'repo_mapping': 1}
time: 2018-09-19 17:54:23.001679, author: test_user, message: add model evaluation function eval_sklearn, objects: {'eval_sklearn': 0, 'repo_mapping': 2}
time: 2018-09-19 17:54:23.001679, author: test_user, message: add model training function train_sklearn, objects: {'train_sklearn': 0, 'repo_mapping': 3}
time: 2018-09-19 17:54:23.007617, author: test_user, message: adding model and training parameter, objects: {'DecisionTreeRegressor/model_param': 0, 'repo_mapping': 4}
time: 2018-09-19 17:54:23.007617, author: test_user, message: add model DecisionTreeRegressor, objects: {'DecisionTreeRegressor': 0, 'repo_mapping': 5}
time: 2018-09-19 17:54:23.056022, author: test_user, message: tr

In [26]:
for k in repo.MLObjectType:
    names = ml_repo.get_names(k.value)
    for n in names: 
        print(n + '  ' + k.value) 

DecisionTreeRegressor/eval/test_data  eval_data
DecisionTreeRegressor/eval/training_data  eval_data
boston_housing  raw_data
training_data  training_data
test_data  test_data
DecisionTreeRegressor/model_param  model_param
train_sklearn  training_function
eval_sklearn  model_eval_function
DecisionTreeRegressor  model
DecisionTreeRegressor/model  calib_model
CommitInfo  commit_info
repo_mapping  mapping
DecisionTreeRegressor/test_data/max  measure
DecisionTreeRegressor/training_data/max  measure
measure_config  measure_config


In [14]:
param = ml_repo._get('DecisionTreeRegressor/model_param')

In [15]:
str(param.sklearn_params)

"{'criterion': 'mse', 'max_depth': 5, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_split': 1e-07, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': False, 'random_state': None, 'splitter': 'best'}"

In [16]:
param.sklearn_params['max_depth'] = 3

In [17]:
ml_repo.add(param, message='change max_depth to 3')

1

In [18]:
d = ml_repo.run_training()

INFO:root:Training job added to jobrunner, job_id: 474e847e-bc24-11e8-8fb1-fc084a6691eb


In [19]:
job_runner.get_status(d)

<JobState.SUCCESSFULLY_FINISHED: 'successfully_finished'>

In [20]:
ml_repo.run_evaluation()

INFO:root:Eval job added to jobrunner, job_id: 475ef94c-bc24-11e8-8e56-fc084a6691eb
INFO:root:Eval job added to jobrunner, job_id: 475f955c-bc24-11e8-a326-fc084a6691eb


[UUID('475ef94c-bc24-11e8-8e56-fc084a6691eb'),
 UUID('475f955c-bc24-11e8-a326-fc084a6691eb')]

In [21]:
job_runner._job_status

{UUID('46fbedde-bc24-11e8-ba09-fc084a6691eb'): <JobState.SUCCESSFULLY_FINISHED: 'successfully_finished'>,
 UUID('474e847e-bc24-11e8-8fb1-fc084a6691eb'): <JobState.SUCCESSFULLY_FINISHED: 'successfully_finished'>,
 UUID('475ef94c-bc24-11e8-8e56-fc084a6691eb'): <JobState.SUCCESSFULLY_FINISHED: 'successfully_finished'>,
 UUID('475f955c-bc24-11e8-a326-fc084a6691eb'): <JobState.SUCCESSFULLY_FINISHED: 'successfully_finished'>}

In [22]:
ml_repo.add_measure(MeasureConfiguration.MAX)

In [23]:
depp = ml_repo.run_measures()
#job_runner.get_error_message(depp[0])

INFO:root:Measure job max added to jobrunner, job_id: 477a909c-bc24-11e8-9708-fc084a6691eb
INFO:root:Measure job max added to jobrunner, job_id: 477a909d-bc24-11e8-9c83-fc084a6691eb


running
running
add measure DecisionTreeRegressor/test_data/max
running
running
add measure DecisionTreeRegressor/training_data/max


In [24]:
ml_repo.get_names(repo.MLObjectType.MEASURE_CONFIGURATION)

['measure_config']

In [25]:
job_runner.get_error_message(depp[0])
job_runner.get_trace_back(depp[0])

KeyError: UUID('477a909c-bc24-11e8-9708-fc084a6691eb')