In [1]:
import os
from getpass import getpass
from pathlib import Path

from accutuning_client.client import Client

In [2]:
# Create the API client used by every cell below.
# Presumably the arguments are the server host and port (a later output shows
# URLs under http://localhost:8000) — confirm against the Client signature.
client = Client('localhost', 8000)

In [3]:
# Log in to the server.
# Credentials are read from the environment instead of being hard-coded, so
# they are not saved or shared together with the notebook. Set
# ACCUTUNING_USERNAME / ACCUTUNING_PASSWORD for unattended runs; when a value
# is missing the notebook prompts for it (the password prompt does not echo).
# The login call stays the last expression so its result is still displayed.
client.login(
    os.environ.get('ACCUTUNING_USERNAME') or input('Accutuning username: '),
    os.environ.get('ACCUTUNING_PASSWORD') or getpass('Accutuning password: '),
)

True

In [4]:
# Fetch the list of experiments.
experiments = client.experiments()
# Print how many experiments currently exist (the message text is Korean).
print(f'현재 Experiments는 총 {len(experiments)}개 있습니다.')
# Last expression: display the experiment list itself.
experiments

현재 Experiments는 총 3개 있습니다.


3 Experiments
Experiment(id=10, name=Experiment-10, dataset.name=iris1_jtzdihs, dataset.colCount=5, status=finished, estimatorType=CLASSIFIER, metric=ACCURACY, modelsCnt=28, deploymentsCnt=1)
Experiment(id=2, name=Experiment-2, dataset.name=iris1, dataset.colCount=5, status=finished, estimatorType=CLASSIFIER, metric=ACCURACY, modelsCnt=28)
Experiment(id=1, name=Experiment-1, dataset.name=diabetes2, dataset.colCount=11, status=finished, estimatorType=REGRESSOR, metric=NEG_MEAN_SQUARED_ERROR, modelsCnt=52, deploymentsCnt=2)

In [5]:
# Create a new experiment from a local CSV file.
# The file is looked up under the current user's ~/Downloads directory rather
# than a path hard-coded to one developer's machine, so the notebook runs for
# any user who has placed iris1.csv there.
iris_csv_path = Path.home() / 'Downloads' / 'iris1.csv'
# Pass a plain string, as the original call did.
experiment_new = client.create_experiment_from_file(str(iris_csv_path))
# Last expression: display the newly created experiment.
experiment_new

Experiment(id=11, name=Experiment-11, status=creating, estimatorType=classifier, metric=accuracy)

In [8]:
# Display the experiment again to check its current state.
# NOTE(review): execution counts jump from 5 to 8 and the displayed status
# goes from "creating" to "ready"; whatever waited for or refreshed the
# experiment in between is not visible in this notebook — confirm.
experiment_new

Experiment(id=11, name=Experiment-11, dataset.name=iris1_2mKwlW6, dataset.colCount=5, status=ready, estimatorType=CLASSIFIER, metric=ACCURACY)

In [9]:
# Get a preprocessing recommendation and update the preprocessor config with it.
experiment_new.preprocessor_config_recommend()

In [10]:
# Run preprocessing according to the configured settings.
experiment_new.preprocess()

In [12]:
# Run AutoML on the new experiment.
# In the recorded run this call returned True, and the experiment list fetched
# in the following cell shows this experiment with status=learning.
experiment_new.run()

True

In [13]:
# Fetch the list of experiments again (it now includes the new experiment).
experiments = client.experiments()
# Print how many experiments currently exist (the message text is Korean).
print(f'현재 Experiments는 총 {len(experiments)}개 있습니다.')
# Last expression: display the experiment list itself.
experiments

현재 Experiments는 총 4개 있습니다.


4 Experiments
Experiment(id=11, name=Experiment-11, dataset.name=iris1_2mKwlW6, dataset.colCount=5, status=learning, estimatorType=CLASSIFIER, metric=ACCURACY, modelsCnt=3)
Experiment(id=10, name=Experiment-10, dataset.name=iris1_jtzdihs, dataset.colCount=5, status=finished, estimatorType=CLASSIFIER, metric=ACCURACY, modelsCnt=28, deploymentsCnt=1)
Experiment(id=2, name=Experiment-2, dataset.name=iris1, dataset.colCount=5, status=finished, estimatorType=CLASSIFIER, metric=ACCURACY, modelsCnt=28)
Experiment(id=1, name=Experiment-1, dataset.name=diabetes2, dataset.colCount=11, status=finished, estimatorType=REGRESSOR, metric=NEG_MEAN_SQUARED_ERROR, modelsCnt=52, deploymentsCnt=2)

In [14]:
# Training takes a long time, so switch to an experiment that already finished.
experiment = experiments.get(id=10)
# experiment = experiments[1]  # this form also works
experiment

Experiment(id=10, name=Experiment-10, dataset.name=iris1_jtzdihs, dataset.colCount=5, status=finished, estimatorType=CLASSIFIER, metric=ACCURACY, modelsCnt=28, deploymentsCnt=1)

In [15]:
# Fetch the leaderboard of the selected experiment.
leaderboard = experiment.leaderboard()
# Print the number of models on the leaderboard (the message text is Korean).
# NOTE(review): "leaderboad" in the message is a typo for "leaderboard"; it is
# runtime text, so it is left unchanged here.
print(f'leaderboad의 model 갯수는 {len(leaderboard)}')
# Last expression: display the leaderboard entries.
leaderboard

leaderboad의 model 갯수는 28


[{'id': '197',
  'score': 1.0,
  'trainScore': 1.0,
  'validScore': 1.0,
  'testScore': 0.0,
  'estimatorName': 'lightgbm',
  'generator': 'optuna-pycaret',
  'file': {'size': '784667', 'sizeHumanized': '784.7 kB'},
  'deployedStatus': 'DONE'},
 {'id': '199',
  'score': 1.0,
  'trainScore': 0.9523809523809523,
  'validScore': 1.0,
  'testScore': 0.0,
  'estimatorName': 'random_forest',
  'generator': 'optuna-pycaret',
  'file': {'size': '129325', 'sizeHumanized': '129.3 kB'},
  'deployedStatus': None},
 {'id': '200',
  'score': 1.0,
  'trainScore': 0.9238095238095239,
  'validScore': 1.0,
  'testScore': 0.0,
  'estimatorName': 'xgboost',
  'generator': 'optuna-pycaret',
  'file': {'size': '139255', 'sizeHumanized': '139.3 kB'},
  'deployedStatus': None},
 {'id': '202',
  'score': 1.0,
  'trainScore': 0.9904761904761905,
  'validScore': 1.0,
  'testScore': 0.0,
  'estimatorName': 'gradient_boosting',
  'generator': 'optuna-pycaret',
  'file': {'size': '408021', 'sizeHumanized': '408.0 k

In [16]:
# Select the top-ranked model.
model = leaderboard.best_model()
# model = leaderboard[0]  # this form also works
model

{'id': '197',
 'score': 1.0,
 'trainScore': 1.0,
 'validScore': 1.0,
 'testScore': 0.0,
 'estimatorName': 'lightgbm',
 'generator': 'optuna-pycaret',
 'file': {'size': '784667', 'sizeHumanized': '784.7 kB'},
 'deployedStatus': 'DONE'}

In [17]:
# Select the second-ranked model instead (the best model is deployed
# automatically after a run, so it is already deployed).
# This rebinds `model`, replacing the best model selected in the previous cell.
model = leaderboard[1]
model

{'id': '199',
 'score': 1.0,
 'trainScore': 0.9523809523809523,
 'validScore': 1.0,
 'testScore': 0.0,
 'estimatorName': 'random_forest',
 'generator': 'optuna-pycaret',
 'file': {'size': '129325', 'sizeHumanized': '129.3 kB'},
 'deployedStatus': None}

In [18]:
# Deploy the selected model.
# In the recorded run the response reports deployedStatus 'REQUEST', and the
# deployment list fetched next shows it as 'DEPLOYING'.
model.deploy()

{'deployModel': {'deployment': {'id': '9', 'model': {'id': '199', '__typename': 'MlModelBaseType', 'deployedStatus': 'REQUEST'}}}}


In [19]:
# Fetch the deployments of the selected experiment.
deployments = experiment.deployments()
# Print how many models are deployed (the message text is Korean).
print(f'배포된 모델은 {len(deployments)}개 입니다.')
# Last expression: display the deployment entries.
deployments

배포된 모델은 2개 입니다.


[{'id': '9',
  'name': 'random_forest',
  'description': None,
  'status': 'DEPLOYING',
  'modelType': 'model',
  'modelPk': 199,
  'allMetricsJson': None,
  'createdAt': '2021-02-15T17:20:03.249821',
  'testScore': None,
  'model': {'id': '199', 'trainScore': 0.9523809523809523, 'validScore': 1.0},
  'file': {'url': None, 'size': None, 'sizeHumanized': None, 'name': ''}},
 {'id': '8',
  'name': 'lightgbm',
  'description': None,
  'status': 'DONE',
  'modelType': 'model',
  'modelPk': 197,
  'allMetricsJson': None,
  'createdAt': '2021-02-15T17:18:05.064582',
  'testScore': None,
  'model': {'id': '197', 'trainScore': 1.0, 'validScore': 1.0},
  'file': {'url': 'http://localhost:8000/media/runtime_0010/runtimeprocess_0045/output/pipeline.pkl',
   'size': '784977',
   'sizeHumanized': '785.0 kB',
   'name': 'runtime_0010/runtimeprocess_0045/output/pipeline.pkl'}}]

In [20]:
# Take the first deployment in the list; in the recorded run this is the
# random_forest deployment requested above.
# NOTE(review): its status was still 'DEPLOYING' when displayed — confirm that
# predict() may be called before the deployment reaches 'DONE'.
deployed_model = deployments[0]
deployed_model

{'id': '9',
 'name': 'random_forest',
 'description': None,
 'status': 'DEPLOYING',
 'modelType': 'model',
 'modelPk': 199,
 'allMetricsJson': None,
 'createdAt': '2021-02-15T17:20:03.249821',
 'testScore': None,
 'model': {'id': '199', 'trainScore': 0.9523809523809523, 'validScore': 1.0},
 'file': {'url': None, 'size': None, 'sizeHumanized': None, 'name': ''}}

In [21]:
# Get the most frequent value of each column, to use as default inputs for prediction.
columns = experiment.column_info()
most_frequent = columns.most_frequent_values()
# Last expression: display the most frequent values.
most_frequent

{'sepal length (cm)': '5.0',
 'sepal width (cm)': '3.0',
 'petal length (cm)': '1.5',
 'petal width (cm)': '0.2'}

In [22]:
# Build the input for prediction (simply reuse the most frequent values).
# This binds the same object as `most_frequent`; it is not a copy.
input_val = most_frequent
input_val 

{'sepal length (cm)': '5.0',
 'sepal width (cm)': '3.0',
 'petal length (cm)': '1.5',
 'petal width (cm)': '0.2'}

In [23]:
# Run a prediction on the deployed model with the input built above.
# In the recorded run the result is the string '0.0'.
predict_val = deployed_model.predict(input_val)
predict_val

'0.0'