In [None]:
import os
import argparse
import kubeflow.katib as kc
from datetime import datetime
from kubernetes.client import V1ObjectMeta

class KatibJob(object):
    """Builds a Katib v1alpha3 Experiment for the sloshing training job and submits it."""

    def get_experiment_name(self, experiment_name):
        """Return ``experiment_name`` with a timestamp suffix to make it unique per run.

        Args:
            experiment_name: Base name of the experiment (e.g. ``"sloshing"``).

        Returns:
            A string of the form ``<experiment_name>-<YYYYmmddHHMMSS>``.
        """
        # Kubernetes object names must start with an alphanumeric character, so
        # the separator goes between the base name and the timestamp rather than
        # in front. Hour and minute are included so that two runs on the same
        # day do not collide merely because they share the same second value.
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        return experiment_name + "-" + timestamp

    def train(self):
        """Parse CLI options, assemble the Experiment and create it through ``KatibClient``.

        Command-line options (all optional):
            --dataset_filename: value forwarded to the trial as ``--filename=``.
            --loss: objective goal for the ``loss`` metric (parsed as float).
            --image_name: container image used by every trial.

        The experiment is created in the ``shi`` namespace, uses the ``random``
        algorithm to search ``epoch`` in [10, 30], and minimizes ``loss``.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--dataset_filename", required=False, default="Sloshing_200702.csv")
        # type=float so a goal given on the command line is numeric, not a string.
        parser.add_argument("--loss", required=False, type=float, default=1.0)
        parser.add_argument("--image_name", required=False, default='kubeflow-registry.default.svc.cluster.local:30000/sloshing-train-job:236C3225')
        args = parser.parse_args()

        # Katib experiment description
        metadata = V1ObjectMeta(
            name=self.get_experiment_name("sloshing"),
            namespace="shi"
        )

        algorithm_spec = kc.V1alpha3AlgorithmSpec(
            algorithm_name="random"
        )

        objective_spec = kc.V1alpha3ObjectiveSpec(
            type="minimize",
            goal=args.loss,
            objective_metric_name="loss"
        )

        # Search space: a single integer hyperparameter. Bounds are passed as
        # strings, as in the original code.
        parameters = [
            kc.V1alpha3ParameterSpec(
                name="epoch",
                parameter_type="int",
                feasible_space=kc.V1alpha3FeasibleSpace(
                    min="10",
                    max="30"
                )
            )
        ]

        # Kubernetes Job run for each trial; the ${trialParameters.epoch}
        # placeholder refers to the trial parameter declared in trial_template.
        trial_spec = {
            "apiVersion": "batch/v1",
            "kind": "Job",
            "spec": {
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "name": "training-container",
                                "image": args.image_name,
                                "command": [
                                    "python",
                                    "/app/sloshing_for_train.py",
                                    "--epoch=${trialParameters.epoch}",
                                    "--filename=" + args.dataset_filename
                                ]
                            }
                        ],
                        "restartPolicy": "Never"
                    }
                }
            }
        }

        # NOTE(review): this relies on the installed kubeflow-katib SDK exposing
        # `trial_parameters` / `trial_spec` on V1alpha3TrialTemplate and a
        # V1alpha3TrialParameterSpec model — confirm against the SDK version in
        # the image.
        trial_template = kc.V1alpha3TrialTemplate(
            trial_parameters=[
                kc.V1alpha3TrialParameterSpec(
                    name="epoch",
                    description="Epoch of training model",
                    reference="epoch"
                )
            ],
            trial_spec=trial_spec
        )

        experiment = kc.V1alpha3Experiment(
            api_version="kubeflow.org/v1alpha3",
            kind="Experiment",
            metadata=metadata,
            spec=kc.V1alpha3ExperimentSpec(
                max_trial_count=12,
                parallel_trial_count=3,
                max_failed_trial_count=3,
                algorithm=algorithm_spec,
                objective=objective_spec,
                parameters=parameters,
                trial_template=trial_template
            )
        )

        katib_client = kc.KatibClient()
        katib_client.create_experiment(experiment)
        
        
if __name__ == "__main__":
    # Two modes, selected by the FAIRING_RUNTIME environment variable:
    #   * set   -> submit the Katib experiment via KatibJob.train()
    #   * unset -> build and push an image from this notebook with Fairing
    # (presumably Fairing sets FAIRING_RUNTIME inside the image it builds — confirm)
    if os.getenv('FAIRING_RUNTIME') is not None:
        sloshing = KatibJob()
        sloshing.train()
    else:
        # Set no_proxy for the in-cluster registry and MinIO hosts before the
        # Fairing modules are imported.
        os.environ['no_proxy'] = "kubeflow-registry.default.svc.cluster.local,0.0.0.0,minio-service.kubeflow.svc.cluster.local"
        from kubeflow.fairing.builders.append.append import AppendBuilder
        from kubeflow.fairing.preprocessors.converted_notebook import ConvertNotebookPreprocessor

        REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'
        base_image = 'kubeflow-registry.default.svc.cluster.local:30000/sloshing:1.0.4'
        image_name = 'sloshing-for-katib'

        # Preprocessor is given this notebook's file name as its input.
        preprocessor = ConvertNotebookPreprocessor(
            notebook_file="sloshing_for_katib.ipynb"
        )
        builder = AppendBuilder(
            registry=REGISTRY,
            image_name=image_name,
            base_image=base_image,
            push=True,
            preprocessor=preprocessor,
        )
        builder.build()
    

In [None]:
# Diagnostic cell: list the installed packages and their versions.
# NOTE(review): `!pip` runs in a subshell and may not target the kernel's
# environment; `%pip freeze` would — confirm which one is intended.
!pip freeze

In [1]:
# Clone the repository into the current working directory.
# NOTE(review): the saved output of this cell shows the clone failing with a
# connection reset — re-run it or drop the cell before sharing the notebook.
!git clone https://github.com/mojokb/kubexxx-off

Cloning into 'kubexxx-off'...
fatal: unable to access 'https://github.com/mojokb/kubexxx-off/': Recv failure: Connection reset by peer
