# Project creation notebook
This notebook creates a project and registers all the functions that will be used. It then saves the project, producing a `project.yaml` file that can be used later to load the project.

In [1]:
import mlrun
import os
import pandas as pd
import numpy as np
import random

In [2]:
# Base project name. The creation log shows the stored project carrying an
# extra user suffix, which is presumably the effect of user_project=True.
project_name = "stress-project"

# Load the project if it already exists, otherwise create it, using the
# current directory as the project context.
project = mlrun.get_or_create_project(
    project_name,
    context="./",
    user_project=True,
)

> 2024-05-27 14:00:33,790 [info] Created and saved project: {'name': 'stress-project-amite', 'from_template': None, 'overwrite': False, 'context': './', 'save': True}
> 2024-05-27 14:00:33,793 [info] Project created successfully: {'project_name': 'stress-project', 'stored_in_db': True}


# Setting the functions

## Jobs

In [3]:
# Every job below is registered from the same source file, with the same image
# and runtime kind; only the registered name and the handler differ.
job_common = dict(func="jobs_func.py", image="mlrun/mlrun", kind="job")

# Function names use dashes only: MLRun deprecates underscores in names and
# rewrites them to dashes (emitting a warning), so spell them with dashes up front.
normal_job_sec = project.set_function(name="normal-job-sec", handler="func_sec", **job_common)
normal_job_min = project.set_function(name="normal-job-min", handler="func_min", **job_common)
normal_job_hour = project.set_function(name="normal-job-hour", handler="func_hour", **job_common)
artifact_job = project.set_function(name="artifact-job", handler="func_artifact", **job_common)

# Gets 2 parameters that define the dataframe size - "num_rows" (int) and "num_columns" (int)
dataset_job = project.set_function(name="dataset-job", handler="func_dataset", **job_common)

Names with underscore '_' are about to be deprecated, use dashes '-' instead. Replacing 'normal_job-min' underscores with dashes.


## Spark

In [4]:
# Register the Spark job function on the project.
spark_func = project.set_function(
    func="spark_jobs_func.py",
    name="spark-job",
    handler="spark_handler",
    kind="spark",
    image="mlrun/mlrun",
)
# Apply the automatic mount modifier to the function.
spark_func = spark_func.apply(mlrun.auto_mount())

# Driver resources: 1 CPU / 1G memory requested, CPU capped at 1.
spark_func.with_driver_requests(cpu="1", mem="1G")
spark_func.with_driver_limits(cpu="1")

# Executor resources: same sizing as the driver.
spark_func.with_executor_requests(cpu="1", mem="1G")
spark_func.with_executor_limits(cpu="1")

# NOTE(review): presumably wires in the Iguazio platform Spark configuration - confirm.
spark_func.with_igz_spark()

# Always pull the image, and run with two replicas.
spark_func.spec.image_pull_policy = "Always"
spark_func.spec.replicas = 2

## Dask

In [5]:
# Job function whose "train" handler lives in dask_jobs_func.py.
func = project.set_function(
    func="./dask_jobs_func.py",
    name="dask-func",
    image="mlrun/mlrun",
    kind="job",
    handler="train",
)

# Stand-alone Dask cluster function (created directly, not via project.set_function).
dask_cluster = mlrun.new_function(name="dask-cluster", kind="dask", image="mlrun/mlrun")

# Add volume mounts.
dask_cluster.apply(mlrun.auto_mount())

# Expose the Dask UI dashboard through a NodePort service.
dask_cluster.spec.service_type = "NodePort"

# Run a single container.
dask_cluster.spec.replicas = 1

# Pass the MLRun DB path and the project name to the cluster as environment variables.
# MLRUN_DBPATH must be set in this notebook's environment, otherwise this raises KeyError.
dask_cluster.set_env("MLRUN_DBPATH", os.environ["MLRUN_DBPATH"])
dask_cluster.set_env("MLRUN_DEFAULT_PROJECT", project.name)

# Save the cluster function and display the URI it was stored under.
uri = dask_cluster.save()
uri

'db://stress-project-amite/dask-cluster'

## Nuclio

In [6]:
# Register the Nuclio function, whose entry point is `handler` in nuclio_func.py.
nuclio_func = project.set_function(
    func="nuclio_func.py",
    name="nuclio-func",
    handler="handler",
    image="mlrun/mlrun",
    kind="nuclio",
)

## Log models

In [7]:
# Gets one parameter, "number_of_models" (int), that defines the number of models to create.
# scikit-learn is listed as an additional requirement for this function.
log_models_func = project.set_function(
    func="log_models.py",
    name="log-models-job",
    handler="log_n_models",
    image="mlrun/mlrun",
    kind="job",
    requirements=["scikit-learn"],
)

## Serving

In [11]:
# Gets 2 parameters:
#   "number_of_servings" (int) - the number of servings
#   "number_of_models" (int)   - the number of models in each serving
serving_creator_func = project.set_function(
    func="serving_creator_func.py",
    name="serving-creator-job",
    handler="deploy_serving_functions",
    image="mlrun/mlrun",
    kind="job",
)

## Workflow

In [9]:
# Register the workflow defined in workflow.py under the name "workflow-func".
project.set_workflow(
    name="workflow-func",
    workflow_path="workflow.py",
)

## Save project

In [12]:
# Save the project with everything registered above; the returned project
# object is the cell's displayed output.
project.save()

<mlrun.projects.project.MlrunProject at 0x7f8574931b80>