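# Regression Bicycle

Train regression models that predict daily bicycle rentals on Azure Machine Learning: a linear regression pipeline assembled from reusable components, a standalone ridge regression job, and a hyperparameter sweep over that ridge regression job.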

## Initialization

### Import

In [2]:
from datetime import datetime
from os.path import dirname
from pathlib import Path

import mltable
from azure.ai.ml import Input, Output, load_component, load_job
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import Environment
from azure.ai.ml.sweep import Choice
from script.ml_base import COMPONENT_PATH, ml_client


### Configuration

In [3]:
# Anchor every path on the current working directory.
# `Path.cwd()` replaces the IPython-only `%pwd` magic so this cell stays valid
# plain Python (e.g. when the notebook is exported to a script or linted).
# NOTE(review): assumes the kernel is started from the project root — confirm.
PROJECT_DIR = Path.cwd()
# `%pwd` produced a plain string; keep the name available for any cell that still reads it.
pwd = str(PROJECT_DIR)

# Azure ML definitions live under `script/`.
SCRIPT_DIR = PROJECT_DIR / "script"
JOB_DIR = SCRIPT_DIR / "job"  # job YAML specs loaded with `load_job`
COMPONENT_DIR = SCRIPT_DIR / "component"  # component YAML specs loaded with `load_component`
ENVIRONMENT_DIR = SCRIPT_DIR / "environment"

# Project-level working directories.
DATA_DIR = PROJECT_DIR / "data"
OUTPUT_DIR = PROJECT_DIR / "output"
MODEL_DIR = PROJECT_DIR / "model"


## Environment

## Component

## Pipeline

### Load Component

#### Drop Column

In [7]:
# Build the `drop_column` component from its YAML spec in COMPONENT_DIR.
drop_column = load_component(source=COMPONENT_DIR / "drop_column.yml")


#### Split Xy

In [8]:
# Build the `split_x_y` component from its YAML spec in COMPONENT_DIR.
split_x_y = load_component(source=COMPONENT_DIR / "split_x_y.yml")


#### Split Train Test

In [9]:
# Build the `split_train_test` component from its YAML spec in COMPONENT_DIR.
split_train_test = load_component(source=COMPONENT_DIR / "split_train_test.yml")


#### Linear Regression Model

In [10]:
# Build the `linear_regression` component from its YAML spec in COMPONENT_DIR.
linear_regression = load_component(source=COMPONENT_DIR / "linear_regression.yml")


### Bicycle Train Linear Regression

#### Build Pipeline

In [11]:
@pipeline(
    name="bicycle-train-linear-regression",
    display_name="Bicycle Train Linear Regression",
    description="Pipeline to train a linear regression model to predict bicycle rentals",
    experiment_name="regression-bicycle",
    tags={"project": "regression-bicycle"},
)
def bicycle_train_l_reg(
    mltable_dataset,
    drop_col: str = "dteday,yr,hum,windspeed",
    label: str = "rentals",
    test_size: float = 0.25,
    shuffle: bool = True,
    fit_intercept: bool = True,
):
    """Chain the loaded components into a linear regression training pipeline.

    The steps run in sequence, each consuming the previous step's output:
    drop columns -> split features/label -> split train/test -> fit model.

    Args:
        mltable_dataset: Pipeline input passed to the drop-column step as ``input_data``.
        drop_col: Value passed to the drop-column step as ``drop_col``.
        label: Value passed to the x/y split step as ``label``.
        test_size: Value passed to the train/test split step as ``test_size``.
        shuffle: Value passed to the train/test split step as ``shuffle``.
        fit_intercept: Value passed to the linear regression step as ``fit_intercept``.

    Returns:
        dict: ``{"l_reg_model": ...}`` exposing the ``l_reg_model`` output of the
        linear regression step as the pipeline output.
    """
    # Step 1: remove the columns named in `drop_col`.
    dropped_col_data = drop_column(
        input_data=mltable_dataset,
        drop_col=drop_col,
    )
    # Step 2: separate the `label` column from the remaining data.
    xy_data = split_x_y(
        input_data=dropped_col_data.outputs.dropped_col_data,
        label=label,
    )
    # Step 3: produce the train/test partitions.
    train_test_data = split_train_test(
        input_data=xy_data.outputs.x_y_data,
        test_size=test_size,
        shuffle=shuffle,
    )
    # Step 4: fit the linear regression model on the partitioned data.
    l_reg_model = linear_regression(
        input_data=train_test_data.outputs.train_test_data,
        fit_intercept=fit_intercept,
    )

    return {"l_reg_model": l_reg_model.outputs.l_reg_model}


#### Configure Pipeline

In [13]:
# MLTable input pointing at `azureml:daily-bike-share:2`.
bike_share_input = Input(
    path="azureml:daily-bike-share:2",
    type=AssetTypes.MLTABLE,
)

# Instantiate the pipeline. The hyperparameters are spelled out explicitly so
# the values used for this run are visible next to the submission.
pipeline_job = bicycle_train_l_reg(
    mltable_dataset=bike_share_input,
    drop_col="dteday,yr,hum,windspeed",
    label="rentals",
    test_size=0.25,
    shuffle=True,
    fit_intercept=True,
)


In [14]:
# Pipeline-level default compute target and datastore.
job_settings = pipeline_job.settings
job_settings.default_datastore = "workspaceblobstore"
job_settings.default_compute = "mlw-basic-cluster-fef66d1bb2"

# The model output of the pipeline is written in "upload" mode.
pipeline_job.outputs.l_reg_model.mode = "upload"

# Print the resolved job definition for inspection before submitting it.
print(pipeline_job)


Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


display_name: Bicycle Train Linear Regression
description: Pipeline to train a linear regression model to predict bycicle rentals
tags:
  project: regression-bicycle
type: pipeline
inputs:
  drop_col: dteday,yr,hum,windspeed
  label: rentals
  test_size: 0.25
  shuffle: 1
  fit_intercept: 1
  mltable_dataset:
    type: mltable
    path: azureml:daily-bike-share:2
outputs:
  l_reg_model:
    mode: upload
    type: mlflow_model
jobs:
  dropped_col_data:
    type: command
    inputs:
      input_data:
        path: ${{parent.inputs.mltable_dataset}}
      drop_col:
        path: ${{parent.inputs.drop_col}}
    component:
      $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
      name: drop_column
      version: '5'
      display_name: Drop Columns
      description: Drop specified columns from input dataset
      tags:
        project: regression-bicycle
      type: command
      inputs:
        input_data:
          type: mltable
          description: 

#### Submit Pipeline

In [18]:
# Submit the pipeline. `pipeline_job` is rebound to the object returned by the
# service call, so from here on it refers to the submitted job, not the local definition.
pipeline_job = ml_client.jobs.create_or_update(job=pipeline_job)
pipeline_job


Experiment,Name,Type,Status,Details Page
regression-bicycle,loyal_feijoa_1rr8fp0j6j,pipeline,Preparing,Link to Azure Machine Learning studio


## Job

### Train Ridge Regression

In [20]:
# The ridge regression job is defined in a YAML spec under JOB_DIR.
ridge_reg_job_spec = JOB_DIR / "ridge_regression.yml"
ridge_reg_job = load_job(source=ridge_reg_job_spec)


#### Submit Job

In [21]:
# Give each submission its own name by suffixing a second-resolution timestamp.
submission_stamp = datetime.now().strftime("%Y%m%d%H%M%S")

# Re-running this cell must not stack timestamps ("name-<stamp>-<stamp>"):
# if the current name already ends with a 14-digit stamp added by a previous
# run, strip it before appending the new one.
base_name, separator, trailing = ridge_reg_job.name.rpartition("-")
if not (separator and len(trailing) == 14 and trailing.isdigit()):
    base_name = ridge_reg_job.name

ridge_reg_job.name = f"{base_name}-{submission_stamp}"
ml_client.jobs.create_or_update(ridge_reg_job)


Experiment,Name,Type,Status,Details Page
ridge-regression,ridge-regression-20231118201023,command,Starting,Link to Azure Machine Learning studio


## Sweep Job

### Ridge Regression Sweep

In [22]:
# Discrete candidate values for each input that the sweep will explore.
sweep_search_space = {
    "test_size": Choice(values=[0.20, 0.25]),
    "alpha": Choice(values=[1.0, 5.0, 10.0]),
}

# Calling the loaded job with search-space expressions in place of concrete
# input values yields the object that `.sweep(...)` is later invoked on.
ridge_reg_job_for_sweep = ridge_reg_job(**sweep_search_space)


In [24]:
# Sweep the search space with grid sampling, maximizing the
# "Ridge_score_X_test" metric.
ridge_reg__sweep = ridge_reg_job_for_sweep.sweep(
    primary_metric="Ridge_score_X_test",
    goal="Maximize",
    sampling_algorithm="grid",
)

# Cap the sweep at two concurrent trials.
ridge_reg__sweep.set_limits(max_concurrent_trials=2)


#### Submit Sweep Job

In [25]:
# Submit through the jobs operations group, like the other submissions in
# this notebook, so every job goes through the same entry point.
ml_client.jobs.create_or_update(ridge_reg__sweep)


Experiment,Name,Type,Status,Details Page
regression-bicycle,silly_energy_cwlbpk91sv,sweep,Running,Link to Azure Machine Learning studio
