## Build base Docker image (if not already built)

In [66]:
%%bash
# Build the base image used by the MLflow project. The build context is the
# current directory; the Dockerfile is taken from bike-rental-regression/.
docker build \
    --tag mlflow-docker-example \
    --file bike-rental-regression/Dockerfile \
    .

#1 [internal] load build definition from Dockerfile
#1 sha256:e4ac100f3282b0a3d2f8ff147cfc9f0edd9efeea61186835211ff96f5e556503
#1 transferring dockerfile: 121B done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:c6fb8584dff80808aaa78338dd530c344f07426c40f4d09e7ecb0e59b6ebb8a5
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/continuumio/miniconda3:4.10.3
#3 sha256:f692b03fa4ad9171171f333a701a320801f86a5a153c52ecb1003a5c4c2ae3ce
#3 DONE 0.0s

#4 [1/2] FROM docker.io/continuumio/miniconda3:4.10.3
#4 sha256:8d3ee3475e2d79064b8667ca44bad43683fdca09d0f59892ce0b7adcdd5a6eb5
#4 DONE 0.0s

#5 [2/2] RUN pip install mlflow pandas scikit-learn
#5 sha256:df114ef061e08b1a15f4b255a30ebcd6bce72a29a45f77217f96a321d1f1777f
#5 CACHED

#6 exporting to image
#6 sha256:e8c613e07b0b7ff33893b694f7759a10d42e180f2b4dc349fb57dc6b71dcab00
#6 exporting layers done
#6 writing image sha256:86f7ed0ce8d8684e21973191ee645f8bc61b235269de63dd745f3accb3a553b8 done
#6 namin

## Train a model

In [67]:
%%bash
# Run the bike-rental-regression MLflow project; every -P flag sets one
# project parameter (training period, test period and dataset location).
mlflow run bike-rental-regression \
    -P train_year=2011 \
    -P train_month=1 \
    -P test_year=2011 \
    -P test_month=3 \
    -P dataset_path=/data/train_bike.csv

Process is interrupted.


## Package the model in a Docker image and register it in the local registry

In [None]:
%%bash
# ID of the MLflow run whose logged model is packaged into the image.
# NOTE(review): this is hard-coded; update it when packaging a different run.
RUN_ID=c258b637e0a64ffe979248f475d4c94b

mlflow models build-docker \
    --model-uri "runs:/${RUN_ID}/model" \
    --name "my-image-name"

## Tag the Docker image in order to push it to GCP Artifact Registry

In [None]:
%%bash
# Give the locally built image a second name that points at the remote repository.
LOCAL_IMAGE=my-image-name:latest
REMOTE_IMAGE=europe-north1-docker.pkg.dev/aalto-atss/cyclone-docker-repo/my-image-name:latest

docker tag "${LOCAL_IMAGE}" "${REMOTE_IMAGE}"

## Push the image

In [None]:
%%bash
# Upload the tagged image to the remote repository.
REMOTE_IMAGE=europe-north1-docker.pkg.dev/aalto-atss/cyclone-docker-repo/my-image-name:latest

docker push "${REMOTE_IMAGE}"

The push refers to repository [europe-north1-docker.pkg.dev/aalto-atss/cyclone-docker-repo/my-image-name]
ba3832347bc7: Preparing
1e5fb491959e: Preparing
439d36d2da31: Preparing
d0cdc2725501: Preparing
9a421c24b37a: Preparing
6719756ec0b7: Preparing
931f8fb95381: Preparing
22843a8b08ac: Preparing
4554b763a583: Preparing
c38454c68f66: Preparing
2fb7b764853b: Preparing
824bf068fd3d: Preparing
6719756ec0b7: Waiting
931f8fb95381: Waiting
22843a8b08ac: Waiting
4554b763a583: Waiting
c38454c68f66: Waiting
824bf068fd3d: Waiting
2fb7b764853b: Waiting
d0cdc2725501: Layer already exists
1e5fb491959e: Layer already exists
ba3832347bc7: Layer already exists
9a421c24b37a: Layer already exists
439d36d2da31: Layer already exists
6719756ec0b7: Layer already exists
931f8fb95381: Layer already exists
22843a8b08ac: Layer already exists
4554b763a583: Layer already exists
c38454c68f66: Layer already exists
2fb7b764853b: Layer already exists
824bf068fd3d: Layer already exists
latest: digest: sha256:6e2557773

## Deploy on Google Cloud Run (GCR)

Create a new service on GCR, setting the container port to 8000 and the environment variables `DISABLE_NGINX` to `true` and `GUNICORN_CMD_ARGS` to `--bind=0.0.0.0`.
Essentially, we want to replicate this local command on GCR:
```
docker run -p 5001:8000 -e DISABLE_NGINX=true -e GUNICORN_CMD_ARGS="--bind=0.0.0.0" my-image-name:latest
```

## Test that the model deployed on GCR has the expected performance

We copy the service's base URL from the GCR console and append `/invocations` to form `URL`.

In [96]:
import requests
import json
import pandas as pd
import numpy as np

# Scoring endpoint of the deployed model; the commented-out values are
# alternative targets that can be swapped in.
URL = 'https://my-serving-image-ae5g3kdcea-lz.a.run.app/invocations'
# URL = 'http://localhost:5000/invocations'
# URL = 'http://34.121.124.220:8000/invocations'

# Load the data indexed by its parsed `datetime` column and drop the
# `casual` and `registered` columns.
dataset = (
    pd.read_csv(
        'data/train_bike.csv',
        header=0,
        sep=',',
        parse_dates=['datetime'],
        index_col='datetime',
    )
    .drop(columns=['casual', 'registered'])
)

# Evaluation period: keep only the rows whose timestamp falls in this year and month.
test_year = 2011
test_month = 3
in_test_period = (dataset.index.year == test_year) & (dataset.index.month == test_month)
test: pd.DataFrame = dataset.loc[in_test_period]
def time_features(input_df):
    """Return a copy of ``input_df`` with calendar columns taken from its index.

    Args:
        input_df: DataFrame whose index exposes ``month``, ``day`` and ``hour``
            attributes (e.g. a ``DatetimeIndex``).

    Returns:
        A new DataFrame with ``month``, ``day`` and ``hour`` columns added;
        ``input_df`` itself is not modified.
    """
    timestamps = input_df.index
    return input_df.assign(
        month=timestamps.month,
        day=timestamps.day,
        hour=timestamps.hour,
    )
# Upper bound, in seconds, on how long to wait for the service to respond.
REQUEST_TIMEOUT_S = 120

# Build the request payload once: the test rows with the engineered time
# features and without the target column, serialised in pandas "split"
# orientation and parsed back so `requests` can send it as a JSON body.
payload = json.loads(
    test.pipe(time_features)
    .drop(columns='count')
    .to_json(orient='split', index=False)
)

# Without a timeout the cell would hang indefinitely if the service never answers.
response = requests.post(URL, json=payload, timeout=REQUEST_TIMEOUT_S)
# Fail here on an HTTP error rather than scoring an error body as predictions.
response.raise_for_status()

predictions = response.json()



from sklearn.metrics import r2_score

# Reference R2 for the served model on the test period, and the allowed deviation.
EXPECTED_SCORE = 0.603
SCORE_TOLERANCE = 1e-2

# Compare the observed counts for the test period with the service's predictions.
observed_counts = time_features(test)['count'].values
score = r2_score(observed_counts, predictions)

# The expected score is only checked while the test month is 3.
assert test_month == 3
assert np.abs(score - EXPECTED_SCORE) < SCORE_TOLERANCE
print(f"R2_test: {score:.3f}; year: {test_year}, month: {test_month}")

R2_test: 0.603; year: 2011, month: 3
