In [1]:
import pandas as pd
import requests
import json
import pprint
from sklearn.metrics import r2_score
from copy import deepcopy
# Invocations endpoint of the serving service; passed to make_request in the
# demo cells below.
URL = 'https://my-serving-image-ae5g3kdcea-lz.a.run.app/invocations'
# Alternative endpoint on localhost (uncomment to use it instead):
# URL = 'http://localhost:5000/invocations'


def time_features(input_df):
    """Return a copy of ``input_df`` extended with calendar columns.

    The ``month``, ``day`` and ``hour`` columns are read from the frame's
    index, so the index must expose those attributes (e.g. a
    ``DatetimeIndex``). The input frame itself is left untouched.
    """
    stamps = input_df.index
    calendar_columns = {
        'month': stamps.month,
        'day': stamps.day,
        'hour': stamps.hour,
    }
    return input_df.assign(**calendar_columns)


def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    Slices are produced in order; the final one is shorter when ``len(lst)``
    is not a multiple of ``n``. Works for any sequence that supports ``len``
    and slicing.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))


def make_request(url, test_year, test_month):
    """Score the serving endpoint on one month of the bike dataset.

    Loads ``data/train_bike.csv``, keeps the rows whose ``datetime`` index
    falls in ``test_year``/``test_month``, posts them to ``url`` in chunks
    (features plus true labels) and computes the R2 score of the returned
    predictions against the true ``count`` values.

    Args:
        url: Endpoint that receives each chunk as a JSON POST body.
        test_year: Year of the rows to send.
        test_month: Month of the rows to send.

    Returns:
        dict with the keys ``R2_test``, ``test_year`` and ``test_month``.

    Raises:
        ValueError: If the dataset has no rows for the requested month.
        AssertionError: If a response status code is not 200.
    """
    # Load dataset
    dataset = pd.read_csv('data/train_bike.csv', header=0,  sep=',',
                          parse_dates=['datetime'], index_col='datetime').drop(
        columns=['casual', 'registered']
    )
    test: pd.DataFrame = dataset.loc[
        lambda df: (df.index.year == test_year) & (
            df.index.month == test_month)
    ]
    if test.empty:
        # Fail before contacting the service; r2_score would reject the empty
        # arrays anyway, but with a far less helpful message.
        raise ValueError(
            f'no rows found for year={test_year}, month={test_month}')

    # Derive the calendar features once and reuse them for labels and payload.
    features = test.pipe(time_features)
    true_count = features['count'].values

    # Convert it to the invocations API format
    test_json_str: str = (features
                          .drop(columns='count')
                          .to_json(orient='split', index=False))
    test_json = json.loads(test_json_str)

    # The payload identifies columns by position rather than by name.
    column_ids = list(range(len(test_json['columns'])))

    # Split it into n-prediction chunks
    n = 100
    all_predictions = []
    for chunk_data, chunk_true_count in zip(
        chunks(test_json['data'], n),
        chunks(true_count, n)
    ):
        # Shallow merge: every key that would be shared is overridden below,
        # so there is no need to deep-copy the full month for each chunk.
        chunk_json = {
            **test_json,
            'columns': column_ids,
            'data': chunk_data,
            'labels': chunk_true_count.tolist(),
        }

        # Post to the endpoint the caller asked for, not the module global.
        response = requests.post(
            url,
            json=chunk_json
        )

        assert response.status_code == 200, \
            f'expected 200, got {response.status_code}, message: {response.content.decode("unicode_escape")}'

        predictions = response.json()
        all_predictions.extend(predictions)

    score = r2_score(true_count, all_predictions)
    return {
        'R2_test': score, 'test_year': test_year, 'test_month': test_month
    }




---

# Demo

## Setup (i.e. get to state 1)


* Deploy a revision that uses the Docker image tagged `month-1`:

`gcloud run deploy my-serving-image --image europe-north1-docker.pkg.dev/aalto-atss/cyclone-docker-repo/my-serving-image:month-1 --region=europe-north1`

* Delete everything from the database

* Insert the month 1 data into the database

In [7]:
# Send the January 2011 rows to the serving endpoint and print the R2 summary.
print(make_request(URL, 2011, test_month=1))

{'R2_test': 0.9319258150717562, 'test_year': 2011, 'test_month': 1}


## State 1

* Database has data for months {1}
* Model has been trained on month 1
* R2 is undefined

## Action 1: make a request for month 2

In [8]:
# Send the February 2011 rows to the serving endpoint and print the R2 summary.
print(make_request(URL, 2011, test_month=2))

{'R2_test': 0.6654214142652897, 'test_year': 2011, 'test_month': 2}


## State 2

* Database has data for months {1, 2}
* Model has been trained on month 1
* R2 is >0.4

## Action 2: make a request for month 3

In [9]:
# Send the March 2011 rows to the serving endpoint and print the R2 summary.
print(make_request(URL, 2011, test_month=3))

{'R2_test': 0.46844637823598234, 'test_year': 2011, 'test_month': 3}


## State 3

* Database has data for months {1, 2, 3}
* Model has been trained on month 1
* R2 is >0.4

## Action 3: make a request for month 4

In [10]:
# Send the April 2011 rows to the serving endpoint and print the R2 summary.
print(make_request(URL, 2011, test_month=4))

{'R2_test': 0.16358972823864737, 'test_year': 2011, 'test_month': 4}


## State 4

* Database has data for months {1, 2, 3, 4}
* Model has been trained on month 1
* R2 is <0.4

## Action 4

R2 has dropped below the 0.4 threshold, so a new model is automatically
retrained on the latest data and redeployed.

## State 5

* Database has data for months {1, 2, 3, 4}
* Model has been trained on month 4
* R2 is undefined

## Action 5: make a request for month 5

In [11]:
# Send the May 2011 rows to the serving endpoint and print the R2 summary.
print(make_request(URL, 2011, test_month=5))

{'R2_test': 0.5445912080295541, 'test_year': 2011, 'test_month': 5}


## State 6

* Database has data for months {1, 2, 3, 4, 5}
* Model has been trained on month 4
* R2 is >0.4