In [1]:
from experiment.api import mlflow as mlflow_api
from experiment.utils import transformation

import numpy as np
import pandas as pd
import pathlib
import json

import mlflow as mlflow_lib

# import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.preprocessing.text import tokenizer_from_json

In [2]:
working_dir = pathlib.Path.cwd()

In [3]:
mlflow = mlflow_api.MLFlow()
mlflow.clean(gc=True)

kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]


Run with ID 31bd7438a14a44c7a74d546524b82dd4 has been permanently deleted.


In [4]:
clean_annotations = pd.read_csv(
    working_dir.parent / "data" / "output" / "clean_annotations.csv"
)

# skip the last report
sentences = clean_annotations["relevant_text"].to_list()[-1:]
clean_labels = clean_annotations["classifications"].to_list()[-1:]

In [5]:
print(f"sentences: {sentences}")
print(f"clean_labels: {clean_labels}")

sentences: ['Sağ maksiller sinüste retansiyon kisti izlenmiştir.']
clean_labels: [3]


In [6]:
clean_sentences = transformation.sentence_cleaning_pipeline(sentences)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gokasci/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
model_config_path = (
    working_dir.parent / "data" / "input" / "model_config" / "nlp_experiment.json"
)
model_config = mlflow.get_model_config(model_config_path)

In [8]:
# run the server
mlflow.run_server()

In [9]:
# get the best metric 
best_run = mlflow.get_best_run_by_metric("NLP Experiments", "val_accuracy")
best_run_id = best_run["run_id"]

[2023-08-20 22:15:02 +0200] [85983] [INFO] Starting gunicorn 21.2.0
[2023-08-20 22:15:02 +0200] [85983] [INFO] Listening at: http://127.0.0.1:9999 (85983)
[2023-08-20 22:15:02 +0200] [85983] [INFO] Using worker: sync
[2023-08-20 22:15:02 +0200] [85984] [INFO] Booting worker with pid: 85984
[2023-08-20 22:15:02 +0200] [85985] [INFO] Booting worker with pid: 85985
[2023-08-20 22:15:02 +0200] [85986] [INFO] Booting worker with pid: 85986
[2023-08-20 22:15:02 +0200] [85987] [INFO] Booting worker with pid: 85987


In [27]:
best_run_id

'31bd7438a14a44c7a74d546524b82dd4'

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  5.89it/s]
[2023-08-20 22:23:39 +0200] [85983] [INFO] Handling signal: term
[2023-08-20 22:23:39 +0200] [85986] [INFO] Worker exiting (pid: 85986)
[2023-08-20 22:23:39 +0200] [85987] [INFO] Worker exiting (pid: 85987)
[2023-08-20 22:23:39 +0200] [85984] [INFO] Worker exiting (pid: 85984)
[2023-08-20 22:23:39 +0200] [85985] [INFO] Worker exiting (pid: 85985)
[2023-08-20 22:23:40 +0200] [85983] [INFO] Shutting down: Master


In [25]:
# serve the model
mlflow.serve_model(best_run_id)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "/Users/gokasci/Desktop/reports/.venv/bin/mlflow", line 8, in <module>
    sys.exit(cli())
             ^^^^^
  File "/Users/gokasci/Desktop/reports/.venv/lib/python3.11/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gokasci/Desktop/reports/.venv/lib/python3.11/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
         ^^^^^^^^^^^^^^^^
  File "/Users/gokasci/Desktop/reports/.venv/lib/python3.11/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gokasci/Desktop/reports/.venv/lib/python3.11/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
                           ^^^^^^^^^

In [11]:
tokenizer_artifact: json = mlflow_lib.artifacts.load_dict(
    artifact_uri=f"{best_run.artifact_uri}/data/tokenizer.json"
)
tokenizer: Tokenizer = tokenizer_from_json(json.dumps(tokenizer_artifact))

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00,  3.51it/s]


In [16]:
# predict a sentence
tokenizer.fit_on_texts(clean_sentences)
clean_sequences = tokenizer.texts_to_sequences(clean_sentences)
clean_sequences_padded = pad_sequences(
    clean_sequences,
    maxlen=model_config["max_input_length"],
    padding=model_config["padding_type"],
    truncating=model_config["trunc_type"],
)
clean_sequences_padded = np.array(clean_sequences_padded)
# predictions = model.predict(clean_sequences_padded)
# predictions = [np.argmax(prediction) for prediction in predictions]

In [17]:
clean_sequences_padded

array([[16, 46, 23, 97, 98,  4,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0]], dtype=int32)

1. Emergency
2. Normal
3. Non Emergency [Doctor]
4. Non Emergency [No Doctor]

In [22]:
type())

list

In [26]:
mlflow.get_predictions(best_run_id, [[1.0, 2.0, 3.0]])

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=1234): Max retries exceeded with url: /invocations/31bd7438a14a44c7a74d546524b82dd4/predict (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x2a814d910>: Failed to establish a new connection: [Errno 61] Connection refused'))