## Distibuted Training With Hyperparameter tuning using Hyperopt and MLflow

#### Import libs

In [1]:
import numpy as np
import tensorflow as tf
import mlflow
from mlflow.models import infer_signature
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.18.0


In [2]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("distibuted_trainingAnd_params_tuning").master("spark://spark-master:7077") \
        .getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/10/31 17:12:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


#### Load Dataset

In [3]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = np.array(x_train)

print(x_train.shape)
print(y_train.shape)

(60000, 28, 28)
(60000,)


#### Define model method

In [4]:
# import libs for Tuning
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# for distibuted training
from hyperopt import SparkTrials

In [5]:
def create_model(l1_noNode,l1_activation,l1_droupout):
    model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(l1_noNode, activation=l1_activation),
    tf.keras.layers.Dropout(l1_droupout),
    tf.keras.layers.Dense(10)
    ])
    return model

In [6]:
def train_model(p_epoch=2 ,p_optimizer="adam" ,
                l1_noNode=32,l1_activation="relu",l1_droupout=0.2):


    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model = create_model(l1_noNode,l1_activation,l1_droupout)


    model.compile(optimizer=p_optimizer,
              loss=loss_fn,
              metrics=['accuracy'])
    

  
    model.fit(x_train, y_train, epochs=p_epoch)
    eval_loss, eval_acc  = model.evaluate(x_test,  y_test, verbose=2)

    print("eval_loss, eval_acc : ",eval_loss, eval_acc)
    return model, eval_loss, eval_acc




In [7]:
# train_model()

In [8]:
def train_with_hyperopt(params):
    with mlflow.start_run(nested=True) as run:
        p_epoch = params["epochs"]
        p_optimizer = params["optimizer"]
        l1_noNode = params["l1_noNode"]
        l1_activation= params["l1_activation"]
        l1_droupout = params["l1_droupout"]

        model, eval_loss, eval_acc = train_model(p_epoch,p_optimizer,l1_noNode,l1_activation,l1_droupout)



        best_model, eval_loss, eval_acc = train_model(p_epoch=p_epoch,p_optimizer=p_optimizer,
                                                l1_activation=l1_activation,
                                                l1_droupout=l1_droupout,
                                                l1_noNode=l1_noNode)


        mlflow.log_metric("eval_loss",eval_loss)
        mlflow.log_metric("eval_acc",eval_acc)
        model_signature = infer_signature(x_train,best_model.predict(x_train))
        
        mlflow.keras.log_model(best_model,"ths_tune_model",signature=model_signature)

    return {"loss": eval_loss, "status": STATUS_OK, "model": model, "eval_acc": eval_acc}


In [9]:
epochs = [1,2,3,4]
optimizer = ["Adam","SGD","RMSprop"]
l1_noNode = [32,64,128] 
l1_activation = ["relu","softmax","tanh"]
l1_droupout = [0.2,0.3,0.4,0.5]

search_params_space ={
    "epochs": hp.choice("epochs",epochs),
    "optimizer" : hp.choice("optimizer",optimizer),
    "l1_noNode" : hp.choice("l1_noNode",l1_noNode ),
    "l1_activation" : hp.choice("l1_activation",l1_activation),
    "l1_droupout": hp.choice("l1_droupout", l1_droupout)

}


spark_trials = SparkTrials()
algo = tpe.suggest
print(algo)

Because the requested parallelism was None or a non-positive value, parallelism will be set to (2), which is Spark's default parallelism (2), or 1, whichever is greater. We recommend setting parallelism explicitly to a positive value because the total of Spark task slots is subject to cluster sizing.


<function suggest at 0xffff899562a0>


#### Define MLflow experiment

In [10]:
import mlflow
import time
mlflow_uri = "http://mlflow-server:8888/"
mlflow.set_tracking_uri(mlflow_uri)
time.sleep(5)
mlflow.set_experiment("distibuted_trainingAnd_params_tuning_with_mlflow_1")


<Experiment: artifact_location='mlflow-artifacts:/728929677351795026', creation_time=1730389729654, experiment_id='728929677351795026', last_update_time=1730389729654, lifecycle_stage='active', name='distibuted_trainingAnd_params_tuning_with_mlflow_1', tags={}>

In [11]:

import mlflow.pyspark.ml
mlflow.pyspark.ml.autolog()

In [12]:
import pyspark
pyspark.__version__

'3.5.3'

In [13]:

mlflow.tensorflow.autolog()
mlflow.enable_system_metrics_logging()
time.sleep(5)



In [14]:
with mlflow.start_run() as run:

    argmin = fmin(
    fn= train_with_hyperopt,
    space=search_params_space,
    algo=algo,
    max_evals = 3,
    # for distrubuted training
    trials=spark_trials 
    )
   


The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet

2024/10/31 17:12:11 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]


2024/10/31 17:12:11 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.

  super().__init__(**kwargs)



[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:11[0m 134ms/step - accuracy: 0.0625 - loss: 2.3106
[1m  86/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 594us/step - accuracy: 0.0823 - loss: 2.3049  
[1m 185/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 548us/step - accuracy: 0.0861 - loss: 2.3044
[1m 286/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 530us/step - accuracy: 0.0897 - loss: 2.3041
[1m 383/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 527us/step - accuracy: 0.0935 - loss: 2.3037
[1m 483/1875[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 522us/step - accuracy: 0.0973 - loss: 2.3035
[1m 583/1875[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 519us/step - accuracy: 0.1003 - loss: 2.3033
[1m 684/1875[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m0s[0m 516us/step - accuracy: 0.1029 - loss: 2.3031
[1m 783/1875[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m0s[0m 514us/step - accuracy: 0.1050 - loss: 2.30




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 529us/step - accuracy: 0.1168 - loss: 2.3009

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

313/313 - 0s - 505us/step - accuracy: 0.1174 - loss: 2.2902

eval_loss, eval_acc :                                
2.2901601791381836                                   
0.11739999800920486                                  
  0%|          | 0/3 [00:04<?, ?trial/s, best loss=?]

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:38[0m 117ms/step - accuracy: 0.0312 - loss: 2.3045
[1m  81/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 631us/step - accuracy: 0.0901 - loss: 2.3031  
[1m 178/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 568us/step - accuracy: 0.0966 - loss: 2.3030
[1m 273/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 554us/step - accuracy: 0.0992 - loss: 2.3028
[1m 351/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 573us/step - accuracy: 0.1010 - loss: 2.3027
[1m 448/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 562us/step - accuracy: 0.1033 - loss: 2.3025
[1m 542/1875[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 557us/step - accuracy: 0.1053 - loss: 2.3023
[1m 633/1875[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 557us/step - accuracy: 0.1069 - loss: 2.3021
[1m 717/1875[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m0s[0m 562us/step - accuracy: 0.1082 - loss: 2.30




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 582us/step - accuracy: 0.1168 - loss: 2.2995

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

313/313 - 0s - 477us/step - accuracy: 0.1171 - loss: 2.2854

eval_loss, eval_acc :                                
2.2853591442108154                                   
0.11710000038146973                                  
[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m24s[0m 13ms/step
[1m   2/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step
[1m 129/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 404us/step
[1m 130/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 415us/step
[1m 131/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 413us/step
[1m 263/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 397us/step
[1m 264/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s

2024/10/31 17:12:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run able-finch-99 at: http://mlflow-server:8888/#/experiments/728929677351795026/runs/5bf9846553954a4dacaea1797e1001fe.

2024/10/31 17:12:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-server:8888/#/experiments/728929677351795026.

2024/10/31 17:12:22 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...

2024/10/31 17:12:22 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!



 33%|███▎      | 1/3 [00:10<00:21, 10.81s/trial, best loss: 2.2853591442108154]


2024/10/31 17:12:22 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.



Epoch 1/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:25[0m 206ms/step - accuracy: 0.1250 - loss: 2.3040
[1m  47/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.2116 - loss: 2.2903    
[1m  98/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.3155 - loss: 2.2709
[1m 147/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.3710 - loss: 2.2490
[1m 197/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.4093 - loss: 2.2255
[1m 248/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.4386 - loss: 2.2014
[1m 299/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.4600 - loss: 2.1783
[1m 350/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.4764 - loss: 2.1561
[1m 393/1875[0m [32m━━━━[0m[37m━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.5876 - loss: 1.7306  

Epoch 2/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 11ms/step - accuracy: 0.6875 - loss: 0.9396
[1m  52/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 994us/step - accuracy: 0.6409 - loss: 1.0394
[1m 102/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 999us/step - accuracy: 0.6449 - loss: 1.0222
[1m 153/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 996us/step - accuracy: 0.6475 - loss: 1.0145
[1m 205/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 990us/step - accuracy: 0.6497 - loss: 1.0075
[1m 256/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 989us/step - accuracy: 0.6507 - loss: 1.0036
[1m 308/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 986us/step - accuracy: 0.6511 - loss: 1.0005
[1m 359/1875[0m [32m━━━[0m[




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6598 - loss: 0.9593

Epoch 3/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 11ms/step - accuracy: 0.6250 - loss: 1.0156
[1m  51/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7104 - loss: 0.8771  
[1m 101/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7070 - loss: 0.8750
[1m 153/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 998us/step - accuracy: 0.7066 - loss: 0.8717
[1m 205/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 993us/step - accuracy: 0.7080 - loss: 0.8654
[1m 255/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 994us/step - accuracy: 0.7093 - loss: 0.8601
[1m 306/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 995us/step - accuracy: 0.7106 - loss: 0.8552
[1m 357/1875[0m [32m━━━[0m[37m━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7282 - loss: 0.8056

Epoch 4/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 11ms/step - accuracy: 0.8125 - loss: 0.5097
[1m  51/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7746 - loss: 0.6755  
[1m 100/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7786 - loss: 0.6731
[1m 151/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7780 - loss: 0.6775
[1m 202/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 998us/step - accuracy: 0.7773 - loss: 0.6795
[1m 253/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 997us/step - accuracy: 0.7775 - loss: 0.6794
[1m 304/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 996us/step - accuracy: 0.7777 - loss: 0.6791
[1m 354/1875[0m [32m━━━[0m[37m━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1000us/step - accuracy: 0.7765 - loss: 0.6780

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step        
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step        

313/313 - 0s - 537us/step - accuracy: 0.9292 - loss: 0.3224                    

eval_loss, eval_acc :                                                          
0.3223511576652527                                                             
0.9291999936103821                                                             
 33%|███▎      | 1/3 [00:21<00:21, 10.81s/trial, best loss: 2.2853591442108154]

Epoch 1/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:02[0m 225ms/step - accuracy: 0.1875 - loss: 2.2973
[1m  30/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 2ms/step - accuracy: 0.1739 - loss: 2.2957    
[1m  74/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.2809 - loss: 2.2825
[1m 120/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.3555 - loss: 2.2639
[1m 161/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.3978 - loss: 2.2448
[1m 210/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.4324 - loss: 2.2200
[1m 259/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.4568 - loss: 2.1951
[1m 308/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.4741 - loss: 2.1713
[1m 359/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.5750 - loss: 1.7258

Epoch 2/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m18s[0m 10ms/step - accuracy: 0.5000 - loss: 1.1277
[1m  51/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6706 - loss: 0.9893  
[1m 102/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 999us/step - accuracy: 0.6664 - loss: 0.9830
[1m 152/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6609 - loss: 0.9857  
[1m 200/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6586 - loss: 0.9862
[1m 245/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6576 - loss: 0.9852
[1m 293/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6563 - loss: 0.9844
[1m 342/1875[0m [32m━━━[0m[37m━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6550 - loss: 0.9589

Epoch 3/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 10ms/step - accuracy: 0.7188 - loss: 0.6684
[1m  50/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6835 - loss: 0.8562  
[1m  97/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6846 - loss: 0.8652
[1m 146/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6866 - loss: 0.8640
[1m 195/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6868 - loss: 0.8646
[1m 245/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6872 - loss: 0.8635
[1m 295/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.6878 - loss: 0.8613
[1m 337/1875[0m [32m━━━[0m[37m━━━━━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7027 - loss: 0.8325

Epoch 4/4                                                                      

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 10ms/step - accuracy: 0.7500 - loss: 0.6360
[1m  51/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7595 - loss: 0.7006  
[1m 102/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7636 - loss: 0.6980
[1m 153/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7648 - loss: 0.6951
[1m 203/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7639 - loss: 0.6972
[1m 253/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7629 - loss: 0.7000
[1m 303/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.7624 - loss: 0.7014
[1m 354/1875[0m [32m━━━[0m[37m━━━━━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7597 - loss: 0.7054

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step        
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step        

313/313 - 0s - 514us/step - accuracy: 0.9260 - loss: 0.3355                    

eval_loss, eval_acc :                                                          
0.33554935455322266                                                            
0.9259999990463257                                                             
[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m32s[0m 17ms/step          
[1m 117/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 434us/step
[1m 118/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 439us/step
[1m 119/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 446us/step
[1m 244/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 423us/step
[1m 245/1

2024/10/31 17:12:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run aged-panda-814 at: http://mlflow-server:8888/#/experiments/728929677351795026/runs/0133f9104f5745e794d3dadc680702cb.

2024/10/31 17:12:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-server:8888/#/experiments/728929677351795026.

2024/10/31 17:12:46 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...

2024/10/31 17:12:46 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!



 67%|██████▋   | 2/3 [00:34<00:18, 18.16s/trial, best loss: 0.33554935455322266]


2024/10/31 17:12:46 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.



Epoch 1/2                                                                       

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:52[0m 220ms/step - accuracy: 0.1875 - loss: 2.3000
[1m  69/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 741us/step - accuracy: 0.2223 - loss: 2.2790  
[1m 147/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 690us/step - accuracy: 0.2882 - loss: 2.2460
[1m 232/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 653us/step - accuracy: 0.3194 - loss: 2.2086
[1m 318/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 634us/step - accuracy: 0.3373 - loss: 2.1729
[1m 405/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 622us/step - accuracy: 0.3485 - loss: 2.1402
[1m 459/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 659us/step - accuracy: 0.3533 - loss: 2.1219
[1m 539/1875[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 655us/step - accuracy: 0.3590 - loss: 2.0968
[1m 623/1875[0m [32m━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 615us/step - accuracy: 0.4035 - loss: 1.8527

Epoch 2/2                                                                       

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 10ms/step - accuracy: 0.6250 - loss: 1.4755
[1m  86/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 591us/step - accuracy: 0.5025 - loss: 1.4125
[1m 172/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 588us/step - accuracy: 0.4905 - loss: 1.4163
[1m 258/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 587us/step - accuracy: 0.4856 - loss: 1.4190
[1m 345/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 586us/step - accuracy: 0.4824 - loss: 1.4202
[1m 429/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 588us/step - accuracy: 0.4802 - loss: 1.4213
[1m 516/1875[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 586us/step - accuracy: 0.4787 - loss: 1.4222
[1m 602/1875[0m [32m━━━━━━[




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 592us/step - accuracy: 0.4794 - loss: 1.4035

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step         
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step         

313/313 - 0s - 469us/step - accuracy: 0.8996 - loss: 0.7991                     

eval_loss, eval_acc :                                                           
0.7991117238998413                                                              
0.8996000289916992                                                              
 67%|██████▋   | 2/3 [00:38<00:18, 18.16s/trial, best loss: 0.33554935455322266]

Epoch 1/2                                                                       

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:21[0m 204ms/step - accuracy: 0.0625 - loss: 2.3040
[1m  60/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 854us/step - accuracy: 0.1607 - loss: 2.2920  
[1m 136/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 746us/step - accuracy: 0.2658 - loss: 2.2614
[1m 220/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 689us/step - accuracy: 0.3209 - loss: 2.2260
[1m 289/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 700us/step - accuracy: 0.3462 - loss: 2.1980
[1m 372/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 679us/step - accuracy: 0.3660 - loss: 2.1672
[1m 459/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 660us/step - accuracy: 0.3799 - loss: 2.1375
[1m 545/1875[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 648us/step - accuracy: 0.3903 - loss: 2.1098
[1m 627/1875[0m [32m━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 613us/step - accuracy: 0.4447 - loss: 1.8473

Epoch 2/2                                                                       

[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 11ms/step - accuracy: 0.6250 - loss: 1.2297
[1m  36/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.5230 - loss: 1.3240  
[1m  60/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 2ms/step - accuracy: 0.5199 - loss: 1.3346
[1m  96/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 2ms/step - accuracy: 0.5188 - loss: 1.3401
[1m 161/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 1ms/step - accuracy: 0.5156 - loss: 1.3477
[1m 236/1875[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 1ms/step - accuracy: 0.5161 - loss: 1.3470
[1m 308/1875[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 988us/step - accuracy: 0.5170 - loss: 1.3447
[1m 383/1875[0m [32m━━━━[0m[37m━━━━━━━━━━━━




[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 755us/step - accuracy: 0.5271 - loss: 1.3199

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step         
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step         

313/313 - 0s - 625us/step - accuracy: 0.9222 - loss: 0.6762                     

eval_loss, eval_acc :                                                           
0.6761881709098816                                                              
0.9222000241279602                                                              
[1m   1/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 16ms/step           
[1m   2/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 2ms/step             
[1m   3/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 2ms/step             
[1m 112/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 479us/step 
[1m 113/1875[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 487us/s

2024/10/31 17:13:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run entertaining-bat-608 at: http://mlflow-server:8888/#/experiments/728929677351795026/runs/910c4582099147fc8be5f15536ecbd89.

2024/10/31 17:13:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-server:8888/#/experiments/728929677351795026.

2024/10/31 17:13:00 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...

2024/10/31 17:13:00 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!



100%|██████████| 3/3 [00:48<00:00, 16.10s/trial, best loss: 0.33554935455322266]

2024/10/31 17:13:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run mercurial-jay-210 at: http://mlflow-server:8888/#/experiments/728929677351795026/runs/544c41c0cd974472acb45d6ca1327f01.
2024/10/31 17:13:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-server:8888/#/experiments/728929677351795026.
2024/10/31 17:13:00 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/10/31 17:13:00 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!





In [18]:
spark.stop()

### Load trained tf model with Spark DataFrame

#### Set MLflow backend URI

In [16]:
import mlflow
import time
mlflow_uri = "http://mlflow-server:8888/"
mlflow.set_tracking_uri(mlflow_uri)
time.sleep(5)
mlflow.set_experiment("distibuted_trainingAnd_params_tuning_with_mlflow_1")


<Experiment: artifact_location='mlflow-artifacts:/728929677351795026', creation_time=1730389729654, experiment_id='728929677351795026', last_update_time=1730389729654, lifecycle_stage='active', name='distibuted_trainingAnd_params_tuning_with_mlflow_1', tags={}>

#### Initialize SparkSession

In [17]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("distibuted_trainingAnd_params_tuning").master("spark://spark-master:7077") \
        .getOrCreate()

In [18]:
from pyspark.sql.functions import struct, col
logged_model = 'runs:/9794c5f6d6dc41bf9ae605e2abf80364/ths_tune_model'

# Load model as a Spark UDF. Override result_type if the model does not return double values.
loaded_model =  mlflow.pyfunc.load_model(logged_model)


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 6/6 [00:00<00:00, 1154.71it/s] 




In [19]:
type(x_test)

numpy.ndarray

#### Load dataset

In [20]:
import pandas as pd
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
print(x_train.shape)

print(x_test.shape)


(60000, 28, 28)
(10000, 28, 28)


In [21]:
pred_ = loaded_model.predict(x_test)
print(pred_)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394us/step
[[ -8.664396  -14.909999   -3.0911999 ...   8.796654   -8.077242
   -6.6865964]
 [ -6.3493648 -11.766148    9.677436  ... -26.71225    -6.804982
  -25.417377 ]
 [ -9.701519    4.1610136  -2.233385  ...  -2.4515321  -3.1858158
   -6.341231 ]
 ...
 [-18.085354  -16.34156    -7.866031  ...  -2.9642308  -5.7162447
   -1.7017349]
 [ -6.84557   -10.090346   -9.494044  ... -12.040836   -1.1177579
  -11.440213 ]
 [ -5.868529  -17.192024   -4.854329  ... -22.456984  -11.155085
  -16.662743 ]]


In [22]:
# Stop the SparkSession
spark.stop()

In [23]:
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target
print(X.shape)

(150, 4)
