#### Import libs

In [1]:
import numpy as np
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.17.0


In [2]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("distibuted_trainingAnd_params_tuning").master("spark://spark-master:7077") \
        .getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/09/30 08:33:20 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/09/30 08:33:20 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


#### Load Dataset

In [3]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = np.array(x_train)

print(x_train.shape)
print(y_train.shape)

(60000, 28, 28)
(60000,)


#### Define model method

In [4]:
# import libs for Tuning
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# for distibuted training
from hyperopt import SparkTrials

In [5]:
def create_model(l1_noNode,l1_activation,l1_droupout):
    model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(l1_noNode, activation=l1_activation),
    tf.keras.layers.Dropout(l1_droupout),
    tf.keras.layers.Dense(10)
    ])
    return model

In [6]:
def train_model(p_epoch=2 ,p_optimizer="adam" ,
                l1_noNode=32,l1_activation="relu",l1_droupout=0.2):


    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model = create_model(l1_noNode,l1_activation,l1_droupout)


    model.compile(optimizer=p_optimizer,
              loss=loss_fn,
              metrics=['accuracy'])
    

  
    model.fit(x_train, y_train, epochs=p_epoch)
    eval_loss, eval_acc  = model.evaluate(x_test,  y_test, verbose=2)

    print("eval_loss, eval_acc : ",eval_loss, eval_acc)
    return model, eval_loss, eval_acc




In [7]:
def train_with_hyperopt(params):
    p_epoch = params["epochs"]
    p_optimizer = params["optimizer"]
    l1_noNode = params["l1_noNode"]
    l1_activation= params["l1_activation"]
    l1_droupout = params["l1_droupout"]

    model, eval_loss, eval_acc = train_model(p_epoch,p_optimizer,l1_noNode,l1_activation,l1_droupout)

    return {"loss": eval_loss, "status": STATUS_OK, "model": model, "val_f1_score": eval_acc}


In [8]:
search_params_space ={
    "epochs":[1,2,3,],
    "optimizer":["Adam","SGD","RMSprop"],
    "l1_noNode" : [32,64,128],
    "l1_activation" :["relu","softmax","tanh"],
    "l1_droupout": [0.2,0.3,0.4,0.5]

}


spark_trials = SparkTrials()
algo = tpe.suggest
print(algo)

Because the requested parallelism was None or a non-positive value, parallelism will be set to (4), which is Spark's default parallelism (4), or 1, whichever is greater. We recommend setting parallelism explicitly to a positive value because the total of Spark task slots is subject to cluster sizing.


<function suggest at 0xffff69fc2200>


#### Define MLflow experiment

In [9]:
import mlflow
import time
mlflow_uri = "http://mlflow-server:8888/"
mlflow.set_tracking_uri(mlflow_uri)
time.sleep(5)
mlflow.set_experiment("distibuted_trainingAnd_params_tuning_with_mlflow")


MlflowException: Cannot set a deleted experiment 'distibuted_trainingAnd_params_tuning_with_mlflow' as the active experiment. You can restore the experiment, or permanently delete the experiment to create a new one.

In [10]:

import mlflow.pyspark.ml
mlflow.pyspark.ml.autolog()

In [11]:
import pyspark
pyspark.__version__

'3.5.2'

In [9]:
with mlflow.start_run(run_name='hyperopt') as run:
    argmin = fmin(
    fn= train_with_hyperopt,
    space=search_params_space,
    algo=algo,
    max_evals = 8,
    trials=spark_trials)

    

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



  0%|          | 0/8 [00:00<?, ?trial/s, best loss=?]

[Stage 0:>    (0 + 1) / 1][Stage 1:>    (0 + 1) / 1][Stage 2:>    (0 + 1) / 1]1]

: 

In [10]:
print("Best value found : ", argmin)
# Stop the SparkSession
spark.stop()

NameError: name 'argmin' is not defined