In [1]:
import pandas as pd
import os
import mlflow
import time
# Locate the CSV relative to the process's current working directory.
root_path = os.getcwd()
csv_path = root_path + "/windfarm_data.csv"

# The first CSV column is used as the row index (the date labels shown in the preview).
wind_farm_data = pd.read_csv(csv_path, index_col=0)
wind_farm_data.head()

Unnamed: 0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
2014-01-01,4.702022,106.74259,4.743292,7.189482,100.41638,6.593833,8.172301,99.288,5.967206,1959.3535
2014-01-02,7.695733,98.036705,6.142716,9.977118,94.03181,4.383676,9.690135,204.25444,1.696528,1266.6239
2014-01-03,9.608235,274.0612,10.514304,10.840864,242.87563,16.869741,8.991079,250.2683,12.038399,7545.6797
2014-01-04,6.955563,257.91022,7.18917,5.317223,254.2617,9.069233,3.021174,284.06537,4.590843,3791.0408
2014-01-05,0.830547,265.3944,4.263086,2.480239,104.79496,3.042063,4.227131,263.4169,3.899182,880.6115


In [2]:
from pyspark.sql import SparkSession
# Create (or reuse) the Spark session against the standalone cluster master.
spark = (
    SparkSession.builder
    .appName("v6_hper_param")
    .master("spark://spark-master:7077")
    .getOrCreate()
)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/04 03:50:41 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/05/04 03:50:42 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
25/05/04 03:50:42 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.


In [3]:
# Point the MLflow client at the tracking server used by this notebook.
mlflow_uri = "http://mlflow-server:8888"
mlflow.set_tracking_uri(uri=mlflow_uri)

# NOTE(review): fixed 5 s pause before the first server call; presumably this
# gives the tracking server time to become reachable. TODO confirm it is needed.
time.sleep(5)

# Select the experiment by name; the returned Experiment is the cell's output.
mlflow.set_experiment(experiment_name="v6_hper_param")

2025/05/04 03:50:53 INFO mlflow.tracking.fluent: Experiment with name 'v6_hper_param' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/325099155118866680', creation_time=1746330653025, experiment_id='325099155118866680', last_update_time=1746330653025, lifecycle_stage='active', name='v6_hper_param', tags={}>

In [4]:
def get_training_data(wind_farm_data):
    """Split the training window of the wind-farm frame into features and target.

    Rows are selected by index label from "2014-01-01" through "2017-12-31".
    Label slices include both endpoints, so the window deliberately stops the
    day before "2018-01-01": that label is the first row returned by
    ``get_validation_data`` and must not also be used for training.

    Args:
        wind_farm_data: DataFrame indexed by sortable date labels and
            containing a "power" column.

    Returns:
        Tuple ``(X, y)`` where ``X`` is every column except "power" and ``y``
        is the "power" column, both restricted to the training window.

    Raises:
        KeyError: if the frame has no "power" column.
    """
    # .loc makes the label-based (not positional) row selection explicit.
    training_data = pd.DataFrame(wind_farm_data.loc["2014-01-01":"2017-12-31"])
    X = training_data.drop(columns="power")
    y = training_data["power"]
    return X, y

def get_validation_data(wind_farm_data):
    """Return the validation window as a ``(features, target)`` pair.

    Rows whose index label lies between "2018-01-01" and "2019-01-01"
    (label slice, both endpoints included) are selected. The features are
    every column except "power"; the target is the "power" column.

    Args:
        wind_farm_data: DataFrame indexed by sortable date labels and
            containing a "power" column.

    Returns:
        Tuple ``(X, y)`` for the validation window.
    """
    window = slice("2018-01-01", "2019-01-01")
    holdout = pd.DataFrame(wind_farm_data[window])
    features = holdout.drop("power", axis=1)
    target = holdout["power"]
    return features, target


In [5]:
def get_weather_and_forecast(data=None):
    """Return weather features around today plus the recent power output.

    The window spans five days before today through five days after today,
    selected by "%Y-%m-%d" index labels (both endpoints included). When that
    window holds fewer than 10 rows, for example because the data set does not
    reach today's date, the last 10 rows of the frame are used instead, with
    the first five of them treated as the "past" part.

    Args:
        data: DataFrame indexed by sortable date labels with a "power" column.
            Defaults to the notebook-level ``wind_farm_data`` frame, which
            keeps existing zero-argument calls working.

    Returns:
        Tuple ``(weather_and_forecast, past_power_output)``: the window's
        columns without "power", and the "power" Series for the past part of
        the window.

    Raises:
        KeyError: if the frame has no "power" column.
    """
    if data is None:
        data = wind_farm_data

    def format_date(pd_date):
        return pd_date.strftime("%Y-%m-%d")

    # A Timestamp (not a Timedelta) is needed to represent "today".
    today = pd.Timestamp("today").normalize()
    window_start = format_date(today - pd.Timedelta(days=5))
    window_end = format_date(today + pd.Timedelta(days=5))

    past_power_output = data.loc[window_start:format_date(today)]
    weather_and_forecast = data.loc[window_start:window_end]

    if len(weather_and_forecast) < 10:
        # Fallback: use the tail of the data set. The slice must be `-10:` so
        # that a 10-row DataFrame (not a single-row Series) is returned.
        past_power_output = data.iloc[-10:-5]
        weather_and_forecast = data.iloc[-10:]

    return weather_and_forecast.drop(columns="power"), past_power_output["power"]

In [6]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential


In [8]:
def train_keras_model(X,y):
    """Build, compile and fit a small fully connected regression network.

    The network is one hidden Dense layer of 100 ReLU units followed by a
    single-unit output layer, compiled with MSE loss and the Adam optimizer.
    It is fitted for 30 epochs with batch size 64 and ``validation_split=0.2``.

    Args:
        X: feature matrix; its last dimension sets the input width.
        y: regression target aligned with ``X``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    n_features = X.shape[-1]
    layers = [
        Dense(100, input_shape=(n_features,), activation="relu"),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(loss="mse", optimizer="adam")

    fit_options = dict(epochs=30, batch_size=64, validation_split=0.2, verbose=1)
    model.fit(X, y, **fit_options)
    return model

In [9]:
# Validation split, reused later for signature inference and scoring.
X_test, y_test = get_validation_data(wind_farm_data)

# A single-row sample of the validation features; print its shape.
sample_x_test = X_test.head(1)
print(sample_x_test.shape)

X_test.head()

(1, 9)


Unnamed: 0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16
2018-01-01,4.290002,99.38345,11.876492,5.60676,99.889145,11.322479,6.574509,106.04957,9.184392
2018-01-02,4.43341,101.93347,9.378234,6.396653,100.942276,9.721888,6.416043,107.416336,7.147915
2018-01-03,5.295909,111.76401,7.841654,6.982701,111.81693,7.587134,6.997799,109.31719,6.987251
2018-01-04,3.462663,101.46492,7.504305,5.784602,100.363434,6.056359,5.774397,103.49372,4.61069
2018-01-05,4.275198,102.928795,5.612769,6.063427,103.45712,5.794476,6.922954,101.4929,7.619769


In [10]:

import mlflow
import mlflow.tensorflow
import mlflow.keras
from mlflow.models.signature import infer_signature,set_signature


In [11]:
X_train, y_train = get_training_data(wind_farm_data)

# Train inside an MLflow run with TensorFlow autologging enabled.
with mlflow.start_run() as run:
    mlflow.tensorflow.autolog()

    model = train_keras_model(X_train, y_train)

    # Keep the run id at notebook level; later cells build model URIs from it.
    run_id = run.info.run_id

    # Infer the signature from the validation features and the model's
    # predictions on them, then attach it to the run's "model" artifact.
    predictions = model.predict(X_test)
    signature = infer_signature(X_test, predictions)
    model_uri = f"runs:/{run_id}/model"
    set_signature(model_uri, signature)

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 244ms/step - loss: 10606917.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 10733498.0000 - val_loss: 7696635.0000
Epoch 2/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 13765286.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 10255010.0000 - val_loss: 7155647.5000
Epoch 3/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 7678917.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 9247251.0000 - val_loss: 6541872.0000
Epoch 4/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - loss: 6424872.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 7929045.5000 - val_loss: 5915883.0000
Epoch 5/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step - loss: 6376654.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 7020505.5000 - val_loss: 5348511.0000
Epoch 6/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 7155714.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 6639404.5000 - val_loss: 4901183.0000
Epoch 7/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 6954344.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6333361.5000 - val_loss: 4621732.5000
Epoch 8/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 4666313.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5851641.0000 - val_loss: 4508672.0000
Epoch 9/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5857868.5000 - val_loss: 4510233.5000
Epoch 10/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5305683.5000 - val_loss: 4553623.5000
Epoch 11/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5551314.0000 - val_loss: 4593065.5000
Epoch 12/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5657745.5000 - val_loss: 4610975.0000
Epoch 13/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5901254.5000 - val_loss: 4580340.0000
Epoch 14/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5841957.5000 - val_loss: 4567889.0000
Epoch 15/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 544



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 5887429.0000 - val_loss: 4502358.0000
Epoch 18/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 3505008.2500



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5114369.0000 - val_loss: 4479937.0000
Epoch 19/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 5587860.5000 - val_loss: 4513833.0000
Epoch 20/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 10ms/step - loss: 3840801.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4950932.5000 - val_loss: 4474856.5000
Epoch 21/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 5029824.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5489441.5000 - val_loss: 4459196.0000
Epoch 22/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 4710597.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5044843.0000 - val_loss: 4425120.0000
Epoch 23/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 5271083.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5279302.0000 - val_loss: 4421755.0000
Epoch 24/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 8915858.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5577944.5000 - val_loss: 4394499.0000
Epoch 25/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 4884740.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5322078.5000 - val_loss: 4350926.0000
Epoch 26/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 5528445.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4996519.5000 - val_loss: 4319396.5000
Epoch 27/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 3680232.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4840074.5000 - val_loss: 4305963.0000
Epoch 28/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 5014368.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5050943.0000 - val_loss: 4305866.5000
Epoch 29/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 5913522.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5304188.5000 - val_loss: 4257893.5000
Epoch 30/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 3924296.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4645850.5000 - val_loss: 4222182.0000




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 63.88it/s]

🏃 View run righteous-smelt-997 at: http://mlflow-server:8888/#/experiments/325099155118866680/runs/cde3afed224745b78d337eaa7d70462d
🧪 View experiment at: http://mlflow-server:8888/#/experiments/325099155118866680





In [12]:

# Name under which the model is registered by the registration cell below.
model_name = "power-forecasting-model"
# Show the id of the training run; later cells build "runs:/<run_id>/model" URIs from it.
print(run_id)


cde3afed224745b78d337eaa7d70462d


In [13]:
# Reload the run's "model" artifact through the TensorFlow flavor and print
# its layer summary.
keras_model_uri = f"runs:/{run_id}/model"
loaded_model = mlflow.tensorflow.load_model(keras_model_uri)
loaded_model.summary()

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 130.12it/s]  
  saveable.load_own_variables(weights_store.get(inner_path))


In [22]:
import mlflow
import pandas as pd

# Load the same run artifact through the generic pyfunc flavor.
# Note: this rebinds `loaded_model`, which an earlier cell bound to the
# TensorFlow-flavor model.
logged_model = f'runs:/{run_id}/model'
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Score the validation features passed as a pandas DataFrame.
validation_frame = pd.DataFrame(X_test)
y_pred = loaded_model.predict(validation_frame)
y_pred

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 546.16it/s]  

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 940us/step



  saveable.load_own_variables(weights_store.get(inner_path))


Unnamed: 0,0
2018-01-01,1385.388672
2018-01-02,1326.935791
2018-01-03,1355.011108
2018-01-04,1196.310913
2018-01-05,1226.802856
...,...
2018-12-28,2987.428711
2018-12-29,2627.895264
2018-12-30,2779.671387
2018-12-31,1490.799438


In [23]:
import mlflow

# Build the URI of the run's logged model artifact.
artifact_path = "model"
model_uri = f"runs:/{run_id}/{artifact_path}"
print("model_uri : ",model_uri)

# Register that artifact under `model_name`; the result is kept in `model_version`.
model_version = mlflow.register_model(model_uri, model_name)

Successfully registered model 'power-forecasting-model'.
2024/10/31 15:33:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: power-forecasting-model, version 1


model_uri :  runs:/93dc9370ebde4904945ded68045947ef/model


Created version '1' of model 'power-forecasting-model'.
