In [9]:
import pandas as pd
import os
import mlflow
import time
# The CSV is looked up in the directory the kernel is currently running in.
root_path = os.getcwd()
# The first CSV column is used as the row index (the dates shown in the preview).
wind_farm_data = pd.read_csv(f"{root_path}/windfarm_data.csv", index_col=0)
wind_farm_data.head()

Unnamed: 0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
2014-01-01,4.702022,106.74259,4.743292,7.189482,100.41638,6.593833,8.172301,99.288,5.967206,1959.3535
2014-01-02,7.695733,98.036705,6.142716,9.977118,94.03181,4.383676,9.690135,204.25444,1.696528,1266.6239
2014-01-03,9.608235,274.0612,10.514304,10.840864,242.87563,16.869741,8.991079,250.2683,12.038399,7545.6797
2014-01-04,6.955563,257.91022,7.18917,5.317223,254.2617,9.069233,3.021174,284.06537,4.590843,3791.0408
2014-01-05,0.830547,265.3944,4.263086,2.480239,104.79496,3.042063,4.227131,263.4169,3.899182,880.6115


In [10]:
from pyspark.sql import SparkSession
# Get (or create) the Spark session for this notebook on the standalone master.
spark = (
    SparkSession.builder
    .appName("v6_hper_param")
    .master("spark://spark-master:7077")
    .getOrCreate()
)

In [11]:
# Point the MLflow client at the tracking server used by this notebook.
mlflow_uri = "http://mlflow-server:8888"
mlflow.set_tracking_uri(mlflow_uri)

# Fixed five second pause before the first request to the server.
# NOTE(review): presumably gives the server time to become reachable; confirm it is still needed.
time.sleep(5)

# Select the experiment by name; kept as the last expression so the
# Experiment object is displayed as the cell output.
mlflow.set_experiment("v6_hper_param")

2024/10/31 15:29:55 INFO mlflow.tracking.fluent: Experiment with name 'v6_hper_param' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/189670564069712955', creation_time=1730388595160, experiment_id='189670564069712955', last_update_time=1730388595160, lifecycle_stage='active', name='v6_hper_param', tags={}>

In [12]:
def get_training_data(wind_farm_data, start="2014-01-01", end="2017-12-31"):
    """Split the training window of the wind-farm table into features and target.

    Rows are selected by index label, and label slices include both
    endpoints. The default window therefore stops at 2017-12-31 so that it
    does not share the 2018-01-01 row with the validation window, which
    starts on that day.

    Args:
        wind_farm_data: DataFrame indexed by date label, with a "power" column.
        start: First index label of the training window (inclusive).
        end: Last index label of the training window (inclusive).

    Returns:
        Tuple ``(X, y)``: every column except "power", and the "power" column.
    """
    # .loc makes the label-based (inclusive) row selection explicit.
    training_data = wind_farm_data.loc[start:end]
    X = training_data.drop(columns="power")
    y = training_data["power"]
    return X, y

def get_validation_data(wind_farm_data, start="2018-01-01", end="2019-01-01"):
    """Split the validation window of the wind-farm table into features and target.

    Rows are selected by index label; both ``start`` and ``end`` are
    included in the result.

    Args:
        wind_farm_data: DataFrame indexed by date label, with a "power" column.
        start: First index label of the validation window (inclusive).
        end: Last index label of the validation window (inclusive).

    Returns:
        Tuple ``(X, y)``: every column except "power", and the "power" column.
    """
    # .loc makes the label-based (inclusive) row selection explicit.
    validation_data = wind_farm_data.loc[start:end]
    X = validation_data.drop(columns="power")
    y = validation_data["power"]
    return X, y


In [13]:
def get_weather_and_forecast(data=None):
    """Return the weather window around today and the recent power output.

    The window runs from five days before today to five days after today,
    selected by "%Y-%m-%d" index labels. When that window holds fewer than
    10 rows (for example because the table does not reach today's date),
    the last 10 rows of the table are used instead, with the first 5 of
    them standing in for the past power output.

    Args:
        data: Table with a "power" column, indexed by date label. Defaults
            to the notebook-level ``wind_farm_data``.

    Returns:
        Tuple ``(weather_and_forecast, past_power_output)``: the window
        without the "power" column, and the "power" values of the past part.
    """
    if data is None:
        data = wind_farm_data

    # Timestamp (not Timedelta) is the type that understands 'today'.
    today = pd.Timestamp("today").normalize()
    date_format = "%Y-%m-%d"
    window_start = (today - pd.Timedelta(days=5)).strftime(date_format)
    window_end = (today + pd.Timedelta(days=5)).strftime(date_format)
    today_label = today.strftime(date_format)

    past_power_output = data.loc[window_start:today_label]
    weather_and_forecast = data.loc[window_start:window_end]
    if len(weather_and_forecast) < 10:
        past_power_output = data.iloc[-10:-5]
        # Slice (not a scalar position) so the result stays a 10-row DataFrame.
        weather_and_forecast = data.iloc[-10:]
    return weather_and_forecast.drop(columns="power"), past_power_output["power"]

In [14]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential


In [15]:
def train_keras_model(X, y, epochs=30, batch_size=64, hidden_units=100):
    """Build and fit a one-hidden-layer regression network.

    The network is an input of ``X.shape[-1]`` features, one ReLU hidden
    layer and a single linear output unit, trained with the Adam optimizer
    on mean squared error. 20% of the data passed in is held out by Keras
    as a validation split during fitting.

    Args:
        X: Feature table; its last dimension gives the number of inputs.
        y: Target values, one per row of ``X``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        hidden_units: Width of the hidden layer.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # An explicit Input object replaces `input_shape=` on the first Dense
    # layer, which Keras warns about for Sequential models.
    model = Sequential(
        [
            tf.keras.Input(shape=(X.shape[-1],)),
            Dense(hidden_units, activation="relu"),
            Dense(1),
        ]
    )
    model.compile(loss="mse", optimizer="adam")

    model.fit(
        X,
        y,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        verbose=1,
    )
    return model

In [16]:
# Hold-out features and target taken from the validation window.
X_test, y_test = get_validation_data(wind_farm_data)
# One-row sample of the hold-out features; its shape is the cell output.
sample_x_test = X_test.head(1)
sample_x_test.shape

(1, 9)

In [17]:

import mlflow
import mlflow.tensorflow
import mlflow.keras
from mlflow.models.signature import infer_signature,set_signature


In [18]:
X_train, y_train = get_training_data(wind_farm_data)

with mlflow.start_run() as run:
    # Enable TensorFlow/Keras autologging for the fit performed in this run.
    mlflow.tensorflow.autolog()

    # The run opened by this `with` block is the active run; keep its id
    # and the URI of its "model" artifact for the cells that follow.
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"

    model = train_keras_model(X_train, y_train)

    # Derive the input/output schema from the hold-out features and the
    # model's predictions on them, then attach it to the logged model.
    signature = infer_signature(X_test, model.predict(X_test))
    set_signature(model_uri, signature)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 238ms/step - loss: 12988388.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 10244672.0000 - val_loss: 7451830.5000
Epoch 2/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 9260350.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 9566446.0000 - val_loss: 6909640.5000
Epoch 3/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - loss: 7575016.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 8363846.0000 - val_loss: 6326609.5000
Epoch 4/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 5448505.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 8078007.0000 - val_loss: 5730381.0000
Epoch 5/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 6934125.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 7743610.0000 - val_loss: 5196462.0000
Epoch 6/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - loss: 6088286.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 6915271.0000 - val_loss: 4802162.5000
Epoch 7/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 6732795.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6487248.0000 - val_loss: 4581899.5000
Epoch 8/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 5825020.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6103422.5000 - val_loss: 4504374.0000
Epoch 9/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5503827.0000 - val_loss: 4516268.5000
Epoch 10/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5703549.5000 - val_loss: 4572879.0000
Epoch 11/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5636150.0000 - val_loss: 4593648.5000
Epoch 12/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5305569.0000 - val_loss: 4604722.0000
Epoch 13/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5703235.5000 - val_loss: 4611386.5000
Epoch 14/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 5404538.5000 - val_loss: 4587265.0000
Epoch 15/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 561



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5045319.0000 - val_loss: 4501432.5000
Epoch 20/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 6851518.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5651100.5000 - val_loss: 4497317.0000
Epoch 21/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 4650935.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5219603.5000 - val_loss: 4465053.5000
Epoch 22/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 3978530.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5175065.0000 - val_loss: 4460086.0000
Epoch 23/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - loss: 3036359.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4932400.0000 - val_loss: 4419591.5000
Epoch 24/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 10ms/step - loss: 5461253.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5153730.5000 - val_loss: 4408174.5000
Epoch 25/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5803255.0000 - val_loss: 4418295.5000
Epoch 26/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - loss: 3847863.2500



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5020731.0000 - val_loss: 4382614.5000
Epoch 27/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 5453280.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5417849.5000 - val_loss: 4356836.5000
Epoch 28/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 5462867.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5021649.0000 - val_loss: 4311076.5000
Epoch 29/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 5151917.0000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5174062.5000 - val_loss: 4296528.5000
Epoch 30/30
[1m 1/19[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - loss: 4660683.5000



[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4971981.5000 - val_loss: 4252085.5000




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 299.21it/s] 
2024/10/31 15:31:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run painted-carp-586 at: http://mlflow-server:8888/#/experiments/189670564069712955/runs/93dc9370ebde4904945ded68045947ef.
2024/10/31 15:31:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-server:8888/#/experiments/189670564069712955.


In [19]:

# Name under which the model is registered with MLflow further down.
model_name = "power-forecasting-model"
# Show the id of the training run whose model artifact is loaded next.
print(run_id)


93dc9370ebde4904945ded68045947ef


In [20]:
# Reload the "model" artifact of the training run through the TensorFlow
# flavor and print its layer summary.
loaded_model = mlflow.tensorflow.load_model(f"runs:/{run_id}/model")
loaded_model.summary()

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 376.92it/s] 
  saveable.load_own_variables(weights_store.get(inner_path))


In [22]:
import mlflow
# URI of the "model" artifact logged by the training run.
logged_model = f'runs:/{run_id}/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame. X_test already is one, so it is passed
# directly instead of being wrapped in a second DataFrame.
y_pred = loaded_model.predict(X_test)
y_pred

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 546.16it/s]  

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 940us/step



  saveable.load_own_variables(weights_store.get(inner_path))


Unnamed: 0,0
2018-01-01,1385.388672
2018-01-02,1326.935791
2018-01-03,1355.011108
2018-01-04,1196.310913
2018-01-05,1226.802856
...,...
2018-12-28,2987.428711
2018-12-29,2627.895264
2018-12-30,2779.671387
2018-12-31,1490.799438


In [23]:
import mlflow

# Location of the logged model inside the training run's artifacts.
artifact_path = "model"
model_uri = f"runs:/{run_id}/{artifact_path}"
print("model_uri : ",model_uri)

# Register that artifact as a new version of the named model.
model_version = mlflow.register_model(model_uri, model_name)

Successfully registered model 'power-forecasting-model'.
2024/10/31 15:33:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: power-forecasting-model, version 1


model_uri :  runs:/93dc9370ebde4904945ded68045947ef/model


Created version '1' of model 'power-forecasting-model'.
