In [1]:
import os
os.chdir("../")

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    num_lstm_units: float
    learning_rate: float
    epochs: float
    early_stopping_patience: float
    nsteps: float

In [3]:
from orangePlatform.constants import *
from orangePlatform.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.ConvLSTM2D

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            model_name = config.model_name,
            num_lstm_units=params.num_lstm_units,
            learning_rate=params.learning_rate,
            epochs=params.epochs,
            early_stopping_patience=params.early_stopping_patience,
            nsteps=params.nsteps
                    
        )

        return model_trainer_config

In [5]:
import pandas as pd
import os
from orangePlatform import logger
import joblib

In [8]:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, TimeDistributed
from keras.layers import ConvLSTM2D

In [16]:
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


In [14]:
def create_lstm_model(num_lstm_units, learning_rate, nsteps):
    n_seq = 5
    n_steps =int(nsteps // n_seq)
    model = Sequential()
    model.add(ConvLSTM2D(filters=num_lstm_units, kernel_size=(1,2), activation='relu', input_shape=(n_seq, 1, n_steps, 1)))
    model.add(Flatten())
    model.add(Dense(1))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')
    

    return model



In [15]:

def split_sequence(sequence, n_steps):
	X, y = list(), list()
	for i in range(len(sequence)):
		# find the end of this pattern
		end_ix = i + n_steps
		# check if we are beyond the sequence
		if end_ix > len(sequence)-1:
			break
		# gather input and output parts of the pattern
		seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
		X.append(seq_x)
		y.append(seq_y)
	return np.array(X), np.array(y)

In [41]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    
    def train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)
        train=train_data.values
        test=test_data.values
        print(type(test))
        model = create_lstm_model(self.config.num_lstm_units, self.config.learning_rate, self.config.nsteps)
        X, y = split_sequence(train, self.config.nsteps)  # You need to define split_sequence
        n_features = 1
        n_seq =5
        n_steps = int(self.config.nsteps // n_seq)
        X = X.reshape((X.shape[0], n_seq, 1, n_steps, 1))
        X_val, y_val = split_sequence(test, self.config.nsteps)  # You need to define split_sequence
        X_val = X_val.reshape((X_val.shape[0], n_seq, 1, n_steps, 1))
                    # Define early stopping callback
        early_stopping = EarlyStopping(monitor='val_loss', patience=self.config.early_stopping_patience, restore_best_weights=True)
                    # Train the model and log metrics using MLflow
        history = model.fit(X, y, epochs=self.config.epochs, validation_data=(X_val, y_val), callbacks=[early_stopping])
        joblib.dump(model, os.path.join(self.config.root_dir, self.config.model_name))



In [42]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2024-05-11 00:53:26,763: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-11 00:53:26,766: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-11 00:53:26,768: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-05-11 00:53:26,770: INFO: common: created directory at: artifacts]
[2024-05-11 00:53:26,771: INFO: common: created directory at: artifacts/model_trainer]
<class 'numpy.ndarray'>
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
