In [1]:
import os

In [2]:
# Show the kernel's current working directory (IPython line magic) before it is changed.
%pwd

'c:\\DataScience\\Projects\\Next_Word_Predictor\\research'

In [3]:
# The notebook is started from the research/ folder; step up one level so the
# working directory is the project root. The guard makes this cell idempotent:
# re-running it no longer climbs one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Display the working directory again to check the result of the chdir.
%pwd

'c:\\DataScience\\Projects\\Next_Word_Predictor'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # --- locations -------------------------------------------------------
    root_dir: Path      # directory the trained model is written into
    array_path: Path    # file handed to np.load to obtain the training arrays
    model_name: str     # file name of the saved model inside root_dir

    # --- layer sizes -----------------------------------------------------
    units_lstm: int     # units argument of the LSTM layer
    units_dense: int    # units argument of the Dense layer
    activation: str     # activation of the Dense layer

    # --- Embedding layer arguments ---------------------------------------
    input_dim: int
    output_dim: int
    input_length: int

    # --- compile / fit settings ------------------------------------------
    loss: str
    optimizer: str
    metrics: list
    epoch: int          # passed as epochs= to model.fit

In [6]:
from NWPproject.constants import *
from NWPproject.utils.common import read_yaml_file, create_directories

In [7]:
class ConfigurationManager:
    """Loads the config and params YAML files and builds stage configurations.

    Both files are read once in the constructor via ``read_yaml_file`` and
    kept on ``self.config`` / ``self.params``.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the config YAML file.
            params_filepath: Location of the params YAML file.
        """
        self.config = read_yaml_file(config_filepath)
        self.params = read_yaml_file(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the model-training stage.

        Paths and the model name come from the ``model_trainer`` section of
        the config file; the remaining values come from the ``LSTM``,
        ``Dense``, ``Embedding``, ``compile`` and ``fit`` sections of the
        params file. The stage's ``root_dir`` is created as a side effect.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        trainer_section = self.config.model_trainer

        # One handle per params section; several fields share a section.
        lstm_section = self.params.LSTM
        dense_section = self.params.Dense
        embedding_section = self.params.Embedding
        compile_section = self.params.compile
        fit_section = self.params.fit

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            array_path=trainer_section.array_path,
            model_name=trainer_section.model_name,
            units_lstm=lstm_section.units_lstm,
            units_dense=dense_section.units_dense,
            activation=dense_section.activation,
            input_dim=embedding_section.input_dim,
            output_dim=embedding_section.output_dim,
            input_length=embedding_section.input_length,
            loss=compile_section.loss,
            optimizer=compile_section.optimizer,
            metrics=compile_section.metrics,
            epoch=fit_section.epoch,
        )

In [8]:
import os
import numpy as np
from NWPproject.logging import logger
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding

In [9]:
class ModelTrainer:
    """Builds, trains and saves an Embedding -> LSTM -> Dense Keras model.

    All settings (paths, layer sizes, compile and fit arguments) are read
    from the ``ModelTrainerConfig`` passed to the constructor.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Store the configuration used by ``train``."""
        self.config = config

    def _load_arrays(self):
        """Return the ``arr1`` and ``arr2`` arrays stored at ``config.array_path``."""
        # np.load on an archive returns an NpzFile that keeps the underlying
        # file open; the with-block closes it once both arrays are read.
        with np.load(self.config.array_path) as loaded_arrays:
            return loaded_arrays['arr1'], loaded_arrays['arr2']

    def _build_model(self):
        """Create and compile the Sequential model described by the config."""
        model = Sequential()
        model.add(
            Embedding(
                self.config.input_dim,
                self.config.output_dim,
                input_length=self.config.input_length,
            )
        )
        model.add(LSTM(self.config.units_lstm))
        model.add(Dense(self.config.units_dense, activation=self.config.activation))

        model.compile(
            loss=self.config.loss,
            optimizer=self.config.optimizer,
            metrics=self.config.metrics,
        )
        return model

    def train(self):
        """Load the training arrays, fit the model and dump it to disk.

        ``arr1`` is used as the model input and ``arr2`` as the target. The
        fitted model is written with ``joblib.dump`` to
        ``config.root_dir / config.model_name``.
        """
        loaded_array1, loaded_array2 = self._load_arrays()

        print("Loaded array1:", type(loaded_array1))
        print("Loaded array2:", type(loaded_array2))

        print(loaded_array1.shape)
        print(loaded_array2.shape)

        model = self._build_model()
        # summary() prints the table itself and returns None, so wrapping it
        # in print() only added a stray "None" line to the output.
        model.summary()

        model.fit(loaded_array1, loaded_array2, epochs=self.config.epoch)

        # NOTE(review): Keras models are normally persisted with model.save();
        # joblib is kept here so the artifact format stays unchanged — confirm
        # before migrating.
        joblib.dump(model, os.path.join(self.config.root_dir, self.config.model_name))

In [10]:
# Run the model-training stage end to end: load configuration, build the
# trainer, train and save the model.
# NOTE(review): the variable names below say "data_transformation" although
# they hold the model-trainer config and a ModelTrainer instance; they are
# kept unchanged in case later cells refer to them — consider renaming.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_model_trainer_config()
    data_transformation = ModelTrainer(config=data_transformation_config)
    data_transformation.train()
except Exception as e:
    # Record the failure with its traceback in the project log, then re-raise
    # with a bare `raise` so the original traceback is not extended.
    logger.exception(e)
    raise

[2024-07-01 22:51:44,438: INFO:common: yaml file: config\config.yaml loaded successfully]
[2024-07-01 22:51:44,445: INFO:common: yaml file: params.yaml loaded successfully]
[2024-07-01 22:51:44,445: INFO:common: createD Directory at:artifacts]
[2024-07-01 22:51:44,449: INFO:common: createD Directory at:artifacts/model_trainer]
Loaded array1: <class 'numpy.ndarray'>
Loaded array2: <class 'numpy.ndarray'>
(379, 39)
(379, 221)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 39, 100)           22100     
                                                                 
 lstm (LSTM)                 (None, 100)               80400     
                                                                 
 dense (Dense)               (None, 221)               22321     
                                                                 
Total params: 124821 (48