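### Phone Recommender Model

This notebook prototypes the model-training stage: it loads the transformed data, trains an LSTM text classifier, writes the test accuracy to a file, and saves the trained model and its tokenizer.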


In [1]:
import os

In [2]:
%pwd

'd:\\Imarticus_Learning\\12_Projects\\Phone_Recommendation_System\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\Imarticus_Learning\\12_Projects\\Phone_Recommendation_System'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ModelTrainConfig:
    """File-system locations used by the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_build_config`` from the
    ``model_training`` section of the config file.
    """

    # NOTE(review): fields are annotated as Path, but the values are copied straight from the
    # loaded config section, so they may arrive as plain strings — TODO confirm.

    # Directory for this stage; created via create_directories when the config is built.
    root_dir: Path
    # Destination passed to model.save() for the trained model.
    model_file: Path
    # Destination of the pickled tokenizer.
    tokenizer_file: Path
    # CSV file read with pd.read_csv as the training data.
    transform_data_file: Path
    # Text file that receives the "Test Accuracy" line after evaluation.
    model_evaluation_file: Path


@dataclass
class ModelTrainParams:
    """Hyperparameters used by the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_build_config`` from the
    ``parameters`` section of the params file.
    """

    # output_dim of the Embedding layer.
    embedding_dim: int
    # Number of units in the final softmax Dense layer.
    output_classes: int
    # Number of epochs passed to model.fit().
    epochs: int
    # Batch size passed to model.fit().
    batch_size: int

In [6]:
from phone_recommender.constants import *
from phone_recommender.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the config and params YAML files and exposes stage-specific settings."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH) -> None:
        # Both files are loaded once here; the getter below only reads from the results.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

    def get_model_build_config(self):
        """Build the settings objects for the model-training stage.

        Reads the ``model_training`` section of the config file and the ``parameters``
        section of the params file, and passes the stage's ``root_dir`` to
        ``create_directories`` before the settings objects are constructed.

        Returns:
            A ``(ModelTrainConfig, ModelTrainParams)`` tuple.
        """
        training_section = self.config.model_training
        hyperparameters = self.params.parameters

        create_directories([training_section.root_dir])

        # Attribute names copied one-to-one from the YAML sections into the dataclasses.
        path_fields = (
            "root_dir",
            "model_file",
            "tokenizer_file",
            "transform_data_file",
            "model_evaluation_file",
        )
        hyperparameter_fields = ("embedding_dim", "output_classes", "epochs", "batch_size")

        train_paths = ModelTrainConfig(
            **{field: getattr(training_section, field) for field in path_fields}
        )
        train_hyperparameters = ModelTrainParams(
            **{field: getattr(hyperparameters, field) for field in hyperparameter_fields}
        )

        return train_paths, train_hyperparameters

In [8]:
import pandas as pd
import pickle
from phone_recommender.logging import logger
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

]


### Model Building and Evaluation


In [9]:
class ModelTrainer:
    """Trains, evaluates and saves the LSTM text classifier.

    The trainer reads the transformed CSV named in its config, fits a
    Tokenizer -> Embedding -> LSTM -> softmax model on the ``text`` / ``class`` columns,
    writes the test accuracy to the evaluation file, and saves the model and tokenizer.
    """

    # NOTE(review): the defaults are the dataclass *types*, not instances; they look like they
    # were meant to be annotations. They are kept so the signature stays backward-compatible,
    # but callers should always pass real config/params objects.
    def __init__(
        self,
        config: ModelTrainConfig = ModelTrainConfig,
        params: ModelTrainParams = ModelTrainParams,
    ) -> None:
        self.config = config
        self.params = params

    def get_transformed_data(self) -> pd.DataFrame:
        """Load the transformed data CSV.

        Returns:
            The contents of ``config.transform_data_file`` as a DataFrame.

        Raises:
            FileNotFoundError: if the transformed data file does not exist. Previously this
                case only logged a message and returned ``None``, which made the next step
                fail with an unrelated-looking error.
        """
        transform_data_file = self.config.transform_data_file
        if not os.path.exists(transform_data_file):
            message = (
                f"No transform data file at {transform_data_file}; "
                "please check if data transform is complete"
            )
            logger.error(message)
            raise FileNotFoundError(message)

        return pd.read_csv(transform_data_file)

    def build_model(self, df: pd.DataFrame):
        """Fit the classifier on ``df`` and record its test accuracy.

        Args:
            df: frame with a ``text`` column (input strings) and a ``class`` column
                (integer class labels in ``range(params.output_classes)``).

        Returns:
            A ``(model, tokenizer)`` tuple: the fitted Keras model and the fitted Tokenizer.

        Raises:
            ValueError: if ``df`` is ``None`` or has no rows.
        """
        if df is None or len(df) == 0:
            raise ValueError("Cannot build the model: the transformed data is empty or missing")

        texts = list(df['text'])
        labels = df['class']

        embedding_dim = self.params.embedding_dim
        output_classes = self.params.output_classes

        # Map each text to a sequence of integer word ids.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(texts)
        sequences = tokenizer.texts_to_sequences(texts)

        # Pad every sequence to the longest one so the network sees a fixed input length.
        max_sequence_length = max(map(len, sequences))
        logger.info(f"Maximum sequence length: {max_sequence_length}")
        padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

        # One-hot encode with an explicit width so the targets always match the softmax layer,
        # even when the highest class label is absent from the data.
        y = to_categorical(labels, num_classes=output_classes)

        X_train, X_test, y_train, y_test = train_test_split(
            padded_sequences, y, test_size=0.2, random_state=42
        )

        # Index 0 is reserved for padding, hence the +1.
        vocab_size = len(tokenizer.word_index) + 1

        model = Sequential()
        model.add(
            Embedding(
                input_dim=vocab_size,
                output_dim=embedding_dim,
                input_length=padded_sequences.shape[1],
            )
        )
        model.add(LSTM(100))
        model.add(Dense(output_classes, activation='softmax'))

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        # summary() prints by itself and returns None, so it must not be wrapped in print().
        model.summary()

        model.fit(
            X_train,
            y_train,
            epochs=self.params.epochs,
            batch_size=self.params.batch_size,
            validation_split=0.2,
        )

        # evaluate() returns [loss, accuracy] given the metrics compiled above.
        accuracy = model.evaluate(X_test, y_test)[1]
        with open(self.config.model_evaluation_file, "w") as f:
            f.write(f'Test Accuracy: {accuracy * 100:.2f}%')

        return model, tokenizer

    def save_model_tokenizer(self, model: Sequential, tokenizer: Tokenizer):
        """Save the trained model and its tokenizer to the configured files.

        Args:
            model: fitted model, saved to ``config.model_file`` via ``model.save``.
            tokenizer: fitted tokenizer, pickled to ``config.tokenizer_file``.
        """
        model.save(self.config.model_file)

        # The tokenizer is pickled; only unpickle this file from a trusted location.
        with open(self.config.tokenizer_file, 'wb') as handle:
            pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [10]:
# Run the training stage end to end: load settings, read the transformed data,
# train and evaluate the model, then save the model and tokenizer.
try:
    config = ConfigurationManager()
    model_build_config, model_params_config = config.get_model_build_config()
    model_trainer = ModelTrainer(model_build_config, model_params_config)
    df = model_trainer.get_transformed_data()
    model, tokenizer = model_trainer.build_model(df)
    model_trainer.save_model_tokenizer(model, tokenizer)
except Exception as e:
    # Record the failure in the project log, then re-raise with the original traceback
    # (a bare `raise` avoids the extra frame that `raise e` adds).
    logger.exception(e)
    raise

[2023-12-10 00:28:43,102: INFO: common: yaml file: config\config.yaml loads successfully]
[2023-12-10 00:28:43,110: INFO: common: yaml file: params.yaml loads successfully]
[2023-12-10 00:28:43,111: INFO: common: created directory at : artifacts/model_training]


54
]
]
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 54, 50)            29350     
                                                                 
 lstm (LSTM)                 (None, 100)               60400     
                                                                 
 dense (Dense)               (None, 3)                 303       
                                                                 
Total params: 90053 (351.77 KB)
Trainable params: 90053 (351.77 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/25
]
]
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25

  saving_api.save_model(


### Finish
