In [20]:
%pwd

'C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops'

In [21]:
import os
from pathlib import Path
# Run the rest of the notebook from the project root.
project_root = Path("C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops\\")
os.chdir(project_root)

In [22]:
%pwd

'C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops'

In [23]:
from software_defect_prediction.constants import *
from software_defect_prediction.utils.common import *
from software_defect_prediction.entity.config_entity import ModelTrainerConfig
from software_defect_prediction.config.configuration import ConfigurationManager

import shutil

In [24]:
from importlib.metadata import files
from sys import exception
import pandas as pd

class Model_Trainer():
    """Stage the train/test files in the trainer directory and load the training set.

    ``train_df`` is ``None`` until :meth:`prepare_and_load_files` has completed
    successfully; afterwards it holds the training data as a DataFrame.
    """

    def __init__(self,model_trainer_config : ModelTrainerConfig, predictor_col : str) -> None:
        """Keep the configuration and the predictor column name on the instance.

        Args:
            model_trainer_config: Configuration object; this class reads its
                ``source_file_path``, ``root_dir``, ``train_file`` and
                ``test_file`` attributes.
            predictor_col: Column name stored as ``self.predictor_col``.
        """
        self.config = model_trainer_config
        self.predictor_col = predictor_col
        self.train_df = None

    def prepare_and_load_files(self) -> None:
        """Copy the train and test files into ``root_dir`` and load the train file.

        For each of ``config.train_file`` and ``config.test_file`` any copy
        already present in ``config.root_dir`` is removed and the file is copied
        again from ``config.source_file_path``. The training file is then read
        with ``pd.read_csv`` into ``self.train_df``.

        Raises:
            Exception: Any error raised while copying or reading is logged and
                re-raised unchanged.
        """
        try:
            source_dir = Path(self.config.source_file_path)
            target_dir = Path(self.config.root_dir)

            for file_name in (self.config.train_file, self.config.test_file):
                source_file_path = source_dir / file_name
                destination_file_path = target_dir / file_name
                if os.path.exists(destination_file_path):
                    os.remove(destination_file_path)

                # Copy to the exact path that was just cleaned up and that is
                # read below, rather than relying on the directory form of copy.
                shutil.copy(source_file_path, destination_file_path)

            self.train_df = pd.read_csv(target_dir / self.config.train_file)
            logger.info("input file loaded successfully")

        # Catch the built-in Exception class. ``sys.exception`` is a function,
        # not an exception type, and using it in an ``except`` clause raises
        # TypeError instead of handling the original error.
        except Exception:
            logger.error("input file loading failed")
            # Bare raise keeps the original exception and traceback intact.
            raise
        

In [27]:
# Build the trainer from the project configuration, stage the input files and
# keep a handle on the loaded training frame.
trainer_config = ConfigurationManager().get_model_trainer_config()
target_column_name = ConfigurationManager().get_data_schema().TARGET_COLUMN.name

mod_tr = Model_Trainer(trainer_config, target_column_name)
mod_tr.prepare_and_load_files()

train_df = mod_tr.train_df

[32m2024-05-29 10:21:05.491[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: config\config.yaml loaded successfully[0m
[32m2024-05-29 10:21:05.493[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: params.yaml loaded successfully[0m
[32m2024-05-29 10:21:05.497[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mread_yaml[0m:[36m31[0m - [1myaml file: schema.yaml loaded successfully[0m
[32m2024-05-29 10:21:05.499[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mcreate_directories[0m:[36m51[0m - [1mcreated directory at: artifacts[0m
[32m2024-05-29 10:21:05.499[0m | [1mINFO    [0m | [36msoftware_defect_prediction.utils.common[0m:[36mcreate_directories[0m:[36m51[0m - [1mcreated directory at: artifacts/model_trainer[0m
[32m2024-05-29 10:21:05.506[0m | [1mINFO    

In [31]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras_tuner.tuners import RandomSearch
import pandas as pd

# Step 1: Preprocess Data
# Directory that holds the staged train/test CSV files. Every backslash is
# escaped: an unescaped "\k" is an invalid escape sequence that newer Python
# versions warn about.
MODEL_TRAINER_DIR = Path("C:\\Users\\kural\\Desktop\\Projects\\End_To_End_MLops\\artifacts\\model_trainer")

train_df = pd.read_csv(MODEL_TRAINER_DIR / "train_data.csv")
test_df = pd.read_csv(MODEL_TRAINER_DIR / "test_data.csv")

# Columns excluded from the feature matrix: 'defects' is the label used below;
# 'id' is presumably a row identifier -- TODO confirm against the schema.
NON_FEATURE_COLUMNS = ['id', 'defects']

X_train = train_df.drop(columns=NON_FEATURE_COLUMNS)
X_test = test_df.drop(columns=NON_FEATURE_COLUMNS)

# Labels are cast to int so they can be used with the binary cross-entropy loss.
y_train = train_df['defects'].astype(int)
y_test = test_df['defects'].astype(int)

# Step 2: Define the DNN Model
def build_model(hp):
    """Assemble and compile a binary classifier from tuner-supplied hyperparameters.

    The input width is taken from the module-level ``X_train``. The network is
    a stack of 1-3 dense hidden layers followed by a single sigmoid unit.

    Args:
        hp: Hyperparameter container; this function calls its ``Int``,
            ``Choice`` and ``Float`` methods.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    feature_count = X_train.shape[1]

    network = tf.keras.Sequential()
    network.add(layers.InputLayer(input_shape=(feature_count,)))

    # Depth, per-layer width and the activation are all searched by the tuner.
    hidden_layer_count = hp.Int('num_layers', 1, 3)
    for layer_index in range(hidden_layer_count):
        width = hp.Int(f'units_{layer_index}', min_value=32, max_value=256, step=32)
        nonlinearity = hp.Choice('activation', ['relu', 'tanh'])
        network.add(layers.Dense(units=width, activation=nonlinearity))

    # Single sigmoid output for the binary target.
    network.add(layers.Dense(1, activation='sigmoid'))

    # The learning rate is sampled on a log scale between 1e-4 and 1e-2.
    step_size = hp.Float('learning_rate', 1e-4, 1e-2, sampling='LOG')
    network.compile(
        optimizer=tf.keras.optimizers.Adam(step_size),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return network

# Step 3: Hyperparameter Tuning with Keras Tuner
# Fixed seed so the sampled hyperparameters and the weight initialisation are
# repeatable across runs of this cell.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# NOTE(review): trial results are stored under directory/project_name and an
# existing project there is reused on re-run; pass overwrite=True to start a
# fresh search.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    seed=SEED,
    directory='hyperparameter_tuning',
    project_name='defect_classification'
)

tuner.search_space_summary()

# Step 4: Perform Hyperparameter Tuning
# 20% of the training data is held out for validation; each run stops early
# after 3 epochs without improvement of the callback's monitored metric.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=3)
tuner.search(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping]
)

# Step 5: Evaluate the Best Model
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

# Score the best model on the test set.
loss, accuracy = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

Trial 5 Complete [00h 01m 22s]
val_accuracy: 0.8121241927146912

Best val_accuracy So Far: 0.8130132754643759
Total elapsed time: 00h 06m 33s


  saveable.load_own_variables(weights_store.get(inner_path))


[1m955/955[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8163 - loss: 0.4313
Test Accuracy: 0.8137
