In [1]:
import os
%pwd  # this tell us which path we are currently working , so based on the below output path we are working under the research file
os.chdir("c:\\datascience End to End Projects\\steel-plant-Load-Prediction-")  #  but i would like to work with main ProjectML_with_MLFlow file , so for getting i step back in path inorder to enter the main project file i used this command os.chdir("../")
%pwd

'c:\\datascience End to End Projects\\steel-plant-Load-Prediction-'

In [2]:
# Now iam creating the entity class which consist of config.yaml folder model trainer code part variables, along with that iam adding some more varaibles like alpha,l1_ratio,target_column inside my entity class
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    target_column: str  # this target column is present inside the Schema.yaml file which it tells us the quality of the Wine based on the value it returns

In [3]:
from PROJECTML.constants import *
from PROJECTML.utils.common import read_yaml, create_directories
from PROJECTML import logger

In [4]:
# this template we use for every stage like data_ingestion,data_validation,data_transformation, model trainer .. etc
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

# this is part of code for the Model trainerConfig which helps us to return the configuration
    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer   # here iam reading the schema, params 
        #params = self.params.ElasticNet
        schema =  self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            model_name = config.model_name,
            #alpha = params.alpha,    # here from params iam taking the alpha l1_ratio
            #l1_ratio = params.l1_ratio, 
            target_column = schema.name # here from schema iam taking the name which i will return through target_column
            
        )

        return model_trainer_config # here iam returning all variables from the configuration

In [5]:
import pandas as pd
import os
from PROJECTML import logger
import joblib # here iam saving the model because i want to save the data
from sklearn.model_selection import train_test_split
from src.PROJECTML.config.configuration import ConfigurationManager
from src.PROJECTML.components.data_transformation import DataTransformation
from sklearn.metrics import accuracy_score
import pickle
#from PROJECTML.entity.config_entity import ModelTrainerConfig
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.ensemble import  ExtraTreesClassifier
import numpy as np

In [8]:
class ModelTrainer:
    def __init__(self, config:ModelTrainerConfig):
        self.config = config

    def train(self):
        self.train_data = pd.read_csv(self.config.train_data_path)
        self.test_data = pd.read_csv(self.config.test_data_path)

        self.x_train = self.train_data.drop(columns=['Load_Type'])
        print(f'this is the self.x_train dataset {self.x_train.columns}')
        self.y_train = self.train_data['Load_Type']
        self.x_test = self.test_data.drop(columns=['Load_Type'])
        self.y_test = self.test_data['Load_Type']


    def model(self):
        clf = [
            ExtraTreesClassifier()
        ]

        scores = []

        for model in clf:
            model.fit(self.x_train, self.y_train)
            train_pred = model.predict(self.x_train)
            test_pred = model.predict(self.x_test)

            train_accuracy = accuracy_score(self.y_train, train_pred)
            test_accuracy = accuracy_score(self.y_test, test_pred)

            train_cm = confusion_matrix(self.y_train, train_pred)
            test_cm = confusion_matrix(self.y_test, test_pred)

            train_precision = precision_score(self.y_train, train_pred, average='weighted')
            test_precision = precision_score(self.y_test, test_pred, average='weighted')

            train_recall = recall_score(self.y_train, train_pred, average='weighted')
            test_recall = recall_score(self.y_test, test_pred, average='weighted')

            train_f1 = f1_score(self.y_train, train_pred, average='weighted')
            test_f1 = f1_score(self.y_test, test_pred, average='weighted')

            scores.append({
                'Model': type(model).__name__,
                'Training Accuracy': train_accuracy,
                'Testing Accuracy': test_accuracy,
                'Training Precision': train_precision,
                'Testing Precision': test_precision,
                'Training Recall': train_recall,
                'Testing Recall': test_recall,
                'Training F1-score': train_f1,
                'Testing F1-score': test_f1
            })

            print("Model:", type(model).__name__)
            print("Training Accuracy:", train_accuracy)
            print("Testing Accuracy:", test_accuracy)
            print("Training Precision:", train_precision)
            print("Testing Precision:", test_precision)
            print("Training Recall:", train_recall)
            print("Testing Recall:", test_recall)
            print("Training F1-score:", train_f1)
            print("Testing F1-score:", test_f1)

            joblib.dump(model, os.path.join(self.config.root_dir, f"{type(model).__name__}_model.joblib"))


                # Load the trained model  and test model 
            model = joblib.load("artifacts\model_trainer\ExtraTreesClassifier_model.joblib")  # Replace "path_to_saved_model.pkl" with the actual path

            #self.preprocessor = joblib.load('artifacts\data_transformation\categorical_preprocessor_obj.joblib')
            # Prepare input data for prediction (a single sample row)
            # Replace the feature values with the values of your unseen test data
            single_sample = {
            'Usage_kWh': 8.46,
            'Lagging_Reactive_Power_kVarh': 0,
            'Leading_Reactive_Power_kVarh': 25.92,
            'CO2': 0,
            'Lagging_Power_Factor': 100,
            'Leading_Power_Factor': 31.03,
            'NSM': 45000,
            'WeekStatus_Weekday': 1,
            'WeekStatus_Weekend': 0,
            'hour': 20
            
        }
                    
        #8.46,0,25.92,0,100,31.03,45000,Weekday,Tuesday,Medium_Load

        #40.25,8.82,0.5,0,97.68,99.99,67500,Weekday,Tuesday,Maximum_Load

        # Convert the dictionary to a DataFrame
        input_data = pd.DataFrame([single_sample])
        #preprocessed_input_data = self.preprocessor.transform(input_data)

        # Ensure that the columns of input_data match the order of features used during training
        # You might need to rearrange the columns or add missing columns
        input_data = input_data[self.x_train.columns]

        # Perform prediction
        prediction = model.predict(input_data)

        print("Predicted class label:", prediction)




        return pd.DataFrame(scores)








In [9]:
try:
    config = ConfigurationManager() # here iam initlizing my ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config() # here iam getting my get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config) # here iam  passing my  model_trainer_config to the ModelTrainer function
    model_trainer_config.train() # here iam training the model
    model_trainer_config.model()
except Exception as e:
    raise e

[2024-04-17 19:57:50,973: INFO: common: yaml file: config\config.yaml loaded successfully]


[2024-04-17 19:57:50,977: INFO: common: yaml file: params.yaml loaded successfully]
[2024-04-17 19:57:50,984: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-04-17 19:57:50,987: INFO: common: created directory at: artifacts]
[2024-04-17 19:57:50,990: INFO: common: created directory at: artifacts/model_trainer]
this is the self.x_train dataset Index(['WeekStatus_Weekday', 'WeekStatus_Weekend', 'Usage_kWh',
       'Lagging_Reactive_Power_kVarh', 'Leading_Reactive_Power_kVarh', 'CO2',
       'Lagging_Power_Factor', 'Leading_Power_Factor', 'NSM', 'hour'],
      dtype='object')
Model: ExtraTreesClassifier
Training Accuracy: 0.9996802911809551
Testing Accuracy: 0.953519256308101
Training Precision: 0.9996802803009861
Testing Precision: 0.9537045136445582
Training Recall: 0.9996802911809551
Testing Recall: 0.953519256308101
Training F1-score: 0.9996802821192954
Testing F1-score: 0.9535041829919827
Predicted class label: [2]
