In [1]:
import os
# Display the kernel's current working directory.
%pwd

'd:\\Machine_Learning\\Titanic_Pipeline_Project\\research'

In [2]:
os.chdir("../")
%pwd

'd:\\Machine_Learning\\Titanic_Pipeline_Project'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelEvaluationConfig:
    """Settings consumed by the model-evaluation stage.

    Attributes:
        root_dir: Directory belonging to the evaluation stage.
        data_path: CSV file holding the data to evaluate on.
        model_path: Location of the serialized model.
        metrics_file_name: File the evaluation metrics are written to.
    """

    root_dir: Path
    data_path: Path
    model_path: Path
    metrics_file_name: Path

In [4]:
from titanic.constants import *
from titanic.utils.common import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Load the project's YAML files and build per-stage config objects."""

    def __init__(
            self,
            config_file_path = CONFIG_FILE_PATH,
            params_file_path = PARAMS_FILE_PATH
            ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_file_path: YAML file describing the pipeline stages.
            params_file_path: YAML file holding the parameters.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the model-evaluation settings from the ``model_evaluation`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            ModelEvaluationConfig: settings with every field coerced to ``Path``.
        """
        section = self.config.model_evaluation

        create_directories([section.root_dir])

        # The dataclass fields are annotated as Path, so convert the raw YAML
        # values instead of passing them through unchanged.
        return ModelEvaluationConfig(
            root_dir=Path(section.root_dir),
            data_path=Path(section.data_path),
            model_path=Path(section.model_path),
            metrics_file_name=Path(section.metrics_file_name),
        )


In [6]:
from sklearn.preprocessing import StandardScaler
from titanic.utils.common import load_object
from sklearn.metrics import accuracy_score, confusion_matrix
from titanic.logging import logger
import pandas as pd
import yaml

class ModelEvaluation:
    """Score a previously trained model on a dataset and persist the metrics."""

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation settings.

        Args:
            config: Paths of the dataset, the model and the metrics file.
        """
        self.config = config

    def load_dataset(self):
        """Read the CSV at ``config.data_path`` and return it as a DataFrame."""
        df = pd.read_csv(self.config.data_path)
        logger.info("Data loaded successfully for model evaluation")
        return df

    def scaled_data(self):
        """Split the dataset into features and target and standardize the features.

        Returns:
            tuple: ``(x, y)`` where ``x`` is the output of
            ``StandardScaler.fit_transform`` on every column except
            ``Survived`` and ``y`` is the ``Survived`` column.
        """
        df = self.load_dataset()

        # `columns=` already selects the axis, so no `axis` argument is needed.
        x = df.drop(columns='Survived')
        y = df['Survived']

        # NOTE(review): the scaler is fitted on the evaluation data itself. If
        # the model was trained with a different fitted scaler, that scaler
        # should be persisted and reused here - confirm against the training
        # stage.
        sd = StandardScaler()
        x = sd.fit_transform(x)

        logger.info("Data has been scaled successfully")
        return x, y

    def evaluate(self):
        """Predict on the scaled data and compute accuracy and confusion matrix.

        Returns:
            dict: ``{'accuracy': str, 'confusion_matrix': str}`` - both values
            are stringified before being returned.
        """
        x, y_true = self.scaled_data()

        # Load the model named in the configuration so that changing
        # `model_path` actually takes effect, instead of always reading a
        # hard-coded artifacts location.
        model = load_object(Path(self.config.model_path))
        logger.info("Model Loaded successfully for model evaluation")

        y_pred = model.predict(x)

        accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
        cm = confusion_matrix(y_true=y_true, y_pred=y_pred)

        logger.info(f"Accuracy of the Model: {accuracy}")
        logger.info(f"Confusion Matrix of the Model: {cm}")

        content = dict()
        content['accuracy'] = str(accuracy)
        content['confusion_matrix'] = str(cm)

        return content

    def save_metrics(self):
        """Run the evaluation and dump the metrics as YAML to ``config.metrics_file_name``."""
        content = self.evaluate()

        with open(self.config.metrics_file_name, 'w') as f:
            yaml.dump(content, f)

        # Log only after the write has completed, so the message is never
        # emitted for a file that failed to be written.
        logger.info(f"Metrics saved to {self.config.metrics_file_name}")
        

In [7]:
# Build the evaluation settings, run the evaluation and write the metrics file.
# The former `try/except Exception as e: raise e` wrapper re-raised the error
# unchanged, so it is dropped; failures surface with their original traceback.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(model_evaluation_config)
model_evaluation.save_metrics()

[2023-12-30 13:31:32,860: INFO: common: yaml file config\config.yaml loaded successfully]
[2023-12-30 13:31:32,860: INFO: common: yaml file params.yaml loaded successfully]
[2023-12-30 13:31:32,871: INFO: common: created directory at: artifacts]
[2023-12-30 13:31:32,871: INFO: common: created directory at: artifacts/model_evaluation]
[2023-12-30 13:31:32,871: INFO: 646563725: Data loaded successfully for model evaluation]
[2023-12-30 13:31:32,879: INFO: 646563725: Data has been scaled successfully]
[2023-12-30 13:31:32,981: INFO: 646563725: Model Loaded successfully for model evaluation]
[2023-12-30 13:31:32,989: INFO: 646563725: Accuracy of the Model: 0.9347581552305961]
[2023-12-30 13:31:32,989: INFO: 646563725: Confusion Matrix of the Model: [[536  13]
 [ 45 295]]]
[2023-12-30 13:31:32,993: INFO: 646563725: Metrics saved to artifacts/model_evaluation/metrics.txt]


In [8]:
import numpy as np 

# Scratch check: tolist() converts the array to nested Python lists, so str()
# yields a single-line, comma-separated representation.
a = np.arange(1, 7).reshape(2, 3)
str(a.tolist())

'[[1, 2, 3], [4, 5, 6]]'

In [9]:
import yaml

# Scratch check of how a metrics dict is rendered as YAML.
# The text is printed instead of being written to PARAMS_FILE_PATH: dumping
# this throwaway sample there would replace the project's parameters file.
# A dedicated name also avoids rebinding `config` to a different kind of object.
sample_metrics = {'confusionmatrix': '[[1,2,3,4],[5,6,7,8]]', 'accuracy': 0.33452}

# Without a stream argument yaml.dump returns the YAML document as a string.
print(yaml.dump(sample_metrics))


In [10]:
# Scratch check: max() over a dict iterates its keys, while max() over the
# values compares the tuples element by element.
a = dict(enumerate([('a', 'b'), ('c', 'd'), ('e', 'f'), ('g', 'h')], start=1))

bm = max(a.values())
a[max(a)][0]

'g'