In [35]:
# metrics
from sklearn.metrics import roc_auc_score, accuracy_score
# joblib to save model
import joblib

# logger and config
from helpers.config import load_config
from helpers.logger import logger

# data transformation
from src.data.data_transformation import DataTransformation

import pandas as pd
from typing import Dict, List

# best model
from sklearn.linear_model import LogisticRegression

class Evaluation:
    """Class to evaluate metrics of best model from model trainer with best params."""
    
    def __init__(self, config: dict, data: DataTransformation | None = None):
        """Initialize ModelEvaluation class.
        
        Args:
            config (dict): Config file consisting of features, targets, file paths.
        """
        self.config = config or load_config()
        self.data = data or DataTransformation(self.config)
        self.scores = []
        
    def eval_best_model(self, 
                        y_test: List[pd.Series],
                        y_pred: List[pd.Series], 
                        y_pred_prob: List[pd.Series]
                        ) -> Dict[str, float]:
        """Evaluate metrics from best model from model trainer.
        
        Args:
            y_test (List[pd.Series]): the actual value.
            y_pred (List[pd.Series]): the predicted value.
            y_pred_prob (List[pd.Series]): the predicted probability.
        """
        try:
            # load in data
            X_train_scaled, X_test_scaled = self.data.split_and_scale_features()
            
            y_train, y_test = self.data.split_targets()
            

            
            model = LogisticRegression(
                C=10,
                max_iter= 1000,
                solver="liblinear"
                )
                
                
            
            model = model.fit(X_train_scaled, y_train)
            
            # save model
            joblib.dump(model,self.config['model_path'])
            
            y_pred = model.predict(X_test_scaled)
            y_pred_prob = model.predict_proba(X_test_scaled)[:, 1]
            
            # accuracy score and roc/auc score
            acc = accuracy_score(y_test, y_pred)
            roc = roc_auc_score(y_test, y_pred_prob)
    
            
            self.scores.append({
                "roc/auc": roc,
                "accuracy": acc
                })
            
            return self.scores
        except Exception as e:
            logger.error(f"Could not get scores: {e}")
        return None

# Placeholder inputs: eval_best_model() overwrites all three arguments with
# values derived from the fitted model and the data split, so these lists only
# satisfy the method signature.
y_pred = list(range(8))
y_pred_prob = list(range(8))
y_test = list(range(8))

# NOTE(review): `eval` shadows the builtin of the same name; the name is kept
# here in case other cells read it.
eval = Evaluation(config=load_config()).eval_best_model(
    y_test=y_test,
    y_pred=y_pred,
    y_pred_prob=y_pred_prob,
)
# Bare expression so the notebook displays the returned scores.
eval

[2026-01-04 14:42:08,824: INFO: data_transformation: Features have been split]
[2026-01-04 14:42:08,830: INFO: data_transformation: Shape of X_train_scaled: [[ 4.90283207  0.40849867  0.50869345 ... -0.6710645  -0.53641908
  -0.87616094]
 [-0.1817844   0.40849867  0.50869345 ...  2.32650402 -0.53641908
  -0.87616094]
 [-0.1817844   0.40849867  0.50869345 ... -0.07155079  1.86421408
  -0.87616094]
 ...
 [-0.1817844   0.40849867  0.50869345 ... -0.07155079 -0.53641908
   1.14134282]
 [-0.1817844   0.40849867  0.50869345 ... -0.6710645  -0.53641908
  -0.87616094]
 [-0.1817844   0.40849867  0.50869345 ... -0.6710645   1.86421408
  -0.87616094]]]
[2026-01-04 14:42:08,830: INFO: data_transformation: Shape of X_test_scaled: (476, 15)]
[2026-01-04 14:42:08,833: INFO: data_transformation: y_train and y_test have been initialized]
[2026-01-04 14:42:08,833: INFO: data_transformation: Shape of y_train: (1902,)]
[2026-01-04 14:42:08,834: INFO: data_transformation: Shape of y_test: (476,)]


[{'roc/auc': 0.8865698729582577, 'accuracy': 0.9327731092436975}]