In [1]:
import os

In [2]:
%pwd


'c:\\Users\\Jaison\\Documents\\Workspace\\Main Projects\\Audio-Based-Anomaly-Detection-for-Industrial-Machinery-End-to-End-Project-using-MLflow-DVC\\notebooks'

In [3]:
os.chdir("../")
%pwd

'c:\\Users\\Jaison\\Documents\\Workspace\\Main Projects\\Audio-Based-Anomaly-Detection-for-Industrial-Machinery-End-to-End-Project-using-MLflow-DVC'

In [4]:
os.environ["MLFLOW_TRACKING_URI"]="https://dagshub.com/JAISON14/Audio-Based-Anomaly-Detection-for-Industrial-Machinery-End-to-End-Project-using-MLflow-DVC.mlflow" 
os.environ["MLFLOW_TRACKING_USERNAME"]="JAISON14" 
os.environ["MLFLOW_TRACKING_PASSWORD"]="bd73d8bfc9f7a55ee6faf6cdad0bb66177f5f752"


## Entity

In [5]:
import tensorflow as tf

In [6]:
#model = tf.keras.models.load_model("artifacts/training/model.h5")

In [26]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    trained_model_path: Path
    root_dir: Path
    all_params: dict
    mlflow_uri: str
    feature_names_path: Path
    feature_importance_path: Path
    X_combined_test_path: Path
    y_combined_test_path: Path
    scores_path : Path
    params_epochs: int
    params_batch_size: int
    params_feature_count: int

In [27]:
from Anomaly_Detection.constants import *
from Anomaly_Detection.utils.common import read_yaml, create_directories,save_bin,load_bin

In [28]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    
    def get_evaluation_config(self) -> EvaluationConfig:
        evaluation = self.config.evaluation
        params = self.params.training
        create_directories([
            Path(evaluation.root_dir)
        ])
        eval_config = EvaluationConfig(

            mlflow_uri="https://dagshub.com/JAISON14/Audio-Based-Anomaly-Detection-for-Industrial-Machinery-End-to-End-Project-using-MLflow-DVC.mlflow",
            root_dir=Path(evaluation.root_dir),
            feature_names_path=Path(evaluation.feature_names_path),
            trained_model_path=Path(evaluation.trained_model_path),
            feature_importance_path=Path(evaluation.feature_importance_path),
            X_combined_test_path=Path(evaluation.X_combined_test_path),
            y_combined_test_path=Path(evaluation.y_combined_test_path),
            scores_path=Path(evaluation.scores_path),
            all_params=self.params,
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_feature_count=params.FEATURE_COUNT
        )
        return eval_config


## Components

In [29]:
import tensorflow as tf
from pathlib import Path
import mlflow
import mlflow.keras
from urllib.parse import urlparse
import os
import urllib.request as request
import tensorflow as tf
from tensorflow.keras.models import Model
from Anomaly_Detection import logger
from Anomaly_Detection.utils.common import get_size,save_json
from tensorflow.keras.models import load_model

import pandas as pd

import numpy as np
import librosa
import os
import joblib

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, precision_recall_curve


In [44]:


class Evaluation:
    def __init__(self, config: EvaluationConfig):
        self.config = config

    def model_evaluation(self,autoencoder,X_combined_test, y_combined_test,top_features):
        reconstructed_combined = autoencoder.predict(X_combined_test)
        mse_combined = np.mean(np.power(X_combined_test - reconstructed_combined, 2), axis=1)
        # Calculate precision-recall curve
        precisions, recalls, thresholds = precision_recall_curve(y_combined_test, mse_combined)

        # Calculate F1 score for each threshold
        f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
        optimal_idx = np.argmax(f1_scores)
        optimal_threshold = thresholds[optimal_idx]

        # Use the optimal threshold to define anomalies
        optimal_predictions = (mse_combined > optimal_threshold).astype(int)

        # Calculate metrics using the optimal threshold
        optimal_accuracy = accuracy_score(y_combined_test, optimal_predictions)
        optimal_precision = precision_score(y_combined_test, optimal_predictions)
        optimal_recall = recall_score(y_combined_test, optimal_predictions)
        optimal_f1 = f1_score(y_combined_test, optimal_predictions)
        optimal_cm = confusion_matrix(y_combined_test, optimal_predictions)

        self.save_score(optimal_threshold,optimal_accuracy,optimal_precision,optimal_recall,optimal_f1)

        self.feature_importance(reconstructed_combined,X_combined_test,top_features)
        
        # Print metrics using the optimal threshold
        logger.info(f"Optimal Threshold: {optimal_threshold}")
        logger.info(f"Accuracy: {optimal_accuracy}")
        logger.info(f"Precision: {optimal_precision}")
        logger.info(f"Recall: {optimal_recall}")
        logger.info(f"F1 Score: {optimal_f1}")
        logger.info(f"confusion_matrix: {optimal_cm}")   

    def feature_importance(self,reconstructed_combined,X_combined_test,feature_names):
        # Calculate the mean squared reconstruction error for each feature
        mse_features = np.mean(np.power(X_combined_test - reconstructed_combined, 2), axis=0)
        # Rank features by reconstruction error
        feature_importance_ranking = np.argsort(mse_features)[::-1]  # Features with the highest error first
        # Create a dictionary to store feature names and their importance scores
        feature_importance = {}
        for idx, feature_idx in enumerate(feature_importance_ranking):
            feature_name = feature_names[feature_idx] 
            importance_score = mse_features[feature_idx]
            feature_importance[feature_name] = importance_score
        joblib.dump(feature_importance,(os.path.join(self.config.root_dir,"feature_importance.pkl")))
        

        
    def feature_selection(self, N, feature_importance_ranking, feature_names):
        top_features_indices = feature_importance_ranking[:N]
        top_features=[]
        for rank in feature_importance_ranking[:N]:
            top_features.append(feature_names[rank])
        return top_features,top_features_indices

    def evaluation(self):
        logger.info(f"Starting Model Evaluation")
        feature_names = joblib.load(self.config.feature_names_path)

        n = self.config.params_feature_count
        feature_importance_ranking= joblib.load(self.config.feature_importance_path)

        top_features,top_features_indices = self.feature_selection(n, feature_importance_ranking, feature_names)
        X_combined_test = joblib.load(self.config.X_combined_test_path)
        y_combined_test = joblib.load(self.config.y_combined_test_path)

        autoencoder = load_model(self.config.trained_model_path)
        self.model_evaluation(autoencoder,X_combined_test, y_combined_test,top_features)        
   

    def save_score(self,optimal_threshold,optimal_accuracy,optimal_precision,optimal_recall,optimal_f1):
        scores = {"Optimal Threshold": optimal_threshold, "Accuracy": optimal_accuracy,
                  "Precision": optimal_precision , "Recall": optimal_recall,
                  "F1 Score": optimal_f1
                  }
        #joblib.dump(scores,(os.path.join(self.config.root_dir, "scores.pkl")))
        joblib.dump(scores,self.config.scores_path)
        save_json(path=Path((os.path.join(self.config.root_dir, "scores.json"))), data=scores)

    
    def log_into_mlflow(self):
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        
        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            feature_importance=joblib.load((os.path.join(self.config.root_dir,"feature_importance.pkl")))
            mlflow.log_params(feature_importance)

            #scores=joblib.load((os.path.join(self.config.root_dir, "scores.pkl")))
            scores=joblib.load(self.config.scores_path)

            # Load the model
            model = load_model(self.config.trained_model_path)
            
            mlflow.log_metrics(
                scores
            )
            # Model registry does not work with file store
            if tracking_url_type_store != "file":

                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(model, "model", registered_model_name="Top 5 Features")
            else:
                mlflow.keras.log_model(model, "model")

In [45]:
try:
    config = ConfigurationManager()
    eval_config = config.get_evaluation_config()
    evaluation = Evaluation(eval_config)
    evaluation.evaluation()
    evaluation.log_into_mlflow()

except Exception as e:
   raise e

[2023-12-04 23:00:22,189: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-04 23:00:22,197: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-04 23:00:22,202: INFO: common: created directory at: artifacts]
[2023-12-04 23:00:22,209: INFO: common: created directory at: artifacts\evaluation]
[2023-12-04 23:00:22,212: INFO: 1110878222: Starting Model Evaluation]


[2023-12-04 23:00:25,009: INFO: common: json file saved at: artifacts\evaluation\scores.json]
[2023-12-04 23:00:25,014: INFO: 1110878222: Optimal Threshold: 1.26537117259149]
[2023-12-04 23:00:25,017: INFO: 1110878222: Accuracy: 0.9488372093023256]
[2023-12-04 23:00:25,023: INFO: 1110878222: Precision: 0.9847328244274809]
[2023-12-04 23:00:25,029: INFO: 1110878222: Recall: 0.9347826086956522]
[2023-12-04 23:00:25,039: INFO: 1110878222: F1 Score: 0.9591078066914499]
[2023-12-04 23:00:25,051: INFO: 1110878222: confusion_matrix: [[ 75   2]
 [  9 129]]]




INFO:tensorflow:Assets written to: C:\Users\Jaison\AppData\Local\Temp\tmphr2ikipv\model\data\model\assets
[2023-12-04 23:00:37,052: INFO: builder_impl: Assets written to: C:\Users\Jaison\AppData\Local\Temp\tmphr2ikipv\model\data\model\assets]


Registered model 'Top 5 Features' already exists. Creating a new version of this model...
2023/12/04 23:01:16 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: Top 5 Features, version 3
Created version '3' of model 'Top 5 Features'.
