### Sleep Disorder Model Build and Evaluate


In [1]:
import os

In [2]:
%pwd

'd:\\Projects\\Sleeping_disorder_detection\\research'

In [3]:
# The kernel starts in <project>/research (see the %pwd output above); step up
# one level so relative paths such as config\config.yaml resolve from the project root.
# Guarded on the directory name so re-running this cell does not climb further up.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\Projects\\Sleeping_disorder_detection'

In [5]:
import warnings

# Ignore every warning category for the rest of the kernel session
# (presumably to keep the training output below readable).
warnings.filterwarnings("ignore")

#### Model Building


In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ModelBuildEvaluateConfig:
    """Filesystem locations used by the model build-and-evaluate stage.

    Instances are created by ``ConfigurationManager.get_model_build_evaluate_config``
    from the ``model_build_evaluate`` section of the loaded configuration.
    """

    # Passed to create_directories() by ModelBuildEvaluate.__init__.
    root_dir: Path
    # Passed to create_directories() by ModelBuildEvaluate.__init__.
    model_dir: Path
    # Destination of the pickled model written by ModelBuildEvaluate.save_model.
    model_file: Path
    # CSV read by ModelBuildEvaluate.train_model.
    train_data_file: Path
    # CSV read by ModelBuildEvaluate.test_model.
    test_data_file: Path
    # Text file written by ModelBuildEvaluate.save_test_results.
    model_results_file: Path

In [7]:
from sleep_disorder.constants import *
from sleep_disorder.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Loads the config and params YAML files and builds per-stage config objects."""

    def __init__(self, config_file_path=CONFIG_FILE_PATH, params_file_path=PARAMS_FILE_PATH) -> None:
        """Read both YAML files via ``read_yaml``.

        Args:
            config_file_path: Path of the main configuration YAML.
            params_file_path: Path of the parameters YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

    def get_model_build_evaluate_config(self) -> ModelBuildEvaluateConfig:
        """Build the config object for the model build-and-evaluate stage.

        Returns:
            A ``ModelBuildEvaluateConfig`` whose fields are copied one-to-one
            from the ``model_build_evaluate`` section of the loaded config.
        """
        section = self.config.model_build_evaluate
        field_names = (
            "root_dir",
            "model_dir",
            "model_file",
            "train_data_file",
            "test_data_file",
            "model_results_file",
        )

        # Each dataclass field takes the same-named attribute of the section.
        return ModelBuildEvaluateConfig(**{field: getattr(section, field) for field in field_names})

In [9]:
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from sleep_disorder.logging import logger

In [10]:
from sklearn.metrics import classification_report, accuracy_score, recall_score, precision_score, f1_score

In [11]:
def get_results(name, accuracy, recall, precision, f1):
    """Collect per-model metrics into a single table.

    Args:
        name: Model names.
        accuracy: Accuracy values.
        recall: Recall values.
        precision: Precision values.
        f1: F1 values.

    Returns:
        A DataFrame with the columns ``Name``, ``Accuracy``, ``Recall``,
        ``Precision`` and ``F1_score``, in that order.
    """
    columns = {
        "Name": name,
        "Accuracy": accuracy,
        "Recall": recall,
        "Precision": precision,
        "F1_score": f1,
    }

    # Start from an empty frame and attach the columns one at a time, in order.
    table = pd.DataFrame()
    for label, values in columns.items():
        table[label] = values

    return table

In [14]:
class ModelBuildEvaluate:
    """Fit a fixed set of candidate classifiers, score them and persist the best one.

    Intended call order: ``train_model`` -> ``test_model`` -> ``save_model`` and
    ``save_test_results``. All file locations come from the
    ``ModelBuildEvaluateConfig`` given to the constructor.
    """

    # Label column expected in both the train and the test CSV files.
    TARGET_COLUMN = "Sleep Disorder"

    def __init__(self, config: ModelBuildEvaluateConfig) -> None:
        """Keep the configuration and hand the output directories to ``create_directories``.

        Args:
            config: Input data paths and output artifact paths for this stage.
        """
        self.config = config

        create_directories([self.config.root_dir, self.config.model_dir])

    def _load_split(self, data_file):
        """Read ``data_file`` as CSV and return ``(features, target)``."""
        data = pd.read_csv(data_file)
        return data.drop(columns=self.TARGET_COLUMN), data[self.TARGET_COLUMN]

    def _score_models(self, models, X, y, split_name, fit):
        """Predict with every model on ``(X, y)`` and tabulate the metrics.

        Args:
            models: Mapping of model name to estimator.
            X: Feature frame.
            y: True labels for ``X``.
            split_name: ``"Train"`` or ``"Test"``; only used in the printed report header.
            fit: When true, each estimator is fitted on ``(X, y)`` before it predicts.
                Must be false for held-out data; fitting there would turn the
                reported scores into in-sample scores.

        Returns:
            The DataFrame built by ``get_results``: one row per model, with
            accuracy plus macro-averaged recall, precision and F1.
        """
        names, accuracy, recall, precision, f1 = [], [], [], [], []

        for model_name, model in models.items():
            if fit:
                model.fit(X, y)
            pred = model.predict(X)
            print(f"{split_name} data {model_name} Classification Report:\n{classification_report(y, pred)}\n\n\n")
            names.append(model_name)
            accuracy.append(accuracy_score(y, pred))
            recall.append(recall_score(y, pred, average="macro"))
            precision.append(precision_score(y, pred, average="macro"))
            f1.append(f1_score(y, pred, average="macro"))

        return get_results(names, accuracy, recall, precision, f1)

    def train_model(self):
        """Fit every candidate classifier on the training CSV and score it in-sample.

        Returns:
            A tuple ``(trained_models, results)``: the mapping of model name to
            fitted estimator, and the training-set metrics table.
        """
        models = {
            "LogisticRegression": LogisticRegression(),
            "DecisionTreeClassifier": DecisionTreeClassifier(),
            "RandomForestClassifier": RandomForestClassifier(),
            "GradientBoostingClassifier": GradientBoostingClassifier(),
            "AdaBoostClassifier": AdaBoostClassifier(),
            "SVC": SVC(),
            "XGBClassifier": XGBClassifier(),
            "CatBoostClassifier": CatBoostClassifier(verbose=False),
        }

        X_train, y_train = self._load_split(self.config.train_data_file)
        results = self._score_models(models, X_train, y_train, split_name="Train", fit=True)

        return models, results

    def test_model(self, train_models):
        """Score already-fitted models on the test CSV and pick the most accurate one.

        The models are only used for prediction here. Refitting them on the test
        split would discard what they learned from the training data and make the
        reported test metrics in-sample.

        Args:
            train_models: Mapping of model name to fitted estimator, as returned
                by ``train_model``.

        Returns:
            A tuple ``(best_model, results)``: the estimator with the highest test
            accuracy (the first one listed wins a tie) and the test metrics table.

        Raises:
            ValueError: If ``train_models`` is empty.
        """
        if not train_models:
            raise ValueError("train_models is empty; nothing to evaluate.")

        X_test, y_test = self._load_split(self.config.test_data_file)

        print("\n\n\n\n\nTest Results:")

        results = self._score_models(train_models, X_test, y_test, split_name="Test", fit=False)

        best_model_name = results.loc[results["Accuracy"].idxmax(), "Name"]

        return train_models[best_model_name], results

    def save_model(self, model):
        """Pickle ``model`` to ``config.model_file`` and log the destination."""
        with open(self.config.model_file, "wb") as model_pkl:
            pickle.dump(model, model_pkl)
        logger.info(f"Best model saved to path: {self.config.model_file}")

    def save_test_results(self, test_results):
        """Write the test results as text to ``config.model_results_file``.

        Args:
            test_results: Text to write. Non-string objects (for example the
                results DataFrame) are converted with ``str`` first.
        """
        with open(self.config.model_results_file, "w") as f:
            f.write(str(test_results))
        # Log the results file path (not the model file path).
        logger.info(f"All model results saved to path: {self.config.model_results_file}")

In [15]:
# Run the whole stage: load config -> fit candidates -> score on the test split
# -> persist the best model and the test metrics table.
# Errors are left to propagate unchanged; a handler that only re-raises adds nothing.
config = ConfigurationManager()
model_train_evaluate_config = config.get_model_build_evaluate_config()
model_build_evaluate = ModelBuildEvaluate(config=model_train_evaluate_config)
train_models, train_results = model_build_evaluate.train_model()
best_model, test_results = model_build_evaluate.test_model(train_models=train_models)
model_build_evaluate.save_model(best_model)
# The results table is written as its plain-text rendering.
model_build_evaluate.save_test_results(test_results=str(test_results))

[2023-12-22 01:15:47,746]: INFO common yaml file: config\config.yaml loaded successfully.
[2023-12-22 01:15:47,749]: INFO common yaml file: params.yaml loaded successfully.
[2023-12-22 01:15:47,751]: INFO common Directory created at: artifacts/model_evaluation
[2023-12-22 01:15:47,752]: INFO common Directory created at: model
Train data LogisticRegression Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.96      0.87       157
           1       0.90      0.83      0.86        52
           2       0.85      0.42      0.56        52

    accuracy                           0.82       261
   macro avg       0.85      0.74      0.77       261
weighted avg       0.83      0.82      0.81       261




Train data DecisionTreeClassifier Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.97      0.97       157
           1       0.89      0.98      0.94        52
           2   