In [95]:
import os

In [110]:
%pwd

'c:\\Users\\pouru\\OneDrive\\Desktop\\360_digit_mg_solution\\360_digit_mg_project\\Fair-and-Efficient-Bonus-Allocation-System'

In [10]:
os.chdir("../")

In [96]:
%pwd

'c:\\Users\\pouru\\OneDrive\\Desktop\\360_digit_mg_solution\\360_digit_mg_project\\Fair-and-Efficient-Bonus-Allocation-System'

In [119]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str


In [120]:
from src.Bonus_Allocation_System.constants import *
from src.Bonus_Allocation_System.utils.common import read_yaml,create_directories

In [121]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.KNeighborsClassifier
        schema =  self.schema.TARGET_COLUMNS

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            model_name = config.model_name,

            
        )

        return model_trainer_config

In [122]:
import pandas as pd
import os
from src.Bonus_Allocation_System.logging import logger
import joblib
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay, \
                            precision_score, recall_score, f1_score, roc_auc_score,roc_curve 
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

In [123]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config


        
    def train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)
        train_data.head()
        models = {
            "Random Forest": RandomForestClassifier(),
            "Decision Tree": DecisionTreeClassifier(),
            "Gradient Boosting": GradientBoostingClassifier(),
            "Logistic Regression": LogisticRegression(),
            "K-Neighbors Classifier": KNeighborsClassifier(),
            "XGBClassifier": XGBClassifier(), 
            "CatBoosting Classifier": CatBoostClassifier(verbose=False),
            "Support Vector Classifier": SVC(),
            "AdaBoost Classifier": AdaBoostClassifier()
            }
        def evaluate_clf(true, predicted):
            acc = accuracy_score(true, predicted) # Calculate Accuracy
            f1 = f1_score(true, predicted) # Calculate F1-score
            precision = precision_score(true, predicted) # Calculate Precision
            recall = recall_score(true, predicted)  # Calculate Recall
            roc_auc = roc_auc_score(true, predicted) #Calculate Roc
            return acc, f1 , precision, recall, roc_auc

        X_train = train_data[['Winning_percentage', 'Average_Bet_Amount',
       'Number_of_Bonuses_Received', 'Amount_of_Bonuses_Received',
       'Revenue_from_Bonuses']]
        X_test = test_data[['Winning_percentage', 'Average_Bet_Amount',
       'Number_of_Bonuses_Received', 'Amount_of_Bonuses_Received',
       'Revenue_from_Bonuses']]
        y_train = train_data[['Should_Receive_Bonus']]
        y_test = test_data[['Should_Receive_Bonus']]
        models_list = []
        accuracy_list = []
        auc= []
        for i in range(len(list(models))):
            model = list(models.values())[i]
            model.fit(X_train, y_train) # Train model

            # Make predictions
            y_train_pred = model.predict(X_train)
            y_test_pred = model.predict(X_test)

            # Training set performance
            model_train_accuracy, model_train_f1,model_train_precision,\
            model_train_recall,model_train_rocauc_score=evaluate_clf(y_train ,y_train_pred)


            # Test set performance
            model_test_accuracy,model_test_f1,model_test_precision,\
            model_test_recall,model_test_rocauc_score=evaluate_clf(y_test, y_test_pred)

            print(list(models.keys())[i])
            models_list.append(list(models.keys())[i])

            print('Model performance for Training set')
            print("- Accuracy: {:.4f}".format(model_train_accuracy))
            print('- F1 score: {:.4f}'.format(model_train_f1)) 
            print('- Precision: {:.4f}'.format(model_train_precision))
            print('- Recall: {:.4f}'.format(model_train_recall))
            print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))

            print('----------------------------------')

            print('Model performance for Test set')
            print('- Accuracy: {:.4f}'.format(model_test_accuracy))
            accuracy_list.append(model_test_accuracy)
            print('- F1 score: {:.4f}'.format(model_test_f1))
            print('- Precision: {:.4f}'.format(model_test_precision))
            print('- Recall: {:.4f}'.format(model_test_recall))
            print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))
            auc.append(model_test_rocauc_score)
            print('='*35)
            print('\n')
        
        report=pd.DataFrame(list(zip(models_list, accuracy_list)), columns=['Model Name', 'Accuracy']).sort_values(by=['Accuracy'], ascending=False)
        logger.info("Final accurary table")
        print(report.to_string(index=False))
        xbg = XGBClassifier()
        joblib.dump(xbg, os.path.join(self.config.root_dir, self.config.model_name))
        return report

    


     #   joblib.dump(lr, os.path.join(self.config.root_dir, self.config.model_name))

In [124]:
import os 
%pwd

'c:\\Users\\pouru\\OneDrive\\Desktop\\360_digit_mg_solution\\360_digit_mg_project\\Fair-and-Efficient-Bonus-Allocation-System'

In [125]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2024-08-17 11:50:06,439: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-17 11:50:06,442: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-17 11:50:06,449: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-08-17 11:50:06,452: INFO: common: created directory at: artifacts]
[2024-08-17 11:50:06,455: INFO: common: created directory at: artifacts/model_trainer]


  return fit_method(estimator, *args, **kwargs)


Random Forest
Model performance for Training set
- Accuracy: 0.9995
- F1 score: 0.9995
- Precision: 1.0000
- Recall: 0.9990
- Roc Auc Score: 0.9995
----------------------------------
Model performance for Test set
- Accuracy: 0.9104
- F1 score: 0.9179
- Precision: 0.9260
- Recall: 0.9099
- Roc Auc Score: 0.9105


Decision Tree
Model performance for Training set
- Accuracy: 0.9995
- F1 score: 0.9995
- Precision: 1.0000
- Recall: 0.9990
- Roc Auc Score: 0.9995
----------------------------------
Model performance for Test set
- Accuracy: 0.8928
- F1 score: 0.9023
- Precision: 0.9050
- Recall: 0.8997
- Roc Auc Score: 0.8920




  y = column_or_1d(y, warn=True)


Gradient Boosting
Model performance for Training set
- Accuracy: 0.9141
- F1 score: 0.9182
- Precision: 0.9201
- Recall: 0.9164
- Roc Auc Score: 0.9140
----------------------------------
Model performance for Test set
- Accuracy: 0.9072
- F1 score: 0.9151
- Precision: 0.9218
- Recall: 0.9084
- Roc Auc Score: 0.9071




  y = column_or_1d(y, warn=True)


Logistic Regression
Model performance for Training set
- Accuracy: 0.8851
- F1 score: 0.8891
- Precision: 0.9028
- Recall: 0.8758
- Roc Auc Score: 0.8856
----------------------------------
Model performance for Test set
- Accuracy: 0.8928
- F1 score: 0.9007
- Precision: 0.9184
- Recall: 0.8837
- Roc Auc Score: 0.8938




  return self._fit(X, y)


K-Neighbors Classifier
Model performance for Training set
- Accuracy: 0.9229
- F1 score: 0.9263
- Precision: 0.9318
- Recall: 0.9209
- Roc Auc Score: 0.9230
----------------------------------
Model performance for Test set
- Accuracy: 0.8952
- F1 score: 0.9037
- Precision: 0.9138
- Recall: 0.8939
- Roc Auc Score: 0.8953


XGBClassifier
Model performance for Training set
- Accuracy: 0.9864
- F1 score: 0.9870
- Precision: 0.9928
- Recall: 0.9812
- Roc Auc Score: 0.9867
----------------------------------
Model performance for Test set
- Accuracy: 0.9056
- F1 score: 0.9131
- Precision: 0.9254
- Recall: 0.9012
- Roc Auc Score: 0.9061


CatBoosting Classifier
Model performance for Training set
- Accuracy: 0.9456
- F1 score: 0.9479
- Precision: 0.9552
- Recall: 0.9407
- Roc Auc Score: 0.9459
----------------------------------
Model performance for Test set
- Accuracy: 0.9112
- F1 score: 0.9186
- Precision: 0.9274
- Recall: 0.9099
- Roc Auc Score: 0.9113




  y = column_or_1d(y, warn=True)


Support Vector Classifier
Model performance for Training set
- Accuracy: 0.8155
- F1 score: 0.8218
- Precision: 0.8352
- Recall: 0.8089
- Roc Auc Score: 0.8158
----------------------------------
Model performance for Test set
- Accuracy: 0.8192
- F1 score: 0.8326
- Precision: 0.8489
- Recall: 0.8169
- Roc Auc Score: 0.8195




  y = column_or_1d(y, warn=True)


AdaBoost Classifier
Model performance for Training set
- Accuracy: 0.8936
- F1 score: 0.8970
- Precision: 0.9138
- Recall: 0.8809
- Roc Auc Score: 0.8943
----------------------------------
Model performance for Test set
- Accuracy: 0.9008
- F1 score: 0.9076
- Precision: 0.9312
- Recall: 0.8852
- Roc Auc Score: 0.9026


[2024-08-17 11:50:16,944: INFO: 2266060265: Final accurary table]
               Model Name  Accuracy
   CatBoosting Classifier    0.9112
            Random Forest    0.9104
        Gradient Boosting    0.9072
            XGBClassifier    0.9056
      AdaBoost Classifier    0.9008
   K-Neighbors Classifier    0.8952
            Decision Tree    0.8928
      Logistic Regression    0.8928
Support Vector Classifier    0.8192
