In [95]:
import os

In [172]:
%pwd

'c:\\Users\\pouru\\OneDrive\\Desktop\\360_digit_mg_solution\\360_digit_mg_project\\Fair-and-Efficient-Bonus-Allocation-System'

In [10]:
os.chdir("../")

In [183]:
%pwd

'c:\\Users\\pouru\\OneDrive\\Desktop\\360_digit_mg_solution\\360_digit_mg_project\\Fair-and-Efficient-Bonus-Allocation-System'

In [173]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str


In [184]:
from src.Bonus_Allocation_System.constants import *

from src.Bonus_Allocation_System.utils.common import read_yaml,create_directories

In [175]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.KNeighborsClassifier
        schema =  self.schema.TARGET_COLUMNS

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            model_name = config.model_name,

            
        )

        return model_trainer_config

In [176]:
import pandas as pd
import os
from src.Bonus_Allocation_System.logging import logger
import joblib
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay, \
                            precision_score, recall_score, f1_score, roc_auc_score,roc_curve 
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

In [177]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config


        
    def train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)
        train_data.head()
        models = {
            "Random Forest": RandomForestClassifier(),
            "Decision Tree": DecisionTreeClassifier(),
            "Gradient Boosting": GradientBoostingClassifier(),
            "Logistic Regression": LogisticRegression(),
            "K-Neighbors Classifier": KNeighborsClassifier(),
            "XGBClassifier": XGBClassifier(), 
            "CatBoosting Classifier": CatBoostClassifier(verbose=False),
            "Support Vector Classifier": SVC(),
            "AdaBoost Classifier": AdaBoostClassifier()
            }
        def evaluate_clf(true, predicted):
            acc = accuracy_score(true, predicted) # Calculate Accuracy
            f1 = f1_score(true, predicted) # Calculate F1-score
            precision = precision_score(true, predicted) # Calculate Precision
            recall = recall_score(true, predicted)  # Calculate Recall
            roc_auc = roc_auc_score(true, predicted) #Calculate Roc
            return acc, f1 , precision, recall, roc_auc

        X_train = train_data[['Winning_percentage', 'Average_Bet_Amount',
       'Number_of_Bonuses_Received', 'Amount_of_Bonuses_Received',
       'Revenue_from_Bonuses']]
        X_test = test_data[['Winning_percentage', 'Average_Bet_Amount',
       'Number_of_Bonuses_Received', 'Amount_of_Bonuses_Received',
       'Revenue_from_Bonuses']]
        y_train = train_data[['Should_Receive_Bonus']]
        y_test = test_data[['Should_Receive_Bonus']]
        models_list = []
        train_accuracy_list = []
        test_accurary_list = []
        train_f1_score_list = []
        test_f1_score_list = []
        train_precision_list = []
        test_precision_list = []
        train_recall_list = []
        test_recall_list = []
        train_auc_roc_list = []
        test_auc_roc_list = []

    
        for i in range(len(list(models))):
            model = list(models.values())[i]
            model.fit(X_train, y_train) # Train model

            # Make predictions
            y_train_pred = model.predict(X_train)
            y_test_pred = model.predict(X_test)

            # Training set performance
            model_train_accuracy, model_train_f1,model_train_precision,\
            model_train_recall,model_train_rocauc_score=evaluate_clf(y_train ,y_train_pred)


            # Test set performance
            model_test_accuracy,model_test_f1,model_test_precision,\
            model_test_recall,model_test_rocauc_score=evaluate_clf(y_test, y_test_pred)

            print(list(models.keys())[i])
            models_list.append(list(models.keys())[i])

            print('Model performance for Training set')
            print("- Accuracy: {:.4f}".format(model_train_accuracy))
            train_accuracy_list.append(model_train_accuracy)
            print('- F1 score: {:.4f}'.format(model_train_f1)) 
            train_f1_score_list.append(model_train_f1)
            print('- Precision: {:.4f}'.format(model_train_precision))
            train_precision_list.append(model_train_precision)
            print('- Recall: {:.4f}'.format(model_train_recall))
            train_recall_list.append(model_train_recall)
            print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))
            train_auc_roc_list.append(model_train_rocauc_score)

            print('----------------------------------')

            print('Model performance for Test set')
            print('- Accuracy: {:.4f}'.format(model_test_accuracy))
            test_accurary_list.append(model_test_accuracy)
            print('- F1 score: {:.4f}'.format(model_test_f1))
            test_f1_score_list.append(model_test_f1)
            print('- Precision: {:.4f}'.format(model_test_precision))
            test_precision_list.append(model_test_precision)
            print('- Recall: {:.4f}'.format(model_test_recall))
            test_recall_list.append(model_test_recall)
            print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))
            test_auc_roc_list.append(model_test_rocauc_score)

            print('='*35)
            print('\n')
        
        report=pd.DataFrame(list(zip(models_list, train_accuracy_list,test_accurary_list,\
                                    train_f1_score_list,test_f1_score_list, train_precision_list,\
                                    test_precision_list, train_recall_list,test_recall_list, \
                                    train_auc_roc_list,test_auc_roc_list )), columns=['Model Name',\
                                    "train_accuracy","test_accuracy", "train_f1_score","test_f1_score",\
                                    "train_precision","test_precision", "train_recall","test_recall", \
                                    "train_auc_roc","test_auc_roc" ]).sort_values(by=['test_accuracy'], ascending=False)
        report["train_test_acc_diff"] = report["train_accuracy"]-report["test_accuracy"]
        logger.info("Final accurary table")
        print(report.to_string(index=False))
        logger.info("From the table we get to known that XGBClassifier perform the best")
        xbg = XGBClassifier()
        xbg.fit(X_train, y_train)
        joblib.dump(xbg, os.path.join(self.config.root_dir, self.config.model_name))
        return report

    


     #   joblib.dump(lr, os.path.join(self.config.root_dir, self.config.model_name))

In [124]:
import os 
%pwd

'c:\\Users\\pouru\\OneDrive\\Desktop\\360_digit_mg_solution\\360_digit_mg_project\\Fair-and-Efficient-Bonus-Allocation-System'

In [180]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2024-08-19 10:15:36,946: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-19 10:15:36,950: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-19 10:15:36,958: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-08-19 10:15:36,961: INFO: common: created directory at: artifacts]
[2024-08-19 10:15:36,963: INFO: common: created directory at: artifacts/model_trainer]


  return fit_method(estimator, *args, **kwargs)


Random Forest
Model performance for Training set
- Accuracy: 0.9997
- F1 score: 0.9997
- Precision: 1.0000
- Recall: 0.9995
- Roc Auc Score: 0.9997
----------------------------------
Model performance for Test set
- Accuracy: 0.9016
- F1 score: 0.9063
- Precision: 0.9268
- Recall: 0.8867
- Roc Auc Score: 0.9028


Decision Tree
Model performance for Training set
- Accuracy: 0.9997
- F1 score: 0.9997
- Precision: 1.0000
- Recall: 0.9995
- Roc Auc Score: 0.9997
----------------------------------
Model performance for Test set
- Accuracy: 0.8704
- F1 score: 0.8793
- Precision: 0.8793
- Recall: 0.8793
- Roc Auc Score: 0.8697




  y = column_or_1d(y, warn=True)


Gradient Boosting
Model performance for Training set
- Accuracy: 0.9179
- F1 score: 0.9221
- Precision: 0.9282
- Recall: 0.9161
- Roc Auc Score: 0.9180
----------------------------------
Model performance for Test set
- Accuracy: 0.8920
- F1 score: 0.8983
- Precision: 0.9085
- Recall: 0.8882
- Roc Auc Score: 0.8923




  y = column_or_1d(y, warn=True)


Logistic Regression
Model performance for Training set
- Accuracy: 0.8901
- F1 score: 0.8953
- Precision: 0.9054
- Recall: 0.8854
- Roc Auc Score: 0.8904
----------------------------------
Model performance for Test set
- Accuracy: 0.8824
- F1 score: 0.8887
- Precision: 0.9031
- Recall: 0.8748
- Roc Auc Score: 0.8830




  return self._fit(X, y)


K-Neighbors Classifier
Model performance for Training set
- Accuracy: 0.9235
- F1 score: 0.9275
- Precision: 0.9329
- Recall: 0.9221
- Roc Auc Score: 0.9236
----------------------------------
Model performance for Test set
- Accuracy: 0.8832
- F1 score: 0.8906
- Precision: 0.8959
- Recall: 0.8852
- Roc Auc Score: 0.8830


XGBClassifier
Model performance for Training set
- Accuracy: 0.9843
- F1 score: 0.9852
- Precision: 0.9859
- Recall: 0.9844
- Roc Auc Score: 0.9843
----------------------------------
Model performance for Test set
- Accuracy: 0.9024
- F1 score: 0.9079
- Precision: 0.9204
- Recall: 0.8957
- Roc Auc Score: 0.9029


CatBoosting Classifier
Model performance for Training set
- Accuracy: 0.9485
- F1 score: 0.9511
- Precision: 0.9587
- Recall: 0.9437
- Roc Auc Score: 0.9488
----------------------------------
Model performance for Test set
- Accuracy: 0.8976
- F1 score: 0.9030
- Precision: 0.9183
- Recall: 0.8882
- Roc Auc Score: 0.8983




  y = column_or_1d(y, warn=True)


Support Vector Classifier
Model performance for Training set
- Accuracy: 0.8157
- F1 score: 0.8244
- Precision: 0.8339
- Recall: 0.8151
- Roc Auc Score: 0.8158
----------------------------------
Model performance for Test set
- Accuracy: 0.8128
- F1 score: 0.8249
- Precision: 0.8286
- Recall: 0.8212
- Roc Auc Score: 0.8121




  y = column_or_1d(y, warn=True)


AdaBoost Classifier
Model performance for Training set
- Accuracy: 0.8957
- F1 score: 0.9007
- Precision: 0.9106
- Recall: 0.8910
- Roc Auc Score: 0.8960
----------------------------------
Model performance for Test set
- Accuracy: 0.8928
- F1 score: 0.8977
- Precision: 0.9202
- Recall: 0.8763
- Roc Auc Score: 0.8941


[2024-08-19 10:15:49,164: INFO: 3163555312: Final accurary table]
               Model Name  train_accuracy  test_accuracy  train_f1_score  test_f1_score  train_precision  test_precision  train_recall  test_recall  train_auc_roc  test_auc_roc  train_test_acc_diff
            XGBClassifier        0.984267         0.9024        0.985165       0.907855         0.985908        0.920368      0.984422     0.895678       0.984257      0.902934             0.081867
            Random Forest        0.999733         0.9016        0.999749       0.906321         1.000000        0.926791      0.999497     0.886736       0.999749      0.902781             0.098133
   CatBoosting Clas