In [1]:
from application_logger import setup_logger
from Splitting_Scaling import *
from sklearn.ensemble import RandomForestRegressor

ModuleNotFoundError: No module named 'setup_logger'

In [None]:
from math import e
import warnings
warnings.filterwarnings('ignore')
import os 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pickle
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.ensemble import  ExtraTreesRegressor
from setup_logger import setup_logger


class Parameter_tuning:
    """
    Class: Parameter_tuning
    Description: Tunes an ExtraTreesRegressor and a RandomForestRegressor with
                 RandomizedSearchCV, scores both on the train/test split
                 supplied by Splitting_And_Scaling, and pickles the model that
                 scores best on the test split.

    Attributes:
        folder, filename: location of the log file.
        log_object: logger returned by setup_logger.
        split_obj: Splitting_And_Scaling instance providing scaling().
        dict: maps model name -> [model, train_score, test_score, r2_score].

    Version: 1.1
    """

    def __init__(self):
        self.folder = '../logs/'
        self.filename = 'Model_tuning.txt'
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(self.folder, exist_ok=True)
        self.log_object = setup_logger("Model_tuning", self.folder + self.filename)

        self.log_object.info('Strated calling Splitting_Scaling file')
        self.split_obj = Splitting_And_Scaling()
        self.dict = {}
        self.log_object.info('Splitting_Scaling file called Sucessfully.')

    def parameters(self):
        """
        Method: parameters
        Description: Builds the hyper-parameter search spaces used by the
                     randomized searches.
        Parameters: None
        Return: tuple (et_parameters, rf_parameters) of dicts mapping an
                estimator keyword to the list of candidate values.

        Version: 1.1
        """
        self.log_object.info('Trying to set hyper-paramerts')
        # 'auto' was removed from max_features in scikit-learn 1.3; for
        # regressors it meant "use every feature", which 1.0 expresses on both
        # old and new releases.
        et_parameters = {
            'n_estimators': [int(x) for x in np.linspace(start=100, stop=1000, num=12)],
            'max_features': [1.0, 'sqrt'],
            'max_depth': [int(x) for x in np.linspace(5, 30, num=6)],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 5],
        }

        rf_parameters = {
            'n_estimators': [100, 200, 300, 400, 500, 600],
            'max_features': [1.0, 'sqrt'],
            'max_depth': [int(x) for x in np.linspace(10, 110, num=11)],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'bootstrap': [True, False],
        }
        self.log_object.info('Hyper-paramerts is successfully set.')
        return et_parameters, rf_parameters

    @staticmethod
    def _evaluate(model, x_train, x_test, y_train, y_test):
        """Return [model, train score, test score, r2 on test predictions]."""
        train_score = model.score(x_train, y_train)
        test_score = model.score(x_test, y_test)
        # For scikit-learn regressors score() is already R^2, so this value
        # duplicates test_score; kept so the stored entry layout is unchanged.
        r2 = r2_score(y_test, model.predict(x_test))
        return [model, train_score, test_score, r2]

    def _select_best(self):
        """Return the (name, entry) pair of self.dict with the highest test score."""
        # Entry layout: [model, train_score, test_score, r2_score]. Ranking by
        # the held-out score avoids rewarding a model that merely overfits.
        return max(self.dict.items(), key=lambda item: item[1][2])

    def et_tuning(self):
        """
        Method: et_tuning
        Description: Runs a randomized search over the Extra Trees search
                     space, scores the best estimator on the train and test
                     splits and stores the result in self.dict['Extratree'].
        Parameters: None
        Return: None
        Raises: re-raises any exception after logging it.

        Version: 1.1
        """
        self.log_object.info('Train-Test Split')
        x_train, x_test, y_train, y_test = self.split_obj.scaling()
        try:
            self.log_object.info('ExtraTree Regressor: Model Tuning Started')
            et_parameters, _ = self.parameters()
            random_et = RandomizedSearchCV(estimator=ExtraTreesRegressor(),
                                           param_distributions=et_parameters,
                                           cv=5,
                                           scoring='neg_root_mean_squared_error',
                                           n_iter=10,
                                           n_jobs=1,
                                           verbose=2,
                                           random_state=42)
            random_et.fit(x_train, y_train)
            self.log_object.info('ExtraTree Regressor:Best Parameters found.')

            # The search refits the best candidate on the full training data
            # (refit=True is the default), so building and fitting a second
            # model with best_params_ would only repeat that work.
            et_model = random_et.best_estimator_
            self.log_object.info('ExtraTree Regressor:Using best parameter model tuning done.')
            self.log_object.info('ExtraTree Regressor:Training data fitted to tuned model.')
        except Exception as e:
            # NOTE(review): errors are logged through .info because only that
            # method of the project logger is visible here.
            self.log_object.info('Error in ExtraTree Regressor Tuning.'+str(e))
            raise

        try:
            self.log_object.info('ExtraTree Regressor:Finding train and test accuracy')
            scores = self._evaluate(et_model, x_train, x_test, y_train, y_test)
            self.log_object.info('ExtraTree Regressor:train and test accuracy found sucessfully.')

            self.log_object.info('ExtraTree Regressor:Converting accuracy scores to dictionary')
            self.dict['Extratree'] = scores
            self.log_object.info('ExtraTree Regressor:Succesfully converted accuracy scores to dictionary')
        except Exception as e:
            self.log_object.info('Error in ExtraTree Regressor Accuracy.'+str(e))
            raise

    def rf_tuning(self):
        """
        Method: rf_tuning
        Description: Runs a randomized search over the Random Forest search
                     space, scores the best estimator on the train and test
                     splits and stores the result in self.dict['RandomForest'].
        Parameters: None
        Return: None
        Raises: re-raises any exception after logging it.

        Version: 1.1
        """
        try:
            _, rf_parameters = self.parameters()
            x_train, x_test, y_train, y_test = self.split_obj.scaling()
            self.log_object.info('RandomForestRegressor: Train-Test Split')

            self.log_object.info('RandomForestRegressor: Model Tuning Started')
            random_rf = RandomizedSearchCV(estimator=RandomForestRegressor(),
                                           param_distributions=rf_parameters,
                                           cv=5,
                                           scoring='neg_root_mean_squared_error',
                                           n_iter=10,
                                           n_jobs=1,
                                           verbose=2,
                                           random_state=45)
            random_rf.fit(x_train, y_train)
            self.log_object.info('RandomForestRegressor: Best parameter found.')

            self.log_object.info('RandomForestRegressor: trying to fit model using best params')
            # Already refitted on the full training data by the search.
            rf_model = random_rf.best_estimator_
            self.log_object.info('RandomForestRegressor: Success-fit model using best params')

            self.log_object.info('RandomForestRegressor:Finding train and test accuracy')
            scores = self._evaluate(rf_model, x_train, x_test, y_train, y_test)
            self.log_object.info('RandomForestRegressor:train and test accuracy found sucessfully.')

            self.log_object.info('RandomForestRegressor:Converting accuracy scores to dictionary')
            # Previously stored as 'Extratree_Model', which mislabelled the
            # random forest in the printed report and in the pickle file name.
            self.dict['RandomForest'] = scores
            self.log_object.info('RandomForestRegressor:Succesfully converted accuracy scores to dictionary')

        except Exception as e:
            self.log_object.info('Error in RandomForest Regressor Accuracy.'+str(e))
            raise

    def algo_run(self):
        """
        Method: algo_run
        Description: Runs et_tuning and then rf_tuning so that self.dict holds
                     an entry for each model.
        Parameters: None
        Return: None

        Version: 1.1
        """
        self.et_tuning()
        self.rf_tuning()

    def model_result(self):
        """
        Method: model_result
        Description: Tunes both models, picks the one with the highest test
                     score, pickles it to ./bestmodel/<name>.pkl and prints
                     its name and scores.
        Parameters: None
        Return: None
        Raises: re-raises any exception after logging it.

        Version: 1.1
        """
        try:
            self.log_object.info('Findind best model..')
            self.algo_run()

            best_model_name, best_entry = self._select_best()
            (best_model_object, best_model_train_score,
             best_model_test_score, best_re2_score) = best_entry

            os.makedirs('./bestmodel/', exist_ok=True)
            with open('./bestmodel/'+best_model_name+'.pkl', 'wb') as file:
                pickle.dump(best_model_object, file)

            print(f'Best model:{best_model_name}\nTrain_score:{best_model_train_score}\nTest_score:{best_model_test_score}\nR2_score:{best_re2_score}')
            self.log_object.info('Best model found.')
        except Exception as e:
            self.log_object.info('Error in finding best model'+str(e))
            raise

In [None]:
# Build the tuner: __init__ creates the log folder and logger and instantiates
# the Splitting_And_Scaling helper used by the tuning methods.
o = Parameter_tuning()

In [None]:
# Runs both randomized searches (via algo_run), pickles the selected model
# under ./bestmodel/ and prints its name and scores.
o.model_result()

In [None]:
# Scratch arithmetic (evaluates to 9); the result is not assigned or used in
# the visible cells.
95-86