In [2]:
# Import logging for logging messages and errors
import logging

# Import custom modules for handling data, preprocessing, training, evaluating, and predicting
from src.data_handler import DataHandler
from src.data_preprocessor import DataPreprocessor
from src.model_trainer import ModelTrainer
from src.model_evaluator import ModelEvaluator
from src.predictor import Predictor

# Import the GradientBoostingClassifier from scikit-learn for model training
from sklearn.ensemble import GradientBoostingClassifier 

# Configure the root logger at INFO level so the logging.info() calls in
# main() (evaluation report and predictions) are actually emitted
logging.basicConfig(level=logging.INFO)

def main():
    """
    Run the pipeline end to end: load data, preprocess, train, evaluate, predict.

    The historical data is split into its feature columns (everything except
    'label') and the 'label' column. A GradientBoostingClassifier is trained
    through ModelTrainer, the evaluation report is logged, and predictions
    for the latest data are logged.

    If the historical data is empty, a warning is logged and the function
    returns without training, evaluating, or predicting.

    Returns:
        None. All results are reported through the logging module.
    """
    # Initialize the DataHandler with paths to the data files and load them
    data_handler = DataHandler('data/historical_sensor_data.csv', 'data/latest_sensor_data.csv')
    data_handler.load_data()

    # Fetch the historical data once and reuse it for the check and the training
    historical_data = data_handler.get_historical_data()

    # Guard clause: without historical data there is nothing to train on.
    # Say so explicitly instead of exiting silently.
    if historical_data.empty:
        logging.warning('Historical data is empty; skipping training, evaluation and prediction.')
        return

    # Preprocess the feature columns only; the label must not be transformed
    preprocessor = DataPreprocessor(historical_data.drop(columns=['label']))
    features = preprocessor.preprocess()
    labels = historical_data['label'].values

    # Initialize and train the model
    model = GradientBoostingClassifier()
    trainer = ModelTrainer(model, features, labels)
    trainer.train()

    # Evaluate on the test data exposed by the trainer (fetched once).
    # The same model instance is passed on, so the trainer is presumed to fit
    # it in place -- NOTE(review): confirm against ModelTrainer.
    test_data = trainer.get_test_data()
    evaluator = ModelEvaluator(model, test_data[0], test_data[1])
    logging.info(evaluator.evaluate())

    # The predictor receives the preprocessor that was built from the
    # historical features, so the latest data can be preprocessed the same way
    latest_data = data_handler.get_latest_data()
    predictor = Predictor(model, preprocessor, latest_data)
    logging.info(predictor.make_predictions())

# Run the pipeline only when this file is executed directly, not when imported
if __name__ == '__main__':
    main()



INFO:root:              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98       128
         1.0       0.98      0.98      0.98       172

    accuracy                           0.98       300
   macro avg       0.98      0.98      0.98       300
weighted avg       0.98      0.98      0.98       300

INFO:root:[1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0.
 1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1.]
