# PySpark Project Notebook 

## Imports

In [22]:
import logging
import sys

import pyspark
from pyspark.sql import SparkSession

## Logger

In [26]:
class Log4j:
    """
    Log4j logger wrapper for Spark applications.

    Each logging method writes the message twice: once to a JVM-side Log4j
    logger reached through the Spark session's Py4J gateway, and once to a
    Python ``logging`` logger configured to write to stdout.

    Args:
        spark_session (SparkSession): The SparkSession instance for the application.

    Attributes:
        spark (SparkSession): The SparkSession instance.
        logger: The Log4j ``Logger`` returned by
            ``org.apache.log4j.LogManager.getLogger``.
        py_logger: The Python logger instance for logging to the console and output.

    """

    def __init__(self, spark_session: "SparkSession"):
        """
        Initialize the Log4j instance.

        Args:
            spark_session (SparkSession): The SparkSession instance for the application.
        """
        self.spark = spark_session

        # ``_jvm.org.apache.log4j`` is only the Java *package*; it has no
        # warn/info/error/debug methods. Ask LogManager for an actual Logger
        # so the calls below reach Log4j instead of failing.
        log4j = self.spark._jvm.org.apache.log4j
        self.logger = log4j.LogManager.getLogger(__name__)

        # Configure Python logging. basicConfig only takes effect when the
        # root logger has no handlers yet, so repeated construction is harmless.
        logging.basicConfig(
            stream=sys.stdout,
            level=logging.INFO,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        )
        self.py_logger = logging.getLogger(__name__)

    def warn(self, message: str):
        """
        Log a warning message to both the Log4j and the Python logger.

        Args:
            message (str): The message to log.
        """
        self.logger.warn(message)
        self.py_logger.warning(message)

    def info(self, message: str):
        """
        Log an information message to both the Log4j and the Python logger.

        Args:
            message (str): The message to log.
        """
        self.logger.info(message)
        self.py_logger.info(message)

    def error(self, message: str):
        """
        Log an error message to both the Log4j and the Python logger.

        Args:
            message (str): The message to log.
        """
        self.logger.error(message)
        self.py_logger.error(message)

    def debug(self, message: str):
        """
        Log a debug message to both the Log4j and the Python logger.

        Note: when the INFO level requested in ``__init__`` is in effect, the
        Python side filters debug messages out.

        Args:
            message (str): The message to log.
        """
        self.logger.debug(message)
        self.py_logger.debug(message)

## Starting a session

In [10]:
# Build (or reuse) a local Spark session that uses all available cores.
spark = (
    SparkSession.builder
    .appName("Spark Project")
    .master("local[*]")
    .getOrCreate()
)

### Adding logging to session

In [24]:
# Create the Log4j wrapper around the active session and record that it started.
logger = Log4j(spark)
logger.info("Spark Session started")

2023-08-27 00:26:08,368 - __main__ - INFO - Spark Session started


## Stopping a session

In [27]:
# Record the shutdown first, then stop the Spark session.
logger.info("Stopping Spark Session")
spark.stop()

2023-08-27 00:30:52,461 - __main__ - INFO - Stopping Spark Session
