In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import max as spark_max, avg, round

class ChargePointsETLJob:
    """Extract/transform/load job summarising plug-in durations per charge point.

    The job reads a CSV file, aggregates the ``PluginDuration`` column per
    ``CPID`` and writes the result as Parquet.
    """

    def __init__(self, input_path: str, output_path: str, *,
                 spark_session: "SparkSession | None" = None):
        """Store the I/O locations and obtain a Spark session.

        Args:
            input_path: Location of the CSV input passed to the Spark reader.
            output_path: Location the Parquet output is written to.
            spark_session: Optional existing session to use. When omitted, a
                session is obtained via ``getOrCreate()`` with master
                ``local[*]`` and app name ``ElectricChargePointsETLJob``.
        """
        self.input_path = input_path
        self.output_path = output_path
        if spark_session is None:
            # Default kept identical to the historical behaviour so callers
            # that pass only the two paths are unaffected.
            spark_session = (
                SparkSession.builder
                .master("local[*]")
                .appName("ElectricChargePointsETLJob")
                .getOrCreate()
            )
        self.spark_session = spark_session

    def extract(self):
        """Read the CSV at ``input_path`` with a header row and inferred schema."""
        reader = self.spark_session.read
        return reader.csv(self.input_path, header=True, inferSchema=True)

    def transform(self, df):
        """Aggregate ``PluginDuration`` per ``CPID``.

        Args:
            df: DataFrame containing ``CPID`` and ``PluginDuration`` columns.

        Returns:
            A DataFrame with one row per distinct ``CPID`` and the columns
            ``chargepoint_id``, ``max_duration`` and ``avg_duration``; both
            aggregates are rounded to 2 decimal places.
        """
        # `round` here is the Spark column function imported at file level,
        # not the Python builtin.
        max_duration = round(spark_max("PluginDuration"), 2).alias("max_duration")
        avg_duration = round(avg("PluginDuration"), 2).alias("avg_duration")
        summary = df.groupBy("CPID").agg(max_duration, avg_duration)
        return summary.withColumnRenamed("CPID", "chargepoint_id")

    def load(self, df):
        """Write ``df`` as Parquet to ``output_path``, overwriting existing data."""
        df.write.mode("overwrite").parquet(self.output_path)

    def run(self):
        """Run the pipeline: extract, then transform, then load."""
        extracted = self.extract()
        transformed = self.transform(extracted)
        self.load(transformed)