In [1]:
# Import Classes & Methods
from extractor import CustomerTransactionsExtractor
from transform import AirpodsAfterIphoneTransformer, OnlyAirpodsAndIphoneTransformer
from loader import AirPodsAfterIphoneLoader, OnlyAirpodsAndIPhoneLoader

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
26/01/10 09:30:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:
# ETL pipeline to generate the data for all customers who have bought Airpods just after buying iPhone
class FirstWorkFlow:
    """ETL workflow for customers who purchased Airpods after purchasing iPhones.

    The heavy lifting is delegated to the extractor, transformer and loader
    classes imported at the top of the notebook; this class only wires them
    together in extract -> transform -> preview -> load order.
    """

    def __init__(self):
        """Nothing to configure: the workflow keeps no instance state."""

    def runner(self):
        """Execute the pipeline end to end.

        Prints a heading, shows the first 5 rows of the transformed result
        sorted by customer_id, transaction_date and product_name, and then
        hands the (unsorted) transformed result to the loader's ``sink()``.

        Returns:
            None. The method is run for its side effects.
        """
        # Extract: pull every required input from its source
        source_frames = CustomerTransactionsExtractor().extract()

        # Transform: delegate the business rule to the dedicated transformer
        transformer = AirpodsAfterIphoneTransformer()
        airpods_after_iphone = transformer.transform(source_frames)

        # Preview: heading first, then a small ordered sample of the result
        print("List of Customers who purchased Airpods after purchasing Iphones\n")
        sort_columns = ("customer_id", "transaction_date", "product_name")
        ordered_preview = airpods_after_iphone.orderBy(*sort_columns)
        ordered_preview.show(5, truncate=False)

        # Load: the loader receives the transformed result, not the sorted preview
        loader = AirPodsAfterIphoneLoader(airpods_after_iphone)
        loader.sink()

In [3]:
# ETL pipeline to generate the data for all customers who have bought only iPhone and Airpods
class SecondWorkFlow:
    """ETL workflow for customers who purchased only Airpods and iPhones.

    Extraction, transformation and loading are delegated to the classes
    imported at the top of the notebook; this class only sequences them.
    """

    def __init__(self):
        """Nothing to configure: the workflow keeps no instance state."""

    def runner(self):
        """Execute the pipeline end to end.

        Prints a heading, shows the first 5 rows of the transformed result
        without truncating column values, and then passes that result to the
        loader's ``sink()``.

        Returns:
            None. The method is run for its side effects.
        """
        # Extract: pull every required input from its source
        source_frames = CustomerTransactionsExtractor().extract()

        # Transform: delegate the business rule to the dedicated transformer
        transformer = OnlyAirpodsAndIphoneTransformer()
        only_airpods_and_iphone = transformer.transform(source_frames)

        # Preview: heading first, then a small sample of the result
        print("List of Customers who purchased only Airpods and Iphones\n")
        only_airpods_and_iphone.show(5, truncate=False)

        # Load: write the transformed result to its sink
        loader = OnlyAirpodsAndIPhoneLoader(only_airpods_and_iphone)
        loader.sink()

In [4]:
class WorkFlowRunner:
    """Select a workflow by its string name and execute it.

    Known names are "firstWorkFlow" and "secondWorkFlow"; any other value is
    rejected with ``ValueError`` when ``runner()`` is called.
    """

    def __init__(self, name):
        """Remember which workflow should be run.

        Args:
            name: Workflow identifier, compared by equality against the
                known workflow names.
        """
        self.name = name

    def runner(self):
        """Run the workflow matching ``self.name`` and return its result.

        Returns:
            Whatever the selected workflow's ``runner()`` returns.

        Raises:
            ValueError: If ``self.name`` matches no known workflow.
        """
        # Ordered routing table; the thunks keep class lookup and
        # instantiation lazy so only the selected workflow is touched.
        routes = (
            ("firstWorkFlow", lambda: FirstWorkFlow().runner()),
            ("secondWorkFlow", lambda: SecondWorkFlow().runner()),
        )

        for label, launch in routes:
            if self.name == label:
                return launch()

        raise ValueError(f"Not Implemented for {self.name}")

In [5]:
# Run the first workflow: WorkFlowRunner matches the name "firstWorkFlow" and
# calls FirstWorkFlow().runner(), which prints a preview and sinks the result.
# Note: FirstWorkFlow.runner() has no return statement, so `work_flow_runner`
# is bound to None; this cell is executed purely for its side effects.
name = "firstWorkFlow"
work_flow_runner = WorkFlowRunner(name).runner()

                                                                                

List of Customers who purchased Airpods after purchasing Iphones

+-----------+-------------+----------+--------+--------------+------------+----------------+-----------------+
|customer_id|customer_name|join_date |location|transaction_id|product_name|transaction_date|next_product_name|
+-----------+-------------+----------+--------+--------------+------------+----------------+-----------------+
|105        |Eva          |2022-01-01|Ohio    |11            |iPhone      |2022-02-01      |AirPods          |
|108        |Henry        |2022-04-01|Utah    |15            |iPhone      |2022-02-05      |AirPods          |
+-----------+-------------+----------+--------+--------------+------------+----------------+-----------------+



In [6]:
# Run the second workflow: WorkFlowRunner matches the name "secondWorkFlow" and
# calls SecondWorkFlow().runner(), which prints a preview and sinks the result.
# Note: SecondWorkFlow.runner() has no return statement, so `work_flow_runner`
# is bound to None; this cell also rebinds the notebook-level `name` variable.
name = "secondWorkFlow"
work_flow_runner = WorkFlowRunner(name).runner()

List of Customers who purchased only Airpods and Iphones

+-----------+-------------+----------+--------+-----------------+
|customer_id|customer_name|join_date |location|products         |
+-----------+-------------+----------+--------+-----------------+
|107        |Grace        |2022-03-01|Colorado|[AirPods, iPhone]|
|108        |Henry        |2022-04-01|Utah    |[AirPods, iPhone]|
+-----------+-------------+----------+--------+-----------------+



                                                                                