In [None]:
# Create a generic data source class
class DataSource:
    """Base class for a data source identified by a path.

    Meant to be subclassed: concrete sources override ``get_data_frame``.
    The base implementation only raises.
    """

    def __init__(self, path):
        """Remember ``path`` on the instance as ``self.path``."""
        self.path = path

    def get_data_frame(self):
        """Placeholder that subclasses are expected to override.

        Raises:
            ValueError: Always, with the message "Not Implemented".
        """
        raise ValueError("Not Implemented")

In [None]:
# Create a CSV data source
class CSVDataSource(DataSource):
    """Data source that loads ``self.path`` through the "csv" reader format."""

    def get_data_frame(self):
        """Load ``self.path`` as CSV with the "header" option set to True.

        Relies on a global ``spark`` object being in scope
        (presumably the notebook's SparkSession -- confirm).
        """
        csv_reader = spark.read.format("csv").option("header", True)
        return csv_reader.load(self.path)

In [None]:
# Create a Parquet data source
class ParquetDataSource(DataSource):
    """Data source that loads ``self.path`` through the "parquet" reader format."""

    def get_data_frame(self):
        """Load ``self.path`` using the "parquet" format and return the result.

        Relies on a global ``spark`` object being in scope
        (presumably the notebook's SparkSession -- confirm).
        """
        parquet_reader = spark.read.format("parquet")
        return parquet_reader.load(self.path)

In [None]:
# Create a delta data source
class DeltaDataSource(DataSource):
    """Data source that reads a table, using ``self.path`` as the table name."""

    def get_data_frame(self):
        """Return ``spark.read.table(...)`` for the table named by ``self.path``.

        Note that ``self.path`` is passed as a table name here, not loaded as
        a file path. Relies on a global ``spark`` object being in scope
        (presumably the notebook's SparkSession -- confirm).
        """
        return spark.read.table(self.path)

In [None]:
# Factory function: return the data source object matching a data type
def get_data_source(data_type, file_path):
    """Build the data source object that corresponds to ``data_type``.

    Args:
        data_type: Selector compared by equality against "csv", "parquet"
            and "delta", in that order.
        file_path: Value handed to the chosen class's constructor.

    Returns:
        A ``CSVDataSource``, ``ParquetDataSource`` or ``DeltaDataSource``
        constructed with ``file_path``.

    Raises:
        ValueError: If ``data_type`` equals none of the supported values.
    """
    # Guard clauses: each class name is only looked up when its branch is hit.
    if data_type == "csv":
        return CSVDataSource(file_path)
    if data_type == "parquet":
        return ParquetDataSource(file_path)
    if data_type == "delta":
        return DeltaDataSource(file_path)

    raise ValueError(f"Not implemented for data_type: {data_type}")