In [None]:
class DataReader:
    """Read a file with Spark and register it as a global temporary view.

    The class relies on a ``spark`` object being available as a global name
    when its methods run; it does not create one itself.

    Args:
        format: Source format: "csv", "json" or "parquet". Matching ignores
            letter case and surrounding whitespace, so free-text input such
            as "CSV" or " json " is accepted.
        file_name: Name used for the global temporary view.
        location: Path handed to the Spark reader.
        delimiter: Field separator; only used for the "csv" format.
        multiline: Value of the "multiline" reader option; only used for the
            "json" format.
    """

    def __init__(self, format, file_name, location, delimiter=",", multiline="true"):
        self.format = format
        self.file_name = file_name
        self.location = location
        self.delimiter = delimiter
        self.multiline = multiline

    @staticmethod
    def _is_blank(value):
        """Return True when ``value`` is None or a whitespace-only string."""
        if value is None:
            return True
        return isinstance(value, str) and not value.strip()

    def read_data(self):
        """Load ``location`` with the reader that matches ``format``.

        Returns:
            The DataFrame produced by the Spark reader.

        Raises:
            ValueError: If ``format`` is not one of csv/json/parquet, or if
                ``location`` is None or blank.
        """
        # Normalise only for matching; the error message keeps the raw value
        # so the caller sees exactly what was supplied.
        fmt = self.format.strip().lower() if isinstance(self.format, str) else self.format
        if fmt not in ("csv", "json", "parquet"):
            raise ValueError(f"Unsupported format: {self.format}")

        # Fail with a clear message instead of an opaque reader error.
        if self._is_blank(self.location):
            raise ValueError("location must not be empty")

        if fmt == "csv":
            return (
                spark.read
                .option("header", "true")
                .option("inferSchema", "true")
                .option("delimiter", self.delimiter)
                .csv(self.location)
            )
        if fmt == "json":
            # NOTE(review): "inferSchema" looks like a CSV-only option and is
            # presumably ignored by the JSON reader - confirm before removing.
            return (
                spark.read
                .option("inferSchema", "true")
                .option("multiline", self.multiline)
                .json(self.location)
            )
        return spark.read.parquet(self.location)

    def create_global_temp_view(self):
        """Read the data and publish it as a global temp view named ``file_name``.

        Raises:
            ValueError: If ``file_name`` is None or blank, or for any reason
                listed on ``read_data``.
        """
        # Check the view name first so a bad name does not cost a full read.
        if self._is_blank(self.file_name):
            raise ValueError("file_name must not be empty; it is used as the view name")

        df = self.read_data()
        df.createOrReplaceGlobalTempView(self.file_name)
        print(f"Global Temporary view '{self.file_name}' created successfully.")

In [None]:
# Declare one text widget per notebook parameter, paired with its default value
widget_defaults = (
    ("format", ""),
    ("file_name", ""),
    ("location", ""),
    ("delimiter", ","),
    ("multiline", "true"),
)
for widget_name, default_value in widget_defaults:
    dbutils.widgets.text(widget_name, default_value)

In [None]:
# Read the current value of every widget, in declaration order, into the
# notebook-level names that the DataReader cell expects
format, file_name, location, delimiter, multiline = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("format", "file_name", "location", "delimiter", "multiline")
)
# Echo the parameters for the run log (multiline is not included in this line)
print(f"Format: {format}, File_Name: {file_name}, Location: {location}, Delimiter: {delimiter}")

Format: , File_Name: , Location: , Delimiter: ,


In [None]:

# Build the reader from the values collected from the widgets
reader = DataReader(
    format=format,
    file_name=file_name,
    location=location,
    delimiter=delimiter,
    multiline=multiline,
)

# Load the data and register it as a global temporary view
reader.create_global_temp_view()