In [0]:
%pip install dbldatagen
# Installs the dbldatagen package that the next cell imports as `dg`.
# NOTE(review): no version is pinned, so a later run may pick up a different
# release than the one this notebook was written against — consider pinning.

In [0]:
import dbldatagen as dg
import ipywidgets as widgets
from pyspark.sql.types import StructType, StructField,  StringType

In [0]:
def _text_input(label, default):
    """Return an enabled text box whose initial value and placeholder are both `default`."""
    return widgets.Text(
        value=default,
        placeholder=default,
        description=label,
        disabled=False,
    )


# Free-text inputs for the catalog and schema; later cells read their `.value`
# to build the fully qualified table names.
catalog_widget = _text_input('Catalog:', 'users')
schema_widget = _text_input('Schema:', 'schema')

# An HBox lays the two inputs out side by side.
box = widgets.HBox([catalog_widget, schema_widget])
display(box)

In [0]:
# Read the target location from the widgets. Surrounding whitespace is dropped
# because these values are spliced directly into SQL text in later cells.
catalog = catalog_widget.value.strip()
schema = schema_widget.value.strip()

# Fail fast on a blank field: it would otherwise produce a malformed name such
# as "users..test_vehicle_data" and a much less obvious SQL error downstream.
if not catalog or not schema:
    raise ValueError("Both the Catalog and Schema widgets must be filled in.")

# Three-part name (catalog.schema.table) of the generated vehicle data table.
table_name = f"{catalog}.{schema}.test_vehicle_data"
print(table_name)

In [0]:
# DDL for the vehicle data table: five STRING columns stored as Delta.
# IF NOT EXISTS leaves an already-existing table untouched, so re-running the
# statement does not drop data.
sql = f"""CREATE TABLE IF NOT EXISTS {table_name} (
                name STRING, 
                serial_number STRING, 
                license_plate STRING, 
                manufacturer STRING,
                email STRING
                ) USING DELTA"""

# Show the rendered statement for review; it is executed in a separate cell.
print(sql)

In [0]:
# Run the CREATE TABLE statement, then read the table's schema back so the data
# generator can be driven by the table definition itself.
spark.sql(sql)
vehicle_table = spark.table(table_name)
table_schema = vehicle_table.schema
print(table_schema)

In [0]:
# Sizing knobs for the synthetic data set.
shuffle_partitions_requested = 8
partitions_requested = 8
data_rows = 1000000

# Seed the generator with the table's schema so every column already carries
# the table's name and type; only generation rules are attached below.
dataspec = (
    dg.DataGenerator(
        spark,
        rows=data_rows,
        partitions=partitions_requested,
        shufflePartitions=shuffle_partitions_requested,
    )
    .withSchema(table_schema)
)

# Per-column generation rules. The template strings follow dbldatagen's text
# template syntax (see the library's template documentation for the escapes).
dataspec = (
    dataspec
        .withColumnSpec("name", percentNulls=0.01, template=r"\\w \\w|\\w a. \\w")
        .withColumnSpec(
            "serial_number", minValue=1000000, maxValue=10000000, prefix="dr", random=True
        )
        .withColumnSpec("license_plate", template=r"\\n-\\n")
        # No data type is passed here: withColumnSpec refines a column that the
        # schema already typed (a type argument belongs to withColumn). A
        # positional "string" would be taken as a value-range argument instead.
        .withColumnSpec("manufacturer", values=['Toyota', 'Ford', 'Honda', 'Chevrolet', 'Kia'])
        .withColumnSpec("email", template=r"\\w.\\w@\\w.com")
)
df1 = dataspec.build()

# Replace the table's contents with the freshly generated rows.
df1.write.format("delta").mode("overwrite").saveAsTable(table_name)

In [0]:
# Preview the generated DataFrame.
# NOTE(review): this displays the generator output `df1`, not a read-back of
# the saved table — presumably Spark re-evaluates it here; confirm if the
# preview must match the persisted rows exactly.
display(df1)

In [0]:
# Re-read the widgets so this cell uses their current values. Whitespace is
# stripped and blanks are rejected because the values are spliced into SQL.
catalog = catalog_widget.value.strip()
schema = schema_widget.value.strip()
if not catalog or not schema:
    raise ValueError("Both the Catalog and Schema widgets must be filled in.")

# DDL for the auto_users table, which pairs a user name with a manufacturer.
# OR REPLACE means any existing auto_users table is replaced on each run.
create_table_sql = f"""
CREATE OR REPLACE TABLE {catalog}.{schema}.auto_users (
    id INT,
    name STRING,
    manufacturer STRING
)
"""

# print() shows the statement with real line breaks, matching how the first
# CREATE TABLE statement is shown; display() is meant for rich objects.
print(create_table_sql)

In [0]:
# Execute the CREATE OR REPLACE statement built in the previous cell; any
# existing auto_users table (and its rows) is replaced.
spark.sql(create_table_sql)

In [0]:
# Seed rows for auto_users. Only the first row's manufacturer ('Ford') is one
# of the manufacturers used in the generated vehicle data; 'CompanyB' and
# 'CompanyC' match none of them.
# NOTE(review): the first row's name is an email address — presumably it is
# compared against the signed-in user elsewhere; confirm before changing it.
insert_rows_sql = f"""
INSERT INTO {catalog}.{schema}.auto_users (id, name, manufacturer) VALUES
(1, 'scott.stafford@databricks.com', 'Ford'),
(2, 'Bob', 'CompanyB'),
(3, 'Charlie', 'CompanyC')
"""

# print() shows the statement with real line breaks; display() is meant for
# rich objects such as DataFrames and widgets.
print(insert_rows_sql)

In [0]:
# Execute the INSERT built in the previous cell. INSERT INTO appends, so
# re-running only this cell adds the same three rows again; re-run the
# CREATE OR REPLACE cell first to start from an empty table.
spark.sql(insert_rows_sql)