In [0]:
import calendar
import datetime
import numpy
import random
import uuid

from pyspark.sql import DataFrame, Row
from pyspark.sql import functions as F

from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType

In [0]:
# Fixed seed so the generated sample data is identical on every "Run All";
# change the value if a different (but still repeatable) data set is wanted.
SEED = 42
random.seed(SEED)  # the stdlib `random` module is what the generators in this notebook draw from
rng = numpy.random.default_rng(SEED)

def generate_day_of_month(year: int, month: int) -> int:
    """Pick a random valid day number for the given month.

    Args:
        year: Calendar year; needed so that February honours leap years.
        month: Month number, 1-12.

    Returns:
        An integer between 1 and the number of days in that month, inclusive.

    Raises:
        ValueError: If ``month`` is outside 1-12 (raised by
            ``calendar.monthrange`` as ``calendar.IllegalMonthError``).
    """
    # monthrange returns (weekday_of_first_day, days_in_month); only the
    # month length is relevant here.
    _, days_in_month = calendar.monthrange(year, month)
    return random.randint(1, days_in_month)

def generate_date(year: int, month: int) -> datetime.date:
    """Build a random date that falls within the given year and month.

    The day component is chosen by ``generate_day_of_month``; year and month
    are passed through unchanged.
    """
    day = generate_day_of_month(year, month)
    return datetime.date(year=year, month=month, day=day)

def union_all(dataframes: list[DataFrame]) -> DataFrame | None:
    """Combine a list of DataFrames into one using ``unionByName``.

    Args:
        dataframes: DataFrames to combine, in order.

    Returns:
        The first DataFrame successively unioned by column name with each of
        the following ones. A single-element list returns that element
        unchanged; an empty list returns ``None``.
    """
    if not dataframes:
        # Nothing to union; matches the Optional return annotation instead of
        # failing on unpacking.
        return None
    combined, *rest = dataframes
    # Iterative fold rather than recursion, so long lists cannot exceed
    # Python's recursion limit.
    for frame in rest:
        combined = combined.unionByName(frame)
    return combined

In [0]:
def coffees(
    name: str,
    coffee_type: str,
    mug_size: str,
    city: str,
    count: int
) -> DataFrame:
    """Create a DataFrame of ``count`` coffee records for one person.

    Every row carries the same ``name``, ``coffee_type``, ``mug_size`` and
    ``city``; only ``date`` varies, drawn independently per row from
    1-16 November 2023.

    Args:
        name: Value for the ``name`` column.
        coffee_type: Value for the ``coffee_type`` column.
        mug_size: Value for the ``mug_size`` column.
        city: Value for the ``city`` column.
        count: Number of rows to generate. Zero (or a negative number) yields
            an empty DataFrame with the same columns.

    Returns:
        A DataFrame with columns ``name``, ``date``, ``coffee_type``,
        ``mug_size``, ``city``.
    """
    # Explicit schema: avoids type inference, which cannot work on an empty
    # list, and keeps the column order fixed.
    schema = StructType([
        StructField("name", StringType()),
        StructField("date", DateType()),
        StructField("coffee_type", StringType()),
        StructField("mug_size", StringType()),
        StructField("city", StringType()),
    ])
    # Tuples are matched to the schema positionally, so the field order here
    # must follow the schema above.
    rows = [
        (
            name,
            datetime.date(2023, 11, random.randint(1, 16)),
            coffee_type,
            mug_size,
            city,
        )
        for _ in range(count)
    ]
    # `spark` is the session object provided by the notebook environment.
    return spark.createDataFrame(rows, schema=schema)

In [0]:
# One DataFrame per person, all unioned into a single data set.
# NOTE: this assignment rebinds `coffees` from the generator function to the
# resulting DataFrame, so re-running this cell without first re-running the
# cell that defines the function will fail.
coffees = union_all([
    coffees(*order)
    for order in (
        # (name, coffee_type, mug_size, city, count)
        ("Magda", "capuccino", "M", "Kraków", 26),
        ("Krzysztof", "espresso", "S", "Płock", 31),
        ("Anna", "lungo", "M", "Grudziądz", 16),
        ("Andrzej", "americano", "L", "Kalisz", 70),
        ("Joanna", "latte", "L", "Lublin", 25),
        ("Wojciech", "decaf", "L", "Szczecin", 64),
        ("Dominika", "lungo", "M", "Świebodzin", 16),
        ("Aneta", "capuccino", "M", "Legnica", 9),
        ("Artur", "espresso", "S", "Olsztyn", 14),
        ("Mariusz", "americano", "L", "Kielce", 36),
        ("Maciej", "latte", "L", "Kraków", 16),
        ("Paulina", "decaf", "L", "Grudziądz", 1),
        ("Radosław", "americano", "L", "Kielce", 11),
        ("Katarzyna", "latte", "L", "Płock", 42),
        ("Jan", "espresso", "S", "Lublin", 30),
    )
])

In [0]:
# Save the combined DataFrame as the table "coffees". Mode "overwrite"
# replaces existing contents; "overwriteSchema" is passed as a writer option
# (presumably so a changed schema can replace the old one — verify against
# the table format in use).
(
    coffees.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("coffees")
)