### Utility Functions

In [None]:
import os
import glob
import itertools
import random
import socket
import struct
import boto3


# Module-level S3 client, created once; upload() sends every file through it.
CLIENT = boto3.client('s3')

def upload(table, bucket, path):
    """Recursively upload the directory ``table`` to S3 under the prefix ``path``.

    Every entry of ``table`` is visited, hidden (dot-prefixed) ones included.
    Sub-directories are mirrored below ``path``; files are sent with
    ``CLIENT.upload_file``.

    Args:
        table: Local directory whose contents are uploaded.
        bucket: Bucket argument forwarded to ``CLIENT.upload_file``.
        path: Key prefix that receives the contents of ``table``.

    Raises:
        NotADirectoryError: If ``table`` is not an existing directory.
    """
    # Imported locally so the function does not rely on edits to the import cell.
    import posixpath

    # Explicit check instead of ``assert``: asserts vanish under ``python -O``.
    if not os.path.isdir(table):
        raise NotADirectoryError(table)

    # os.listdir returns hidden entries as well and, unlike a glob pattern,
    # does not give special meaning to "[", "*" or "?" in directory names.
    for name in os.listdir(table):
        item = os.path.join(table, name)
        # Object keys always use "/" as separator, whatever the local OS uses,
        # and directories and files now build their key the same way.
        key = posixpath.join(path, name)
        if not os.path.isfile(item):
            upload(item, bucket, key)
        else:
            CLIENT.upload_file(item, bucket, key)


def ipv4():
    """Return a random IPv4 address in dotted-quad notation.

    The address is drawn as an integer from 1 to 0xffffffff inclusive, so
    ``0.0.0.0`` is never produced.
    """
    address = random.randint(1, 0xffffffff)
    # Four bytes, most significant first (network byte order).
    packed = address.to_bytes(4, 'big')
    return socket.inet_ntoa(packed)


def port():
    """Return a random port number between 0 and 65535, both inclusive."""
    highest = 65535
    return random.randint(0, highest)

### Prepare CSV File

In [None]:
import csv


# Write a header plus 10,000 rows of random (source, destination) endpoints.
with open('../../data/security.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(['src_ip', 'src_port', 'dst_ip', 'dst_port'])
    # The generator is consumed row by row, so values are drawn in the same
    # order as an explicit loop would draw them.
    writer.writerows(
        [ipv4(), port(), ipv4(), port()]
        for _ in range(10_000)
    )

### Setup Spark Session

In [None]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip


# Session builder with the Delta Lake package, SQL extension and catalog.
# NOTE(review): configure_spark_with_delta_pip may itself set
# 'spark.jars.packages' to match the installed delta package -- confirm that
# the explicit 2.2.0 pin below still has an effect.
builder = (
    SparkSession.builder
    .appName('CreateDeltaTables')
    .config('spark.jars.packages', 'io.delta:delta-core_2.12:2.2.0')
    .config(
        'spark.sql.extensions',
        'io.delta.sql.DeltaSparkSessionExtension',
    )
    .config(
        'spark.sql.catalog.spark_catalog',
        'org.apache.spark.sql.delta.catalog.DeltaCatalog',
    )
)

spark = configure_spark_with_delta_pip(builder).getOrCreate()

### Load CSV

In [None]:
# Read the generated CSV; the first line is the header and column types are
# inferred from the data.
security = (
    spark.read
    .format('csv')
    .option('header', 'true')
    .option('inferSchema', 'true')
    .load('../../data/security.csv')
)
# Preview the first five rows, one field per line, without truncation.
security.show(
    n=5,
    truncate=False,
    vertical=True,
)

### Split Delta Table

In [None]:
# Write `security` to a Delta table in `n_splits` separate commits.
#
# Each row is tagged once with a bucket id and every iteration writes one
# bucket. This replaces the earlier limit()/subtract() loop because:
#   * subtract() is a set difference (EXCEPT DISTINCT) and drops duplicate rows;
#   * limit() without an ordering does not promise the same rows when the lazy
#     plan is evaluated again (once for the write, once inside subtract);
#   * every iteration stacked another subtract on top of the previous plan;
#   * rows beyond count // n_splits * n_splits were never written.
from pyspark.sql import functions as F

n_splits = 10

# monotonically_increasing_id() yields unique, non-negative ids, so the modulo
# falls in [0, n_splits). Bucket sizes are close to equal (presumably exactly
# equal when the input is read as a single partition -- TODO confirm).
# cache() is meant to avoid re-reading the CSV for each of the writes below.
bucketed = security.withColumn(
    '_bucket',
    F.monotonically_increasing_id() % n_splits,
).cache()

for i in range(n_splits):
    chunk = bucketed.where(F.col('_bucket') == i).drop('_bucket')
    writer = chunk.write.format('delta')
    # The first write creates the table (no mode set, exactly as before);
    # every later write appends a new commit to it.
    if i > 0:
        writer = writer.mode('append')
    writer.save('../../data/security-table')

bucketed.unpersist()