# Install Dependencies

In [None]:
# Install the two third-party packages this notebook installs up front:
# boto3 and delta-spark.
# NOTE(review): a bare `pip install ...` line is not Python syntax; presumably
# this relies on IPython automagic. `%pip install ...` would be the explicit
# form — confirm which is wanted.
pip install boto3
pip install delta-spark

# Upload file

In [None]:
import boto3
from botocore.exceptions import ClientError
import os

# S3 client pointed at the endpoint http://minio:9000 over plain HTTP,
# signing requests with Signature Version 4.
# NOTE(review): the access/secret keys are hard-coded here; presumably local
# demo credentials — confirm before reusing this outside a sandbox.
s3 = boto3.client(
    's3',
    endpoint_url='http://minio:9000',
    aws_access_key_id='accesskey',
    aws_secret_access_key='secretkey',
    aws_session_token=None,
    config=boto3.session.Config(signature_version='s3v4'),
    use_ssl=False,
)

# Upload the local file 'people-100.csv' into bucket1 under the key 'people.csv'.
s3.upload_file('people-100.csv', 'bucket1', 'people.csv')

# Print every key in bucket1 to confirm the upload. A paginator over
# ListObjectsV2 is used so that buckets holding more than one page of keys
# are listed completely instead of being cut off after the first response.
paginator = s3.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='bucket1'):
    # 'Contents' is absent from a page when the bucket is empty.
    for obj in page.get('Contents', []):
        print(obj['Key'])

# Connect to Spark

In [None]:
from delta.tables import DeltaTable
from pyspark.sql import *
from sparkboiler import sparkSession

# Obtain the session object used by every cell below via the project helper
# imported from `sparkboiler`.
# NOTE(review): presumably this returns a SparkSession already configured for
# Delta Lake and the s3a endpoint — confirm in sparkboiler.
spark = sparkSession()

# Create new schema

In [None]:
# Start from a clean slate: drop the schema together with everything in it.
spark.sql("DROP SCHEMA IF EXISTS testdata CASCADE")

# Recreate the schema with its storage location on bucket2. The LOCATION
# string literal must be closed with a single quote, otherwise the statement
# is malformed SQL and cannot be parsed.
spark.sql("CREATE DATABASE IF NOT EXISTS testdata LOCATION 's3a://bucket2/'")

# Make testdata the current schema for subsequent statements.
spark.sql("USE testdata")

# Read uploaded CSV with Spark

In [None]:
# Read the CSV that was uploaded to bucket1. The object was stored under the
# key 'people.csv' (not the local file name 'people-100.csv'), so that is the
# path that has to be read. The first row is used as the header and column
# types are inferred from the data.
df = spark.read.csv("s3a://bucket1/people.csv", header=True, inferSchema=True)

# Print the first rows as a quick sanity check.
df.show()

# Push DataFrame to Delta table

In [None]:
# Save `df` as the table testdata.people in Delta format, passing the
# column-mapping mode "name" as a writer option.
# NOTE(review): presumably column mapping is needed because the CSV headers
# contain characters that plain column names do not allow — confirm.
(
    df.write
    .format("delta")
    .option("delta.columnMapping.mode", "name")
    .saveAsTable("testdata.people")
)

# Use Delta library to read data

In [None]:
# Look the table up by name through the DeltaTable API, convert it to a
# DataFrame and print its first rows.
dt = DeltaTable.forName(spark, "testdata.people")
people_df = dt.toDF()
people_df.show()

# Use Spark SQL to read data

In [None]:
# Read the same table back with a plain Spark SQL query and print its
# first rows.
query = "select * from testdata.people"
df = spark.sql(query)
df.show()

# Terminate Spark Job

In [None]:
# Stop the session created earlier; `spark` should not be used after this.
spark.stop()