In [1]:
# Notebook-wide imports: PySpark plus the project-local connection helper.
import pyspark
# NOTE(review): `optional` is not referenced anywhere in the visible cells. It looks
# like an accidental IDE auto-import; confirm against the rest of the notebook
# before removing it.
from attr.converters import optional
from pyspark.sql import SparkSession
# Project-local module. The next cell calls setupEnvironment(), set_connectionProfile(),
# create_jdbc() and get_Property() on it.
import ConnectionConfig as cc
# Invoke the helper's setup routine up front (the next cell calls it again before
# building the Spark session). What it changes is defined in ConnectionConfig and
# is not visible from this notebook.
cc.setupEnvironment()

In [2]:
# Build a local Spark session with Delta Lake support, then read the `stations`
# table from the "VeloBike" PostgreSQL connection profile over JDBC and preview it.

# Setup environment (also called in the import cell; kept here unchanged).
cc.setupEnvironment()

from delta import configure_spark_with_delta_pip
from pyspark.sql import SparkSession

# Configure the session builder with the Delta Lake SQL extension and catalog.
builder = (
    SparkSession.builder
    .appName("DBConnectionTest")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .master("local[4]")
)

# Extra Maven packages for Kafka and JDBC (PostgreSQL).
# The Kafka connector artifact must match the Spark version that is actually
# running, so its version is taken from the installed pyspark instead of being
# pinned to a fixed release (previously hard-coded to 3.1.2).
# NOTE(review): the `_2.12` Scala suffix is kept from the original; this assumes a
# Scala 2.12 Spark build. TODO confirm for the target environment.
extra_packages = [
    f"org.apache.spark:spark-sql-kafka-0-10_2.12:{pyspark.__version__}",
    "org.postgresql:postgresql:42.7.4",
]

# Let Delta add its own package coordinates on top of the extra packages.
builder = configure_spark_with_delta_pip(builder, extra_packages=extra_packages)

# Create (or reuse) the Spark session.
spark = builder.getOrCreate()

# Log level to DEBUG. This is very verbose; lower it once the connection is verified.
spark.sparkContext.setLogLevel("DEBUG")

# Select the connection profile and build the JDBC URL once, so the URL that is
# printed is guaranteed to be the one used for the read.
cc.set_connectionProfile("VeloBike")
jdbc_url = cc.create_jdbc()
print(jdbc_url)

# Partitioned JDBC read of the `stations` table.
# lowerBound/upperBound only determine how the `stationid` range is split across
# the 4 partitions; they do not filter rows.
# NOTE(review): the variable is named `bike_rides_df` although it holds the
# `stations` table. The name is kept because later cells may reference it.
bike_rides_df = (
    spark.read
    .format("jdbc")
    .option("driver", "org.postgresql.Driver")
    .option("url", jdbc_url)
    .option("dbtable", "stations")
    .option("user", cc.get_Property("username"))
    .option("password", cc.get_Property("password"))
    .option("partitionColumn", "stationid")
    .option("numPartitions", 4)
    .option("lowerBound", 0)
    .option("upperBound", 1001)
    .load()
)

# Show the first 20 rows of the DataFrame
bike_rides_df.show(20)


jdbc:postgresql://localhost:5433/velodb
+---------+--------+---------+------------+--------------------+-------+-------+----------+-----------------+--------------------+-------+------+
|stationid|objectid|stationnr|        type|              street| number|zipcode|  district|         gpscoord|      additionalinfo|labelid|cityid|
+---------+--------+---------+------------+--------------------+-------+-------+----------+-----------------+--------------------+-------+------+
|        1|   33202|      026|DUBBELZIJDIG|         Meir (2000)|     84|   2000| ANTWERPEN|(51.2182,4.41241)|                    |   NULL|  NULL|
|        2|   33203|      019| ENKELZIJDIG|          ONTBREKEND|     12|   2000| ANTWERPEN| (51.219,4.40405)|                    |   NULL|  NULL|
|        3|   33204|      020| ENKELZIJDIG|Groenkerkhofstraa...|      2|   2000| ANTWERPEN|(51.2187,4.40066)| thv Nationalestraat|   NULL|  NULL|
|        4|   33205|      035| ENKELZIJDIG|Cockerillkaai (2000)|       |   2000| ANT