In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StructType, StringType, IntegerType, LongType
from pyspark.sql.functions import from_json,col, to_timestamp, date_format, concat

# Explicit Spark schema for one event record. Every column is declared
# nullable (third StructField argument is True).
# Presumably applied with from_json to the incoming message payload —
# confirm against the code that consumes this schema.
# NOTE(review): 'correletionID' is kept exactly as spelled in the original;
# it has to match the field name used by the producer — confirm the spelling.
events_schema = StructType([
    StructField(field_name, field_type(), True)
    for field_name, field_type in (
        # Event identity and origin.
        ('timestamp', StringType),
        ('type', StringType),
        ('appName', StringType),
        ('appInstance', LongType),
        ('appID', StringType),
        ('probeID', StringType),
        ('eventID', StringType),
        ('correletionID', LongType),
        ('locationID', StringType),
        # Transaction timing.
        ('transactionStart', LongType),
        ('transactionEnd', LongType),
        ('transactionDuration', LongType),
        # Network endpoints.
        ('clientIPAddress', StringType),
        ('clientPort', IntegerType),
        ('serverIPAddress', StringType),
        ('serverPort', IntegerType),
        ('ipProtocol', StringType),
        ('category', StringType),
        # Byte counters.
        ('bytesFromClient', LongType),
        ('bytesToClient', LongType),
        ('bytesFromServer', LongType),
        ('bytesToServer', LongType),
        # Subscriber / application classification.
        ('subscriberID', StringType),
        ('applicationProtocol', StringType),
        ('applicationName', StringType),
        ('domain', StringType),
        ('deviceType', StringType),
        ('networkType', StringType),
        ('contentType', StringType),
        # Loss and round-trip counters.
        ('lostBytesClient', LongType),
        ('lostBytesServer', LongType),
        ('srttMsClient', LongType),
        ('srttMsServer', LongType),
    )
])


def get_spark_session():
    """Return the SparkSession used by this job, creating it if needed.

    The session is named "stream-from-Kafka2", targets the standalone master
    at spark://spark-master:7077, requests the Cassandra connector and Kafka
    SQL source packages, and carries the Cassandra connection settings
    (host, port, username, password) plus a shuffle-partition count of 4.

    Returns:
        The SparkSession produced by ``getOrCreate()``.
    """
    # Options are applied in the order listed, one builder.config call each.
    settings = [
        # NOTE(review): this flag is documented for the DStream API; confirm
        # it has the intended effect if this job uses Structured Streaming.
        ("spark.streaming.stopGracefullyOnShutdown", True),
        # Extra JVM packages resolved when the session starts.
        ("spark.jars.packages", "com.datastax.spark:spark-cassandra-connector_2.12:3.5.0,org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0"),
        ("spark.cassandra.connection.host", "events-db"),
        ("spark.cassandra.connection.port", "9042"),
        # NOTE(review): credentials are hard-coded here; consider supplying
        # them from outside the source file.
        ("spark.cassandra.auth.username", "cassandra"),
        ("spark.cassandra.auth.password", "cassandra"),
        ("spark.sql.shuffle.partitions", 4),
    ]

    session_builder = SparkSession.builder.appName("stream-from-Kafka2")
    for option_key, option_value in settings:
        session_builder = session_builder.config(option_key, option_value)

    return session_builder.master("spark://spark-master:7077").getOrCreate()