In [None]:
import logging
import os
from pathlib import Path

from pyflink.common import Types, Row
from pyflink.common import WatermarkStrategy
from pyflink.common.serialization import SimpleStringSchema
from pyflink.datastream import StreamExecutionEnvironment, RuntimeExecutionMode
from pyflink.datastream.connectors.kafka import KafkaSource, KafkaOffsetsInitializer
from pyflink.datastream.formats.json import JsonRowDeserializationSchema

# Runtime configuration; each value can be overridden through an environment
# variable of the same name and falls back to a local-development default.
RUNTIME_ENV = os.getenv("RUNTIME_ENV", "local")
BOOTSTRAP_SERVERS = os.getenv("BOOTSTRAP_SERVERS", "localhost:9092")

# The root logger defaults to WARNING, which would silently drop the
# logging.info(...) progress message emitted when the connector jars are added.
# basicConfig is a no-op if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)

In [None]:
# Obtain the Flink stream execution environment that every later cell builds on,
# and explicitly select the STREAMING runtime mode for it.
env = StreamExecutionEnvironment.get_execution_environment()
env.set_runtime_mode(RuntimeExecutionMode.STREAMING)

In [None]:
# File names (not paths) of the connector jars that must be registered with the
# execution environment before the Kafka source can be built.
jar_files = [
    "flink-sql-connector-kafka-3.2.0-1.18.jar",
]

In [None]:
# Working directory of the kernel process at the moment this cell runs; the
# connector-jar lookup is rooted at this directory.
CURRENT_DIR = os.getcwd()
# Bare expression: echoes the captured directory as the cell output.
CURRENT_DIR

In [None]:
# Named row layout expected in each Kafka message value: an integer `id`
# followed by the string fields `name` and `email`.
row_type_info = Types.ROW_NAMED(
    ["id", "name", "email"],
    [Types.INT(), Types.STRING(), Types.STRING()],
)

# Deserialization schema that turns a JSON message value into a Row of the
# layout declared above.
json_format = (
    JsonRowDeserializationSchema.builder()
    .type_info(row_type_info)
    .build()
)

In [None]:
# Locate every connector jar under <CURRENT_DIR>/Downloads.
jar_locations = [Path(CURRENT_DIR, "Downloads", name) for name in jar_files]

# Fail fast with a readable message: a jar that is absent here would otherwise
# only surface later as an opaque Java/py4j error when the Kafka source is built.
missing_jars = [str(location) for location in jar_locations if not location.is_file()]
if missing_jars:
    raise FileNotFoundError(f"connector jar(s) not found: {', '.join(missing_jars)}")

# Path.as_uri() yields a well-formed file:// URI (correct slashes and
# percent-encoding) instead of gluing "file://" onto an OS-specific path string.
# It requires an absolute path, which CURRENT_DIR is when taken from os.getcwd().
jar_paths = tuple(location.as_uri() for location in jar_locations)
logging.info(f"adding local jars - {', '.join(jar_files)}")
env.add_jars(*jar_paths)

In [None]:
# Kafka source reading the topic "simple_json_topic" as consumer group
# "flink.tester1". Offsets are initialised with KafkaOffsetsInitializer.latest(),
# and only the message value is deserialized, using the JSON row schema.
flink_test_source = (
    KafkaSource.builder()
    .set_bootstrap_servers(BOOTSTRAP_SERVERS)
    .set_topics("simple_json_topic")
    .set_group_id("flink.tester1")
    .set_starting_offsets(KafkaOffsetsInitializer.latest())
    .set_value_only_deserializer(json_format)
    .build()
)

In [None]:
# Turn the Kafka source into a DataStream. No watermarks are generated, so the
# stream carries no event-time progress information.
flink_stream = env.from_source(
    source=flink_test_source,
    watermark_strategy=WatermarkStrategy.no_watermarks(),
    source_name="flink kafka source",
)

# Attach a print sink so each consumed record is written out once the job runs.
flink_stream.print()

In [None]:
# Submit the pipeline assembled in the previous cells as a job named "importer".
# NOTE(review): the Kafka source is unbounded, so this call presumably blocks
# until the job is cancelled or the kernel is interrupted — confirm.
env.execute("importer")