In [2]:
import json, urllib.request
from pyflink.common import Configuration
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import StreamTableEnvironment, EnvironmentSettings

JM_HOST, JM_PORT = "jobmanager", 8081  # must match compose + flink-conf.yaml

# --- preflight: MUST reach the real JM, and must have >=1 TM registered
cfg = json.loads(urllib.request.urlopen(f"http://{JM_HOST}:{JM_PORT}/config").read())
print ("Job mangers")
print (cfg)

print ("-" * 50)

tms = json.loads(urllib.request.urlopen(f"http://{JM_HOST}:{JM_PORT}/taskmanagers").read())
print (tms)

assert len(tms.get("taskmanagers", [])) >= 1, "JM reachable but shows 0 TaskManagers"

Job mangers
{'refresh-interval': 3000, 'timezone-name': 'Coordinated Universal Time', 'timezone-offset': 0, 'flink-version': '1.20.2', 'flink-revision': '1641cb9 @ 2025-06-12T21:40:37+02:00', 'features': {'web-submit': True, 'web-cancel': True, 'web-rescale': False, 'web-history': False}}
--------------------------------------------------
{'taskmanagers': [{'id': '172.21.0.10:34949-49637f', 'path': 'pekko.tcp://flink@172.21.0.10:34949/user/rpc/taskmanager_0', 'dataPort': 45167, 'jmxPort': -1, 'timeSinceLastHeartbeat': 1764533504294, 'slotsNumber': 40, 'freeSlots': 40, 'totalResource': {'cpuCores': 40.0, 'taskHeapMemory': 1459, 'taskOffHeapMemory': 0, 'managedMemory': 1372, 'networkMemory': 343, 'extendedResources': {}}, 'freeResource': {'cpuCores': 40.0, 'taskHeapMemory': 1459, 'taskOffHeapMemory': 0, 'managedMemory': 1372, 'networkMemory': 343, 'extendedResources': {}}, 'hardware': {'cpuCores': 12, 'physicalMemory': 67303006208, 'freeMemory': 1665138688, 'managedMemory': 1438814063}, 

In [3]:
conf = Configuration()
conf.set_string("execution.target", "remote")
conf.set_string("rest.address", JM_HOST)
conf.set_string("rest.port", str(JM_PORT))
conf.set_string("pipeline.name", "table-remote-proof")
conf.set_string("pipeline.operator-chaining", "false")
conf.set_string("python.client.executable", "/usr/bin/python3")
conf.set_string("python.executable", "/usr/bin/python3")

# Build via DataStream env so the Table env inherits the remote config
env = StreamExecutionEnvironment.get_execution_environment(conf)
settings = EnvironmentSettings.in_streaming_mode()
t_env = StreamTableEnvironment.create(env, environment_settings=settings)
t_env.get_config().set("parallelism.default", "1")

2025-11-30T20:12:00.343436Z main ERROR Reconfiguration failed: No configuration found for '12f40c25' at 'null' in 'null'




2025-11-30T20:12:02.835335Z Thread-3 ERROR Reconfiguration failed: No configuration found for '664ce57f' at 'null' in 'null'


In [4]:

# Long-running streaming job: datagen -> print (stays RUNNING)
# Typical Streaming Data Flow

t_env.execute_sql("""
CREATE TABLE clicks_src (
  user_id STRING,
  url     STRING,
  ts      TIMESTAMP_LTZ(3)
) WITH (
  'connector' = 'datagen',
  'rows-per-second' = '1'
)
""")



<pyflink.table.table_result.TableResult at 0x7fbd97bf0a60>

In [5]:

t_env.execute_sql("""
CREATE TABLE out_print (
  user_id STRING,
  cnt     BIGINT
) WITH ('connector' = 'print')
""")



<pyflink.table.table_result.TableResult at 0x7fbd9a548fa0>

In [6]:

result = t_env.execute_sql("""
INSERT INTO out_print
SELECT user_id, COUNT(*) AS cnt
FROM clicks_src
GROUP BY user_id
""")

print("Submitted 'table-remote-proof'. Open http://localhost:8081 → Jobs. Check TM logs for print output.")

2025-11-30T20:12:38.143929Z Thread-3 ERROR Reconfiguration failed: No configuration found for '589541b6' at 'null' in 'null'
Submitted 'table-remote-proof'. Open http://localhost:8081 → Jobs. Check TM logs for print output.
5> +I[c9ea32ba4aa8392d148e74b5717f0858122b33a751322dd651881a90c217378a07188ea51aa35cf2e009910d822202b79f1d, 1]
7> +I[15fbcaaadcb4f9bc67b4ab689067eec905045266e544a55d7d011a3831c44d514e250141cff1b3d1504e0ccbaeb0f5adf6c7, 1]
8> +I[a67b765ba498de043c4253e9e85c382da208b4fa2c81acd43ce3581f4b138e33de8a610b7965d844facaf9c49811f8606b3d, 1]
8> +I[adfbf047468f077f8690d8c26ee6f2c15b92c53adfa3a86b0a15bf59ca460fd7d70c6e8df3de4760c349cab8ce360c181782, 1]
11> +I[28690eaf0537edd01293ec824466e4b13a3b1724bffce58a75b38ed04e664b9404d82389e539132e5a1ada94433ac88a7b98, 1]
1> +I[286d4ce545b7a050e8026a5a552d68991f8f41bf2cc1346ab817f1b8d7bb4f44f8fb9c893ee2afe768b8efed2e3744d396d1, 1]
1> +I[8c6830f84bb6839ab298e89ea2642a63430d649b7bae8df5f3cbe0fa9ed1aa169820268648fbd9cdf729e1104b4cc9a9cf8c, 1

In [7]:
job_client = result.get_job_client()

# block until completion (stream will never complete)
# execution_result = job_client.get_job_execution_result().get()

# cancel job programmatically:
job_client.cancel()

<pyflink.common.completable_future.CompletableFuture at 0x7fbd9a549210>