In [None]:
# Run this notebook headlessly with nbconvert (run one of the commands below from a host shell, not in this cell)

# gitbash on windows
"""
winpty docker exec -it flink-jupyter bash -lc \
"jupyter nbconvert --to notebook --execute /notebooks/ModesBasic.ipynb \
 --ExecutePreprocessor.timeout=600 --ExecutePreprocessor.kernel_name=python3 \
 --output /notebooks/ModesBasicOut.ipynb"
"""

# Linux
"""
docker exec -it flink-jupyter bash -lc \
"jupyter nbconvert --to notebook --execute /notebooks/ModesBasic.ipynb \
 --ExecutePreprocessor.timeout=600 --ExecutePreprocessor.kernel_name=python3 \
 --output /notebooks/ModesBasicOut.ipynb"
"""

# Powershell
"""
docker exec -it flink-jupyter bash -lc `
"jupyter nbconvert --to notebook --execute /notebooks/ModesBasic.ipynb `
 --ExecutePreprocessor.timeout=600 `
 --ExecutePreprocessor.kernel_name=python3 `
 --output /notebooks/ModesBasicOut.ipynb"
"""

In [15]:
# Understanding the changelog row kinds (+I, -U, +U, -D) in Flink
import os

from pyflink.common import Types
from pyflink.datastream import RuntimeExecutionMode, StreamExecutionEnvironment
from pyflink.table import (
    EnvironmentSettings,
    RowKind,
    StreamTableEnvironment,
    TableEnvironment,
)

import get_env

# Batch environments supplied by the project-local get_env helper.
# NOTE(review): batch_env / batch_t_env are rebound by the batch-mode cell
# further down, so these values are never used there - confirm this call is
# still required.
(batch_env, batch_t_env) = get_env.get_remote_batch_env()

# Streaming execution environment plus the table environment layered on top of it.
env = StreamExecutionEnvironment.get_execution_environment()
env.set_runtime_mode(RuntimeExecutionMode.STREAMING)
# Presumably parallelism 1 keeps the printed changelog in one ordered stream - TODO confirm.
env.set_parallelism(1)
t_env = StreamTableEnvironment.create(env)


In [16]:
# Streaming demo: aggregate a small bounded VALUES view and print the changelog.

# Make the cell re-runnable. Both sales_input and print_out are created below as
# TEMPORARY objects, so they have to be dropped with the TEMPORARY keyword: a
# plain DROP VIEW / DROP TABLE targets the permanent catalog object and is
# rejected by Flink while a temporary object of the same name still exists.
t_env.execute_sql("DROP TEMPORARY TABLE IF EXISTS sales_input")
t_env.execute_sql("DROP TEMPORARY VIEW IF EXISTS sales_input")
t_env.execute_sql("DROP TEMPORARY TABLE IF EXISTS print_out")

# Create view directly from VALUES (no INSERT required)
t_env.execute_sql("""
CREATE TEMPORARY VIEW sales_input AS
SELECT * FROM (VALUES
  (1, 10, 100),
  (2, 10, 200),
  (3, 10, 300)
) AS T(sale_id, product_id, amount)
""")

# Aggregation: running total and count per product
agg = t_env.sql_query("""
SELECT 
  product_id,
  SUM(amount) AS total_amount,
  COUNT(sale_id) AS num_sales
FROM sales_input
GROUP BY product_id
""")

# Print sink: writes every received row, prefixed with its row kind
t_env.execute_sql("""
CREATE TEMPORARY TABLE print_out (
  product_id INT,
  total_amount BIGINT,
  num_sales BIGINT
) WITH ('connector' = 'print')
""")

# Execute and block until the job finishes. In streaming mode the GROUP BY emits
# a changelog even for this bounded input: one +I for the first row, then a
# -U/+U pair each time a further row updates the aggregate.
agg.execute_insert("print_out").wait()

2025-12-02T19:33:36.628245Z Thread-3 ERROR Reconfiguration failed: No configuration found for '725309a9' at 'null' in 'null'
+I[10, 100, 1]
-U[10, 100, 1]
+U[10, 300, 2]
-U[10, 300, 2]
+U[10, 600, 3]


In [None]:
"""
Symbol	Meaning	Description
+I	Insert	A completely new row is added to the result table
-U	Update Before	The old value of a row must be removed (retracted) before updating
+U	Update After	The new updated value of that row
-D	Delete	The row is fully removed from the result table
"""

"""
Flink Operators Produce the Changelog

GROUP BY
WINDOW
JOIN
DISTINCT
UPSERT JOIN
etc
"""

# FLOW: input stream → operator state → changelog (+I, -U, +U, -D) → sink
# Sinks behave differently depending on their type

In [17]:
# Convert the aggregated table into a changelog DataStream.
# `agg` must be the streaming table built from t_env in the cell above; the
# batch-mode cell further down rebinds `agg`, so re-run that streaming cell
# first if the batch cell was executed in between.
ds = t_env.to_changelog_stream(agg)


In [18]:
# Helper that shows what a sink function receives for each changelog element
def format_changelog(elem):
    """Render one changelog element as '<mode> (<field>, <field>, ...)'.

    Two element shapes are recognised:
      * an object exposing ``get_row_kind()`` (for example a Row), and
      * a ``(RowKind, row)`` pair.
    Anything else is treated as a bare row of unknown kind and is rendered
    with the mode ``??``.

    Args:
        elem: the element taken from the changelog stream.

    Returns:
        str: the mode symbol (``+I``, ``-U``, ``+U``, ``-D`` or ``??``),
        a space, then the row fields in parentheses. A row that cannot be
        iterated is rendered with ``str(row)`` instead.
    """
    mode_by_kind = {
        RowKind.INSERT: "+I",
        RowKind.UPDATE_BEFORE: "-U",
        RowKind.UPDATE_AFTER: "+U",
        RowKind.DELETE: "-D",
    }

    kind_getter = getattr(elem, "get_row_kind", None)
    if callable(kind_getter):
        rk, row = kind_getter(), elem
    else:
        # Default: unknown kind, the element itself is the row.
        rk, row = None, elem
        try:
            head, tail = elem[0], elem[1]
        except (TypeError, IndexError, KeyError):
            pass
        else:
            # Only unpack a genuine (rowkind, row) pair. Without this check a
            # plain data tuple such as (10, 100, 1) would be split into
            # kind=10 / row=100 and lose its fields.
            if isinstance(head, RowKind):
                rk, row = head, tail

    mode = mode_by_kind.get(rk, "??")

    # Rows, tuples and lists are iterable; fall back to str() for scalars.
    try:
        fields = iter(row)
    except TypeError:
        row_str = str(row)
    else:
        row_str = "(" + ", ".join(str(field) for field in fields) + ")"

    return f"{mode} {row_str}"

# Map every changelog element to its formatted text; the result type is declared as STRING.
mapped = ds.map(format_changelog, output_type=Types.STRING())
mapped.print()           # built-in print sink (no Java sink wrapper needed)
# Run the DataStream job. The call is the cell's last expression, so the
# notebook also displays the returned JobExecutionResult.
env.execute("changelog_map_print_demo")

2025-12-02T19:33:46.656830Z Thread-3 ERROR Reconfiguration failed: No configuration found for '2566925c' at 'null' in 'null'
+I (10, 100, 1)
-U (10, 100, 1)
+U (10, 300, 2)
-U (10, 300, 2)
+U (10, 600, 3)


<pyflink.common.job_execution_result.JobExecutionResult at 0x7fa0cbf12620>

In [21]:
# The same pipeline is repeated below in BATCH mode.
banner_rule = "-" * 10
print(banner_rule, "BATCH MODE", banner_rule)

---------- BATCH MODE ----------


In [20]:
# Batch-mode settings. Despite its name, batch_env holds an EnvironmentSettings
# object rather than an execution environment.
# NOTE(review): this rebinds batch_env / batch_t_env, replacing the values
# returned by get_env.get_remote_batch_env() in the setup cell - confirm intended.
batch_env = (
    EnvironmentSettings
    .new_instance()
    .in_batch_mode()
    .build()
)
# Table environment created from the batch settings above.
batch_t_env = TableEnvironment.create(environment_settings=batch_env)

In [22]:
# Batch demo: the same aggregation as the streaming cell, run on batch_t_env.

# Make the cell re-runnable. Both sales_input and print_out are created below as
# TEMPORARY objects, so they have to be dropped with the TEMPORARY keyword: a
# plain DROP VIEW / DROP TABLE targets the permanent catalog object and is
# rejected by Flink while a temporary object of the same name still exists.
batch_t_env.execute_sql("DROP TEMPORARY TABLE IF EXISTS sales_input")
batch_t_env.execute_sql("DROP TEMPORARY VIEW IF EXISTS sales_input")
batch_t_env.execute_sql("DROP TEMPORARY TABLE IF EXISTS print_out")

# Create view directly from VALUES (no INSERT required)
batch_t_env.execute_sql("""
CREATE TEMPORARY VIEW sales_input AS
SELECT * FROM (VALUES
  (1, 10, 100),
  (2, 10, 200),
  (3, 10, 300)
) AS T(sale_id, product_id, amount)
""")

# Aggregation: total and count per product.
# NOTE: this rebinds `agg`, which until now referred to the streaming table.
agg = batch_t_env.sql_query("""
SELECT 
  product_id,
  SUM(amount) AS total_amount,
  COUNT(sale_id) AS num_sales
FROM sales_input
GROUP BY product_id
""")

# Print sink: writes every received row, prefixed with its row kind
batch_t_env.execute_sql("""
CREATE TEMPORARY TABLE print_out (
  product_id INT,
  total_amount BIGINT,
  num_sales BIGINT
) WITH ('connector' = 'print')
""")

# Execute and block until the job finishes. In batch mode only the final
# aggregate is emitted, so the sink prints a single +I row per product.
agg.execute_insert("print_out").wait()

2025-12-02T19:37:00.233790Z Thread-3 ERROR Reconfiguration failed: No configuration found for '59509e8a' at 'null' in 'null'
+I[10, 600, 3]


In [25]:
# NOTICE: batch mode printed a single +I row; there are no -U, +U or -D rows,
# because a batch job emits only the final result and does not produce a changelog.
# Ask as many questions as you need until this is clear:
# without understanding this, Flink development will be difficult.

