In [1]:
# batch_demo.py
from pyflink.table import (
    EnvironmentSettings,
    TableEnvironment,
    DataTypes
)
from pyflink.table.expressions import col
from datetime import datetime



In [2]:

# Step 1: build the batch-mode TableEnvironment.
# Later cells reach it through the name `t_env`.
settings = EnvironmentSettings.in_batch_mode()
t_env = TableEnvironment.create(environment_settings=settings)


2025-12-01T07:36:27.229351Z main ERROR Reconfiguration failed: No configuration found for '12f40c25' at 'null' in 'null'
2025-12-01T07:36:28.385889Z Thread-3 ERROR Reconfiguration failed: No configuration found for '345648c5' at 'null' in 'null'




In [3]:

# Schema of one click event: two string columns followed by a timestamp column.
# Column names are kept the same as in the streaming example.
schema = DataTypes.ROW(
    [
        DataTypes.FIELD(field_name, field_type)
        for field_name, field_type in (
            ("user_id", DataTypes.STRING()),
            ("url", DataTypes.STRING()),
            ("ts", DataTypes.TIMESTAMP_LTZ(3)),
        )
    ]
)

# Five hardcoded click events, one per minute starting at 10:00 (bounded input).
# NOTE(review): these datetimes are naive while `ts` is declared TIMESTAMP_LTZ;
# confirm which time zone they end up being interpreted in.
rows = [
    ("alice", "https://a.example/1", datetime(2025, 1, 1, hour=10, minute=0)),
    ("bob", "https://b.example/2", datetime(2025, 1, 1, hour=10, minute=1)),
    ("alice", "https://a.example/3", datetime(2025, 1, 1, hour=10, minute=2)),
    ("carol", "https://c.example/4", datetime(2025, 1, 1, hour=10, minute=3)),
    ("bob", "https://b.example/5", datetime(2025, 1, 1, hour=10, minute=4)),
]

# Turn the in-memory rows into a Table with the schema above.
src_table = t_env.from_elements(rows, schema=schema)


In [4]:
# Clean-up cell: remove any leftover object named "clicks_src" so the cells
# below can be re-run in the same kernel without name clashes.

# 1) Temporary view.
#    list_views() reports permanent views as well as temporary ones, so it
#    cannot tell us whether a *temporary* view exists. drop_temporary_view()
#    itself returns True only when a temporary view was actually removed, so
#    the message is printed based on that return value.
try:
    if t_env.drop_temporary_view("clicks_src"):
        print("Dropped temporary view clicks_src")
except Exception as e:
    print("Failed to drop temporary view:", e)

# 2) Permanent view/table.
#    With the temporary view gone, a remaining entry in list_views() is a
#    permanent view; an entry that only shows up in list_tables() is a table.
#    Choosing the statement from the listing avoids issuing a DROP TABLE that
#    is expected to fail and then hiding its error.
try:
    if "clicks_src" in t_env.list_views():
        t_env.execute_sql("DROP VIEW clicks_src")
        print("Dropped permanent view clicks_src")
    elif "clicks_src" in t_env.list_tables():
        t_env.execute_sql("DROP TABLE clicks_src")
        print("Dropped permanent table clicks_src")
    else:
        print("No permanent table/view named clicks_src found.")
except Exception as e:
    # Best effort: report the failure instead of aborting the notebook run.
    print("Failed to drop permanent object clicks_src:", e)


No permanent table/view named clicks_src found.


In [5]:

# Register the source table as a temporary view so we can run SQL on it.
# The view created below is TEMPORARY, and a plain DROP VIEW does not remove
# temporary views. Drop the temporary view explicitly first so this cell can
# be re-run in the same kernel. drop_temporary_view() returns False, without
# raising, when no such view exists.
t_env.drop_temporary_view("clicks_src")
t_env.execute_sql("DROP VIEW  IF EXISTS clicks_src")

t_env.create_temporary_view("clicks_src", src_table)

# -----------------------
# Approach A — Query & collect results in Python (recommended for teaching)
# -----------------------
print("=== Approach A: collect() results in Python ===")

# Number of clicks per user, sorted by user id.
query = """
SELECT user_id, COUNT(*) AS cnt
FROM clicks_src
GROUP BY user_id
ORDER BY user_id
"""
# Execute the query; the job runs to completion because the source is bounded.
table_result = t_env.sql_query(query).execute()

# collect() yields Row objects. The `with` block closes the result iterator
# once all rows have been read.
with table_result.collect() as results:
    for row in results:
        # Each row prints like <Row('alice', 2)>
        print(row)

=== Approach A: collect() results in Python ===
2025-12-01T07:42:25.802470Z Thread-3 ERROR Reconfiguration failed: No configuration found for '3af04fdc' at 'null' in 'null'
<Row('alice', 2)>
<Row('bob', 2)>
<Row('carol', 1)>


In [9]:


# -----------------------
# Approach B — write the aggregated rows into a print sink
# -----------------------
print("\n=== Approach B: INSERT INTO print sink (job runs and exits) ===")

# DDL for the sink table. It uses the 'print' connector, which prints the rows
# it receives to stdout and is convenient for small batch demos.
# IF NOT EXISTS keeps the statement safe to execute more than once.
out_print_ddl = """
CREATE TEMPORARY  TABLE IF NOT EXISTS out_print (
  user_id STRING,
  cnt BIGINT
) WITH (
  'connector' = 'print'
)
"""
# As the last expression of the cell, the returned TableResult is displayed
# as the cell output.
t_env.execute_sql(out_print_ddl)
 



=== Approach B: INSERT INTO print sink (job runs and exits) ===


<pyflink.table.table_result.TableResult at 0x7f9b49b96dd0>

In [10]:

# Insert the per-user counts into the print sink.
# execute_sql() builds the dataflow graph for the INSERT and submits it as a
# job; it returns a TableResult right away and does NOT wait for the job.

insert_result = t_env.execute_sql("""
INSERT INTO out_print
SELECT user_id, COUNT(*) AS cnt
FROM clicks_src
GROUP BY user_id
ORDER BY user_id
""")

# Block until the job has finished. The source is bounded, so the job ends on
# its own. Without this wait, the message below can be printed before the sink
# has emitted its rows.
insert_result.wait()

print("Finished INSERT into print sink. (The printed rows above are the sink output.)")

2025-12-01T07:48:25.831680Z Thread-3 ERROR Reconfiguration failed: No configuration found for '59b2c995' at 'null' in 'null'
Finished INSERT into print sink. (The printed rows above are the sink output.)
+I[alice, 2]
+I[bob, 2]
+I[carol, 1]
