In [1]:

from pyflink.common import Types
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import (DataTypes, TableDescriptor, Schema, StreamTableEnvironment)
from pyflink.table.expressions import col
from pyflink.table.udf import udf

In [2]:

def mixing_use_of_datastream_and_table():
    """Pass rows from the Table API to the DataStream API and back again.

    Registers a 'datagen' source (``number-of-rows`` set to "10") and a
    'print' sink, projects the source through a Python UDF, converts the
    result to a DataStream to add the two projected fields together,
    converts that stream back into a table, inserts it into the sink and
    waits on the resulting job.
    """
    # Mixing the two APIs requires a StreamTableEnvironment (rather than a
    # plain TableEnvironment) built on top of the stream execution environment.
    exec_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_execution_environment=exec_env)

    # Source table: (id BIGINT, data STRING) served by the 'datagen' connector.
    source_schema = (
        Schema.new_builder()
        .column('id', DataTypes.BIGINT())
        .column('data', DataTypes.STRING())
        .build()
    )
    source_descriptor = (
        TableDescriptor.for_connector('datagen')
        .schema(source_schema)
        .option("number-of-rows", "10")
        .build()
    )
    table_env.create_temporary_table('source', source_descriptor)

    # Sink table: a single BIGINT column written through the 'print' connector.
    sink_schema = (
        Schema.new_builder()
        .column('a', DataTypes.BIGINT())
        .build()
    )
    sink_descriptor = (
        TableDescriptor.for_connector('print')
        .schema(sink_schema)
        .build()
    )
    table_env.create_temporary_table('sink', sink_descriptor)

    @udf(result_type=DataTypes.BIGINT())
    def length(data):
        # Python UDF declared to return BIGINT: the length of its argument.
        return len(data)

    # Table API stage: keep `id` and replace `data` with its length.
    projected = table_env.from_path("source").select(col('id'), length(col('data')))

    # DataStream API stage: add the two fields of every row into one long.
    row_stream = table_env.to_data_stream(projected)
    summed_stream = row_stream.map(lambda row: row[0] + row[1], output_type=Types.LONG())

    # Back to the Table API, declaring the single field as a BIGINT column "f0".
    result_schema = Schema.new_builder().column("f0", DataTypes.BIGINT()).build()
    result_table = table_env.from_data_stream(summed_stream, result_schema)

    # Execute. Remove the .wait() call if submitting to a remote cluster, refer to
    # https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/python/faq/#wait-for-jobs-to-finish-when-executing-jobs-in-mini-cluster
    # for more details.
    insert_job = result_table.execute_insert('sink')
    insert_job.wait()


In [3]:
# Run the example; the function itself calls .wait() on the insert job, and the
# rows shown below are what the 'print' sink wrote for this particular run.
mixing_use_of_datastream_and_table()

2> +I[-4674160444308977755]
2> +I[-3216646488221651312]
2> +I[-5839594202457183104]
1> +I[-6331463005102648867]
1> +I[-5075457498632715727]
1> +I[-2229617544269832232]
4> +I[-7331264594802876497]
4> +I[-104516378337137976]
3> +I[-5250674592613494937]
3> +I[896206917790742712]
