# Test producer notebook
Some remarks:
* This notebook assumes Pulsar is already running (it was tested in standalone mode)
* To ensure the information sent can later be read asynchronously, in the order it was sent, and read multiple times, the Pulsar cluster has to be configured to retain messages (even after they have been read by a consumer) and to deduplicate messages (to avoid duplicates if we send the same message more than once). This can be configured in Pulsar's config file, or through the `pulsar-admin` command line for the default 'tenant/namespace', with:
  * `pulsar-admin namespaces set-retention public/default --size -1 --time -1`
  * `pulsar-admin namespaces set-deduplication public/default --enable` 

In [8]:
import pulsar
from pulsar import PartitionsRoutingMode

In [10]:
# Completion handler required by 'send_async'; it is invoked once the broker
# has received the message.
def callback(res, msg_id):
    """Accept the send result and message id, and deliberately do nothing."""
    return None

def send_results(results_dictionary, topic_name):
    """Send the results of a query to a Pulsar topic.

    Each item of ``results_dictionary`` is published as one message whose
    payload is the UTF-8 encoded text ``('key', value)``. Messages are sent in
    the dictionary's iteration order, so the dictionary should already be
    ordered the way the consumer is meant to view it. After the last item, a
    final ``EOS`` message carrying the property ``EOS="True"`` is sent.

    Args:
        results_dictionary: Results in ``{'key_1': value_1, ..., 'key_n': value_n}``
            format.
        topic_name: Name of the topic under ``persistent://public/default/``.
    """
    # Create a pulsar client by supplying ip address and port
    client = pulsar.Client('pulsar://localhost:6650')
    try:
        # Use Single Partition to ensure the consumer will read messages in
        # order from the same partition
        producer = client.create_producer(
            f'persistent://public/default/{topic_name}',
            message_routing_mode=PartitionsRoutingMode.UseSinglePartition,
        )

        # Send each entry of the dictionary passed in by the caller (not a
        # module-level global) in a tuple ('key', value) type of formatting
        for key, value in results_dictionary.items():
            message = f"('{key}', {value!s})"
            producer.send_async(message.encode('utf-8'), callback)

        # Once everything is sent, send a last message signalling the end of
        # the stream (EOS), including a property stating EOS=True
        producer.send_async("EOS".encode('utf-8'), callback, properties=dict(EOS="True"))

        # send_async only queues messages; flush so that everything queued is
        # actually delivered before the client is closed
        producer.flush()
    finally:
        # Destroy the pulsar client, even if creating the producer or sending
        # a message failed
        client.close()

In [11]:
# Sample (language, count) pairs used to exercise the producer.
# Later on this info comes from the queries.
top_languages_list = [
    ('JavaScript', 414558),
    ('Java', 177839),
    ('Python', 267520),
    ('CSS', 57083),
    ('PHP', 50818),
    ('Ruby', 22368),
    ('C++', 76267),
    ('C', 42708),
    ('Shell', 35257),
    ('C#', 66798),
    ('Objective-C', 2120),
    ('R', 26508),
    ('VimL', 3812),
    ('Go', 24685),
    ('Perl', 2498),
    ('CoffeeScript', 375),
    ('TeX', 5473),
    ('Swift', 14564),
    ('Scala', 3240),
    ('Emacs Lisp', 35),
    ('Haskell', 2009),
    ('Lua', 5163),
    ('Clojure', 1278),
    ('Matlab', 11335),
    ('Makefile', 7215),
    ('Groovy', 894),
    ('Puppet', 182),
    ('Rust', 11433),
    ('PowerShell', 4959),
]

# Keyed view of the same pairs, in the same order as the list above.
top_languages_dict = dict(top_languages_list)

# Publish the sample results to the 'top_languages' topic.
send_results(top_languages_dict, 'top_languages')

2022-05-19 14:00:38.120 INFO  [140569026484032] ClientConnection:189 | [<none> -> pulsar://localhost:6650] Create ClientConnection, timeout=10000
2022-05-19 14:00:38.120 INFO  [140569026484032] ConnectionPool:96 | Created connection for pulsar://localhost:6650
2022-05-19 14:00:38.121 INFO  [140568296003328] ClientConnection:375 | [127.0.0.1:45910 -> 127.0.0.1:6650] Connected to broker
2022-05-19 14:00:38.135 INFO  [140568296003328] HandlerBase:64 | [persistent://public/default/top_languages, ] Getting connection from pool
2022-05-19 14:00:38.148 INFO  [140568296003328] ProducerImpl:189 | [persistent://public/default/top_languages, ] Created producer on broker [127.0.0.1:45910 -> 127.0.0.1:6650] 
2022-05-19 14:00:38.151 INFO  [140569026484032] ClientImpl:496 | Closing Pulsar client with 1 producers and 0 consumers
2022-05-19 14:00:38.152 INFO  [140569026484032] ProducerImpl:686 | [persistent://public/default/top_languages, standalone-0-14] Closing producer for topic persistent://public/