In [2]:
## consume to mongodb
from kafka import KafkaConsumer
import sys, json, pymongo
# from elasticsearch import Elasticsearch
# import time, datetime


if __name__ == "__main__":

    # 與 MongoDB連線
    client = pymongo.MongoClient(host="mongodb", port=27017)
    # 指定為 test 資料庫
    db = client.test
    # 指定 temp_humidity 集合, MongoDB的每個資料庫又包含許多集合(collection), 類似於關聯性資料庫中的表
    collection = db.bpm

    # 設定要連線到Kafka集群的相關設定, 產生一個Kafka的Consumer的實例
    consumer = KafkaConsumer(
        # 指定Kafka集群伺服器
        bootstrap_servers=["kafka:9092"],
        # ConsumerGroup的名稱, 可以不指定
        #group_id="cg_001",
        # 指定msgKey的反序列化器, 若Key為None, 無法反序列化
        # key_deserializer=bytes.decode,
        # 指定msgValue的反序列化器
        #value_deserializer=bytes.decode,
        value_deserializer=lambda m: json.loads(m.decode('ascii')),
        # 是否從這個ConsumerGroup尚未讀取的partition / offset開始讀
        auto_offset_reset="earliest",
    )
   
    # 讓Consumer向Kafka集群訂閱指定的topic
    consumer.subscribe(topics="bpm")
    
    #es = Elasticsearch('http://elasticsearch:9200')
    
    
    # 持續的拉取Kafka有進來的訊息
    try:
        print("Now listening for incoming messages ...")
        # 持續監控是否有新的record進來
        for record in consumer:
            topic = record.topic
            partition = record.partition
            offset = record.offset
            timestamp = record.timestamp
            # 取出msgKey與msgValue
            msgKey = record.key
            msgValue = record.value
            # 秀出metadata與msgKey & msgValue訊息
            print("topic=%s, partition=%s, offset=%s : (key=%s, value=%s)" % (record.topic, record.partition, 
                                                                              record.offset, record.key, record.value))
            j = {"device_id": msgValue["device_id"], "timestamp": msgValue["timestamp"], "bpm": msgValue["bpm"]}
            print("json=",  j)
            
            # 將資料存入 mongodb
            # 存入單筆
            result = collection.insert_one(j)
            # 存入多筆
            #result = collection.insert_many()
            print(result)
            #res = es.index(index="temp_humidity1", doc_type='tweet', id=datetime.now(), body=j)
            #print(res['result'])

    except:
        # 錯誤處理
        e_type, e_value, e_traceback = sys.exc_info()
        print("type ==> %s" % (e_type))
        print("value ==> %s" % (e_value))
        print("traceback ==> file name: %s" % (e_traceback.tb_frame.f_code.co_filename))
        print("traceback ==> line no: %s" % (e_traceback.tb_lineno))
        print("traceback ==> function name: %s" % (e_traceback.tb_frame.f_code.co_name))
    finally:
        consumer.close()

Now listening for incoming messages ...
topic=bpm, partition=0, offset=0 : (key=None, value={'timestamp': '2019-09-21 16:06:18', 'bpm': '43', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:06:18', 'bpm': '43'}
<pymongo.results.InsertOneResult object at 0x7fa5a44bdf88>
topic=bpm, partition=0, offset=1 : (key=None, value={'timestamp': '2019-09-21 16:06:19', 'bpm': '44', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:06:19', 'bpm': '44'}
<pymongo.results.InsertOneResult object at 0x7fa5a44bdfc8>
topic=bpm, partition=0, offset=2 : (key=None, value={'timestamp': '2019-09-21 16:06:21', 'bpm': '42', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:06:21', 'bpm': '42'}
<pymongo.results.InsertOneResult object at 0x7fa5a4458ac8>
topic=bpm, partition=0, offset=3 : (key=None, value={'timestamp': '2019-09-21 16:06:22', 'bpm': '43', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-0

topic=bpm, partition=0, offset=32 : (key=None, value={'timestamp': '2019-09-21 16:06:47', 'bpm': '95', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:06:47', 'bpm': '95'}
<pymongo.results.InsertOneResult object at 0x7fa5a5919c88>
topic=bpm, partition=0, offset=33 : (key=None, value={'timestamp': '2019-09-21 16:06:49', 'bpm': '89', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:06:49', 'bpm': '89'}
<pymongo.results.InsertOneResult object at 0x7fa5a44696c8>
topic=bpm, partition=0, offset=34 : (key=None, value={'timestamp': '2019-09-21 16:06:49', 'bpm': '90', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:06:49', 'bpm': '90'}
<pymongo.results.InsertOneResult object at 0x7fa5a5919c88>
topic=bpm, partition=0, offset=35 : (key=None, value={'timestamp': '2019-09-21 16:07:01', 'bpm': '57', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': '2019-09-21 16:07:01', 'bpm': '57'}
<pymong

In [5]:
## consume to Elasticsearch
from kafka import KafkaConsumer
import sys, json
# import pymongo
from elasticsearch import Elasticsearch
import time, datetime


if __name__ == "__main__":

    # 與 MongoDB連線
#     client = pymongo.MongoClient(host="mongodb", port=27017)
#     # 指定為 test 資料庫
#     db = client.test
#     # 指定 temp_humidity 集合, MongoDB的每個資料庫又包含許多集合(collection), 類似於關聯性資料庫中的表
#     collection = db.bpm

    # 設定要連線到Kafka集群的相關設定, 產生一個Kafka的Consumer的實例
    consumer = KafkaConsumer(
        # 指定Kafka集群伺服器
        bootstrap_servers=["kafka:9092"],
        # ConsumerGroup的名稱, 可以不指定
        #group_id="cg_001",
        # 指定msgKey的反序列化器, 若Key為None, 無法反序列化
        # key_deserializer=bytes.decode,
        # 指定msgValue的反序列化器
        #value_deserializer=bytes.decode,
        value_deserializer=lambda m: json.loads(m.decode('ascii')),
        # 是否從這個ConsumerGroup尚未讀取的partition / offset開始讀
        auto_offset_reset="earliest",
    )
   
    # 讓Consumer向Kafka集群訂閱指定的topic
    consumer.subscribe(topics="bpm")
    
    es = Elasticsearch('http://elasticsearch:9200')
    
    
    # 持續的拉取Kafka有進來的訊息
    try:
        print("Now listening for incoming messages ...")
        # 持續監控是否有新的record進來
        for record in consumer:
            topic = record.topic
            partition = record.partition
            offset = record.offset
            timestamp = record.timestamp
            # 取出msgKey與msgValue
            msgKey = record.key
            msgValue = record.value
            # 秀出metadata與msgKey & msgValue訊息
            time = datetime.datetime.now()
            print("topic=%s, partition=%s, offset=%s : (key=%s, value=%s)" % (record.topic, record.partition, 
                                                                              record.offset, record.key, record.value))
            j = {"device_id": msgValue["device_id"], "timestamp": time, "bpm": int(msgValue["bpm"])}
            print("json=",  j)
            
#             # 將資料存入 mongodb
#             # 存入單筆
#             result = collection.insert_one(j)
#             # 存入多筆
#             # result = collection.insert_many()
#             print(result)


            res = es.index(index="bpm", doc_type='tweet', id=time, body=j)
            print(res['result'])

    except:
        # 錯誤處理
        e_type, e_value, e_traceback = sys.exc_info()
        print("type ==> %s" % (e_type))
        print("value ==> %s" % (e_value))
        print("traceback ==> file name: %s" % (e_traceback.tb_frame.f_code.co_filename))
        print("traceback ==> line no: %s" % (e_traceback.tb_lineno))
        print("traceback ==> function name: %s" % (e_traceback.tb_frame.f_code.co_name))
    finally:
        consumer.close()

Now listening for incoming messages ...
topic=bpm, partition=0, offset=0 : (key=None, value={'timestamp': '2019-09-21 16:28:39', 'bpm': '65', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 32, 45, 158675), 'bpm': 65}
created
topic=bpm, partition=0, offset=1 : (key=None, value={'timestamp': '2019-09-21 16:28:41', 'bpm': '60', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 32, 45, 228428), 'bpm': 60}
created
topic=bpm, partition=0, offset=2 : (key=None, value={'timestamp': '2019-09-21 16:28:42', 'bpm': '59', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 32, 45, 234481), 'bpm': 59}
created
topic=bpm, partition=0, offset=3 : (key=None, value={'timestamp': '2019-09-21 16:28:42', 'bpm': '61', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 32, 45, 241677), 'bpm': 61}
created
topic=bpm, p

topic=bpm, partition=0, offset=51 : (key=None, value={'timestamp': '2019-09-21 16:33:28', 'bpm': '74', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 33, 28, 369907), 'bpm': 74}
created
topic=bpm, partition=0, offset=52 : (key=None, value={'timestamp': '2019-09-21 16:33:29', 'bpm': '69', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 33, 29, 706261), 'bpm': 69}
created
topic=bpm, partition=0, offset=53 : (key=None, value={'timestamp': '2019-09-21 16:33:30', 'bpm': '69', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 33, 30, 517323), 'bpm': 69}
created
topic=bpm, partition=0, offset=54 : (key=None, value={'timestamp': '2019-09-21 16:33:33', 'bpm': '194', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 33, 33, 843138), 'bpm': 194}
created
topic=bpm, partition=0, offset=55 : (key=None,

topic=bpm, partition=0, offset=86 : (key=None, value={'timestamp': '2019-09-21 16:35:47', 'bpm': '40', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 35, 47, 790631), 'bpm': 40}
created
topic=bpm, partition=0, offset=87 : (key=None, value={'timestamp': '2019-09-21 16:35:48', 'bpm': '42', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 35, 48, 729843), 'bpm': 42}
created
topic=bpm, partition=0, offset=88 : (key=None, value={'timestamp': '2019-09-21 16:35:49', 'bpm': '44', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 35, 49, 385089), 'bpm': 44}
created
topic=bpm, partition=0, offset=89 : (key=None, value={'timestamp': '2019-09-21 16:35:50', 'bpm': '46', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 35, 50, 192599), 'bpm': 46}
created
topic=bpm, partition=0, offset=90 : (key=None, v

topic=bpm, partition=0, offset=121 : (key=None, value={'timestamp': '2019-09-21 16:36:17', 'bpm': '72', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 17, 480332), 'bpm': 72}
created
topic=bpm, partition=0, offset=122 : (key=None, value={'timestamp': '2019-09-21 16:36:18', 'bpm': '72', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 18, 346535), 'bpm': 72}
created
topic=bpm, partition=0, offset=123 : (key=None, value={'timestamp': '2019-09-21 16:36:19', 'bpm': '71', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 19, 247794), 'bpm': 71}
created
topic=bpm, partition=0, offset=124 : (key=None, value={'timestamp': '2019-09-21 16:36:20', 'bpm': '70', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 20, 195319), 'bpm': 70}
created
topic=bpm, partition=0, offset=125 : (key=No

topic=bpm, partition=0, offset=156 : (key=None, value={'timestamp': '2019-09-21 16:36:54', 'bpm': '86', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 54, 214466), 'bpm': 86}
created
topic=bpm, partition=0, offset=157 : (key=None, value={'timestamp': '2019-09-21 16:36:54', 'bpm': '84', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 54, 928713), 'bpm': 84}
created
topic=bpm, partition=0, offset=158 : (key=None, value={'timestamp': '2019-09-21 16:36:55', 'bpm': '89', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 55, 682851), 'bpm': 89}
created
topic=bpm, partition=0, offset=159 : (key=None, value={'timestamp': '2019-09-21 16:36:56', 'bpm': '93', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 36, 56, 136045), 'bpm': 93}
created
topic=bpm, partition=0, offset=160 : (key=No

topic=bpm, partition=0, offset=191 : (key=None, value={'timestamp': '2019-09-21 16:40:03', 'bpm': '69', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 3, 646221), 'bpm': 69}
created
topic=bpm, partition=0, offset=192 : (key=None, value={'timestamp': '2019-09-21 16:40:04', 'bpm': '73', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 4, 443545), 'bpm': 73}
created
topic=bpm, partition=0, offset=193 : (key=None, value={'timestamp': '2019-09-21 16:40:05', 'bpm': '73', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 5, 295720), 'bpm': 73}
created
topic=bpm, partition=0, offset=194 : (key=None, value={'timestamp': '2019-09-21 16:40:06', 'bpm': '74', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 6, 42252), 'bpm': 74}
created
topic=bpm, partition=0, offset=195 : (key=None, v

topic=bpm, partition=0, offset=226 : (key=None, value={'timestamp': '2019-09-21 16:40:40', 'bpm': '53', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 40, 811366), 'bpm': 53}
created
topic=bpm, partition=0, offset=227 : (key=None, value={'timestamp': '2019-09-21 16:40:41', 'bpm': '56', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 41, 571348), 'bpm': 56}
created
topic=bpm, partition=0, offset=228 : (key=None, value={'timestamp': '2019-09-21 16:40:42', 'bpm': '59', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 42, 286275), 'bpm': 59}
created
topic=bpm, partition=0, offset=229 : (key=None, value={'timestamp': '2019-09-21 16:40:43', 'bpm': '65', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 40, 43, 43367), 'bpm': 65}
created
topic=bpm, partition=0, offset=230 : (key=Non

topic=bpm, partition=0, offset=261 : (key=None, value={'timestamp': '2019-09-21 16:41:31', 'bpm': '96', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 31, 965126), 'bpm': 96}
created
topic=bpm, partition=0, offset=262 : (key=None, value={'timestamp': '2019-09-21 16:41:32', 'bpm': '98', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 32, 431775), 'bpm': 98}
created
topic=bpm, partition=0, offset=263 : (key=None, value={'timestamp': '2019-09-21 16:41:32', 'bpm': '98', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 32, 770472), 'bpm': 98}
created
topic=bpm, partition=0, offset=264 : (key=None, value={'timestamp': '2019-09-21 16:41:33', 'bpm': '106', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 33, 20686), 'bpm': 106}
created
topic=bpm, partition=0, offset=265 : (key=N

topic=bpm, partition=0, offset=296 : (key=None, value={'timestamp': '2019-09-21 16:41:50', 'bpm': '77', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 50, 772872), 'bpm': 77}
created
topic=bpm, partition=0, offset=297 : (key=None, value={'timestamp': '2019-09-21 16:41:51', 'bpm': '74', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 51, 612686), 'bpm': 74}
created
topic=bpm, partition=0, offset=298 : (key=None, value={'timestamp': '2019-09-21 16:41:52', 'bpm': '77', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 52, 353566), 'bpm': 77}
created
topic=bpm, partition=0, offset=299 : (key=None, value={'timestamp': '2019-09-21 16:41:53', 'bpm': '80', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 41, 53, 162842), 'bpm': 80}
created
topic=bpm, partition=0, offset=300 : (key=No

topic=bpm, partition=0, offset=331 : (key=None, value={'timestamp': '2019-09-21 16:42:43', 'bpm': '66', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 42, 43, 975749), 'bpm': 66}
created
topic=bpm, partition=0, offset=332 : (key=None, value={'timestamp': '2019-09-21 16:42:44', 'bpm': '71', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 42, 44, 303291), 'bpm': 71}
created
topic=bpm, partition=0, offset=333 : (key=None, value={'timestamp': '2019-09-21 16:42:44', 'bpm': '75', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 42, 44, 759278), 'bpm': 75}
created
topic=bpm, partition=0, offset=334 : (key=None, value={'timestamp': '2019-09-21 16:42:45', 'bpm': '78', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 42, 45, 32531), 'bpm': 78}
created
topic=bpm, partition=0, offset=335 : (key=Non

topic=bpm, partition=0, offset=366 : (key=None, value={'timestamp': '2019-09-21 16:43:58', 'bpm': '45', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 43, 58, 727026), 'bpm': 45}
created
topic=bpm, partition=0, offset=367 : (key=None, value={'timestamp': '2019-09-21 16:43:59', 'bpm': '49', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 43, 59, 362376), 'bpm': 49}
created
topic=bpm, partition=0, offset=368 : (key=None, value={'timestamp': '2019-09-21 16:44:00', 'bpm': '52', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 44, 0, 465124), 'bpm': 52}
created
topic=bpm, partition=0, offset=369 : (key=None, value={'timestamp': '2019-09-21 16:44:01', 'bpm': '57', 'device_id': 'jimmy'})
json= {'device_id': 'jimmy', 'timestamp': datetime.datetime(2019, 9, 21, 8, 44, 1, 38751), 'bpm': 57}
created
type ==> <class 'KeyboardInterrupt'>
value ==> 

In [8]:
print(datetime.datetime.now())

2019-09-21 08:10:41.544339
