## Segmentation in Pinot

Table contents in Pinot are expected to grow without bound and therefore need to be distributed across multiple nodes. The dataset is split into segments, which are comparable to shards/partitions in a classical RDBMS. Segmentation is time-based, meaning that the rows in a given segment are close to each other in time.
Segments store all columns of a table and organize the data in a columnar layout, which allows efficient encoding and optional pre-aggregation of metrics. In addition to the values, segments store indices and other lookup-related data structures such as dictionaries. By default, values are stored using dictionary encoding, meaning that each value is represented by a dictionary ID that references the corresponding dictionary entry. This way, values can be stored with the minimum number of bits required, which depends on the cardinality of the column.

In [128]:
# all imports
import requests
import json
import io
import re
import pandas as pd
from kafka import KafkaConsumer

In [2]:
# some helpers
def query_sql(query, broker_url='http://pinot-broker.pinot:8099', timeout=None):
    """Send a SQL query to the Pinot broker and return the decoded JSON reply.

    Args:
        query: SQL statement; sent as the ``sql`` field of the JSON request body.
        broker_url: Base URL of the broker. Defaults to the in-cluster broker
            address this notebook was written against.
        timeout: Value forwarded to ``requests.post`` as ``timeout`` (seconds).
            ``None`` (the default) keeps the previous behavior of waiting
            without a limit.

    Returns:
        The response body parsed from JSON.
    """
    # Tolerate a trailing slash on a caller-supplied base URL.
    endpoint = f"{broker_url.rstrip('/')}/query/sql"
    response = requests.post(endpoint, json={"sql": query}, timeout=timeout)
    return response.json()

In [3]:
# Connect to the Kafka broker used by the Pinot deployment ...
consumer = KafkaConsumer(
    bootstrap_servers=['pinot-kafka.pinot:9092'],
    group_id='test',
)
# ... and display the topics it reports.
consumer.topics()

{'trips', 'trips_gendata', 'trips_gendata2'}

In [4]:
# Fetch the schema stored under the name `trips` from the Pinot controller's
# REST API and display the decoded JSON.
requests.get('http://pinot-controller.pinot:9000/schemas/trips').json()

{'schemaName': 'trips',
 'dimensionFieldSpecs': [{'name': 'rider_name',
   'dataType': 'STRING',
   'defaultNullValue': ''},
  {'name': 'driver_name', 'dataType': 'STRING', 'defaultNullValue': ''},
  {'name': 'license_plate', 'dataType': 'STRING', 'defaultNullValue': ''},
  {'name': 'start_location', 'dataType': 'STRING', 'defaultNullValue': ''},
  {'name': 'start_zip_code', 'dataType': 'STRING', 'defaultNullValue': ''},
  {'name': 'end_location', 'dataType': 'STRING', 'defaultNullValue': ''},
  {'name': 'end_zip_code', 'dataType': 'STRING', 'defaultNullValue': ''},
  {'name': 'rider_is_premium', 'dataType': 'INT', 'defaultNullValue': 0}],
 'metricFieldSpecs': [{'name': 'count',
   'dataType': 'LONG',
   'defaultNullValue': 1},
  {'name': 'payment_amount', 'dataType': 'FLOAT'},
  {'name': 'payment_tip_amount', 'dataType': 'FLOAT'},
  {'name': 'trip_wait_time_millis', 'dataType': 'LONG'},
  {'name': 'rider_rating', 'dataType': 'INT'},
  {'name': 'driver_rating', 'dataType': 'INT'}],
 'd

In [153]:
# Template for the REALTIME tables created further down in this cell.
# "tableName" is intentionally left empty here; it is filled in right before
# each POST to the controller.
table_config = {
  "tableName": "",
  "tableType": "REALTIME",
  "segmentsConfig": {
    "timeColumnName": "trip_start_time_millis",
    "timeType": "MILLISECONDS",
    "retentionTimeUnit": "DAYS",
    "retentionTimeValue": "60",
    "schemaName": "trips",
    # Both replica settings start at "1"; they are overwritten with "3" for
    # the second table created by this cell.
    "replication": "1",
    "replicasPerPartition": "1"
  },
  "tenants": {},
  "tableIndexConfig": {
    "loadMode": "MMAP",
    "invertedIndexColumns": [
        "rider_name",
        "driver_name",
        "start_location",
        "end_location"
    ],
    "streamConfigs": {
      "streamType": "kafka",
      "stream.kafka.consumer.type": "simple",
      "stream.kafka.topic.name": "trips_gendata",
      "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
      "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
      "stream.kafka.zk.broker.url": "pinot-kafka-zookeeper:2181",
      "stream.kafka.broker.list": "pinot-kafka:9092",
      # NOTE(review): the size threshold presumably counts rows per segment --
      # the segment metadata shown later in this notebook reports 5000 docs
      # for every completed segment; confirm against the Pinot documentation.
      "realtime.segment.flush.threshold.time": "12h",
      "realtime.segment.flush.threshold.size": "5000",
      "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
    }
  },
  "metadata": {
    "customConfigs": {}
  }
}

# Register two tables from the same template -- one with a single replica and
# one with three -- and print the controller's reply for each.
for table_name, replicas in (("trips_segmentation_1", "1"), ("trips_segmentation_2", "3")):
    table_config["tableName"] = table_name
    table_config["segmentsConfig"]["replication"] = replicas
    table_config["segmentsConfig"]["replicasPerPartition"] = replicas
    print(requests.post('http://pinot-controller.pinot:9000/tables', json=table_config).json())

{'status': 'Table trips_segmentation_1_REALTIME succesfully added'}
{'status': 'Table trips_segmentation_2_REALTIME succesfully added'}


In [87]:
# Ask the controller which instances are associated with the
# trips_segmentation_1 table and display the decoded JSON.
requests.get('http://pinot-controller.pinot:9000/tables/trips_segmentation_1/instances').json()

{'tableName': 'trips_segmentation_1',
 'brokers': [{'tableType': 'realtime',
   'instances': ['Broker_pinot-broker-0.pinot-broker-headless.pinot.svc.cluster.local_8099']}],
 'server': [{'tableType': 'realtime',
   'instances': ['Server_pinot-server-2.pinot-server-headless.pinot.svc.cluster.local_8098',
    'Server_pinot-server-0.pinot-server-headless.pinot.svc.cluster.local_8098',
    'Server_pinot-server-1.pinot-server-headless.pinot.svc.cluster.local_8098']}]}

In [143]:
# List the segments of trips_segmentation_2 as reported by the controller.
response = requests.get('http://pinot-controller.pinot:9000/segments/trips_segmentation_2').json()
# Keep the 'REALTIME' entry of the first element of the reply; `segments` is
# reused by later cells.
segments = response[0]['REALTIME']
segments

['trips_segmentation_2__0__0__20210330T2311Z',
 'trips_segmentation_2__0__1__20210330T2311Z',
 'trips_segmentation_2__0__2__20210330T2311Z',
 'trips_segmentation_2__0__3__20210330T2311Z',
 'trips_segmentation_2__0__4__20210330T2311Z',
 'trips_segmentation_2__0__5__20210330T2311Z',
 'trips_segmentation_2__0__6__20210330T2311Z']

In [149]:
# Controller-side metadata record of every segment, keyed by segment name.
segment_metadata = {}
for segment in segments:
    metadata_url = f'http://pinot-controller.pinot:9000/segments/trips_segmentation_2/{segment}/metadata'
    segment_metadata[segment] = requests.get(metadata_url).json()

# Dict of dicts -> one column per segment, one row per metadata key.
pd.DataFrame(segment_metadata)

Unnamed: 0,trips_segmentation_2__0__0__20210330T2311Z,trips_segmentation_2__0__1__20210330T2311Z,trips_segmentation_2__0__2__20210330T2311Z,trips_segmentation_2__0__3__20210330T2311Z,trips_segmentation_2__0__4__20210330T2311Z,trips_segmentation_2__0__5__20210330T2311Z,trips_segmentation_2__0__6__20210330T2311Z
segment.realtime.endOffset,5000,10000,15000,20000,25000,30000,9223372036854775807
segment.time.unit,MILLISECONDS,MILLISECONDS,MILLISECONDS,MILLISECONDS,MILLISECONDS,MILLISECONDS,
segment.start.time,1585265662776,1585268275846,1585267959427,1585284424212,1585277751053,1585286611249,-1
segment.flush.threshold.size,5000,5000,5000,5000,5000,5000,5000
segment.realtime.startOffset,0,5000,10000,15000,20000,25000,30000
segment.end.time,1616849411120,1616837071664,1616842789082,1616846900978,1616842225866,1616845011471,-1
segment.total.docs,5000,5000,5000,5000,5000,5000,-1
segment.table.name,trips_segmentation_2_REALTIME,trips_segmentation_2_REALTIME,trips_segmentation_2_REALTIME,trips_segmentation_2_REALTIME,trips_segmentation_2_REALTIME,trips_segmentation_2_REALTIME,trips_segmentation_2_REALTIME
segment.realtime.numReplicas,1,1,1,1,1,1,1
segment.creation.time,1617145895857,1617145897250,1617145898917,1617145900066,1617145901135,1617145902314,1617145903383


In [150]:
# Count all rows of trips_segmentation_2 through the broker and display the
# full JSON reply.
query_sql("select count(*) from trips_segmentation_2")

{'resultTable': {'dataSchema': {'columnDataTypes': ['LONG'],
   'columnNames': ['count(*)']},
  'rows': [[32997]]},
 'exceptions': [],
 'numServersQueried': 1,
 'numServersResponded': 1,
 'numSegmentsQueried': 7,
 'numSegmentsProcessed': 7,
 'numSegmentsMatched': 7,
 'numConsumingSegmentsQueried': 1,
 'numDocsScanned': 32997,
 'numEntriesScannedInFilter': 0,
 'numEntriesScannedPostFilter': 0,
 'numGroupsLimitReached': False,
 'totalDocs': 32997,
 'timeUsedMs': 6,
 'segmentStatistics': [],
 'traceInfo': {},
 'minConsumingFreshnessTimeMs': 1617145903741}

## Query Routing / Processing

Brokers are responsible for maintaining routing tables, which map the segments of a table to the servers that host them. This allows brokers to efficiently scatter incoming queries across the servers.

In [174]:
# Fetch the external view of trips_segmentation_2 from the controller. The
# loop below reads its 'REALTIME' entry as
# {segment name: {server instance id: state}}.
externalview = requests.get('http://pinot-controller.pinot:9000/tables/trips_segmentation_2/externalview').json()

# Used to shorten a server instance id to its "pinot-server-<n>" part.
server_name_regex = re.compile('pinot-server-[0-9]+')

# segment name -> {state: [server names in that state]}
externalview_data = {}

for segment, segment_servers in externalview['REALTIME'].items():
    externalview_data[segment] = {}
    for server, state in segment_servers.items():
        match = server_name_regex.search(server)
        # Fall back to the full instance id instead of raising AttributeError
        # when an instance is not named like "pinot-server-<n>".
        server_name = match.group() if match else server
        externalview_data[segment].setdefault(state, []).append(server_name)

# Transpose so that each row is a segment and each column is a state.
pd.DataFrame(externalview_data).transpose()

Unnamed: 0,ONLINE,CONSUMING
trips_segmentation_2__0__0__20210330T2317Z,"[pinot-server-0, pinot-server-1, pinot-server-2]",
trips_segmentation_2__0__1__20210330T2317Z,[pinot-server-0],
trips_segmentation_2__0__2__20210330T2317Z,[pinot-server-0],
trips_segmentation_2__0__3__20210330T2317Z,[pinot-server-0],
trips_segmentation_2__0__4__20210330T2317Z,[pinot-server-0],
trips_segmentation_2__0__5__20210330T2317Z,[pinot-server-0],
trips_segmentation_2__0__6__20210330T2317Z,,[pinot-server-0]
