In [1]:
import logging
import json
import time
import requests
import os

In [2]:
# Druid connection settings, read from the environment.
# Each value is None when the corresponding variable is not set.
# NOTE(review): only DRUID_HOST is referenced by the cells visible in this
# notebook; DRUID_USER / DRUID_PASSWORD / DRUID_PORT look unused here - confirm.
DRUID_HOST, DRUID_USER, DRUID_PASSWORD, DRUID_PORT = (
    os.environ.get(variable)
    for variable in ('DRUID_HOST', 'DRUID_USER', 'DRUID_PASSWORD', 'DRUID_PORT')
)

In [3]:
# Route log records to the notebook's stream output with a
# "timestamp [LEVEL] message" layout, showing INFO and above.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()],
    level=logging.INFO,
)

# Logger used by the cells below.
logger = logging.getLogger(__name__)

In [4]:
# Create the payload (index_parallel ingestion spec) that refreshes the
# "house_prices" datasource in Druid.
#
# The spec is assembled from Python structures and serialised with
# json.dumps(indent=2); the result stays a JSON *string* because the request
# cell parses it with json.loads before posting.
#
# NOTE(review): inputFormat is "tsv" while the file filter matches "*.csv" -
# confirm the files under baseDir really are tab-delimited.

# Input columns, in file order (the files are read without a header row).
_golden_columns = [
    "property_id",
    "city",
    "attributes_inside_attic",
    "attributes_inside_cellar",
    "attributes_technology_dishwasher",
    "attributes_technology_cable_tv",
    "attributes_outside_balcony",
    "attributes_outside_playground",
    "attributes_outside_parking",
    "attributes_outside_garage",
    "number_of_rooms",
    "surface_property",
    "surface_usable",
    "surface_living",
    "normalized_price",
]

# "city" is declared as a plain (untyped) dimension, "number_of_rooms" as a
# double, and every other column as a long.
_golden_dimensions = [
    column
    if column == "city"
    else {"type": "double" if column == "number_of_rooms" else "long", "name": column}
    for column in _golden_columns
]

_golden_table_spec = {
    "type": "index_parallel",
    "spec": {
        "ioConfig": {
            "type": "index_parallel",
            "inputSource": {
                "type": "local",
                "baseDir": "/opt/data/golden/druid/house_prices",
                "filter": "*.csv",
            },
            "inputFormat": {
                "type": "tsv",
                "findColumnsFromHeader": False,
                "columns": _golden_columns,
            },
        },
        "tuningConfig": {
            "type": "index_parallel",
            "partitionsSpec": {"type": "dynamic"},
        },
        "dataSchema": {
            "dataSource": "house_prices",
            # The timestamp column name is a deliberate placeholder; presumably
            # every row is meant to receive the constant "missingValue" below.
            "timestampSpec": {
                "column": "!!!_no_such_column_!!!",
                "missingValue": "2010-01-01T00:00:00Z",
            },
            "dimensionsSpec": {"dimensions": _golden_dimensions},
            "granularitySpec": {
                "queryGranularity": "none",
                "rollup": False,
                "segmentGranularity": "hour",
            },
        },
    },
}

golden_table_payload = json.dumps(_golden_table_spec, indent=2)

In [5]:
# Submit the golden-table ingestion spec to Druid's task endpoint.
logger.info('Posting data to Druid ...')

golden_table_r = requests.post(
    f'http://{DRUID_HOST}:8888/druid/indexer/v1/task',
    json=json.loads(golden_table_payload),
    # requests has no default timeout; bound (connect, read) so an unreachable
    # or stalled Druid cannot hang the notebook run forever.
    timeout=(5, 60),
)

# Log the response first so the body is visible even when the call failed;
# failures are reported at ERROR level instead of INFO.
logger.log(
    logging.INFO if golden_table_r.ok else logging.ERROR,
    'Received status: %s text: %s',
    golden_table_r.status_code,
    golden_table_r.text,
)

# Stop the run on a 4xx/5xx answer rather than silently continuing as if the
# ingestion task had been accepted.
golden_table_r.raise_for_status()


2022-07-10 21:59:27,017 [INFO] Posting data to Druid ...
2022-07-10 21:59:27,797 [INFO] Received status: 200 text: {"task":"index_parallel_house_prices_cpnjceji_2022-07-10T21:59:27.718Z"}


In [6]:
# Create the payload (index_parallel ingestion spec) that refreshes the
# "house_prices_predictions" datasource in Druid.
#
# The spec is assembled from Python structures and serialised with
# json.dumps(indent=2); the result stays a JSON *string* because the request
# cell parses it with json.loads before posting.
#
# NOTE(review): inputFormat is "tsv" while the file filter matches "*.csv" -
# confirm the files under baseDir really are tab-delimited.

# Input columns, in file order (the files are read without a header row).
_predictions_columns = [
    "property_id",
    "city",
    "attributes_inside_attic",
    "attributes_inside_cellar",
    "attributes_technology_dishwasher",
    "attributes_technology_cable_tv",
    "attributes_outside_balcony",
    "attributes_outside_playground",
    "attributes_outside_parking",
    "attributes_outside_garage",
    "number_of_rooms",
    "surface_property",
    "surface_usable",
    "surface_living",
    "normalized_price",
    "prediction",
]

# "city" is declared as a plain (untyped) dimension, "number_of_rooms" and
# "prediction" as doubles, and every other column as a long.
_predictions_dimensions = [
    column
    if column == "city"
    else {
        "type": "double" if column in ("number_of_rooms", "prediction") else "long",
        "name": column,
    }
    for column in _predictions_columns
]

_predictions_table_spec = {
    "type": "index_parallel",
    "spec": {
        "ioConfig": {
            "type": "index_parallel",
            "inputSource": {
                "type": "local",
                "baseDir": "/opt/data/golden/druid/house_prices_predictions",
                "filter": "*.csv",
            },
            "inputFormat": {
                "type": "tsv",
                "findColumnsFromHeader": False,
                "columns": _predictions_columns,
            },
        },
        "tuningConfig": {
            "type": "index_parallel",
            "partitionsSpec": {"type": "dynamic"},
        },
        "dataSchema": {
            "dataSource": "house_prices_predictions",
            # The timestamp column name is a deliberate placeholder; presumably
            # every row is meant to receive the constant "missingValue" below.
            "timestampSpec": {
                "column": "!!!_no_such_column_!!!",
                "missingValue": "2010-01-01T00:00:00Z",
            },
            "dimensionsSpec": {"dimensions": _predictions_dimensions},
            "granularitySpec": {
                "queryGranularity": "none",
                "rollup": False,
                "segmentGranularity": "hour",
            },
        },
    },
}

predictions_table_payload = json.dumps(_predictions_table_spec, indent=2)

In [7]:
# Submit the predictions-table ingestion spec to Druid's task endpoint.
logger.info('Posting data to Druid ...')

predictions_table_r = requests.post(
    f'http://{DRUID_HOST}:8888/druid/indexer/v1/task',
    json=json.loads(predictions_table_payload),
    # requests has no default timeout; bound (connect, read) so an unreachable
    # or stalled Druid cannot hang the notebook run forever.
    timeout=(5, 60),
)

# Log the response first so the body is visible even when the call failed;
# failures are reported at ERROR level instead of INFO.
logger.log(
    logging.INFO if predictions_table_r.ok else logging.ERROR,
    'Received status: %s text: %s',
    predictions_table_r.status_code,
    predictions_table_r.text,
)

# Stop the run on a 4xx/5xx answer rather than silently continuing as if the
# ingestion task had been accepted.
predictions_table_r.raise_for_status()


2022-07-10 21:59:27,825 [INFO] Posting data to Druid ...
2022-07-10 21:59:27,858 [INFO] Received status: 200 text: {"task":"index_parallel_house_prices_predictions_hcjdjcel_2022-07-10T21:59:27.842Z"}
