# Goal

In order to test Metabase's feature of mapping data onto custom GeoJSON polygons, I'll create:

1. a GeoJSON file, containing a FeatureCollection of Polygon Features.
2. a CSV file, containing some data to map onto those polygons.

# Setup

In [221]:
import json
import pandas as pd
import numpy as np

In [222]:
# parameters

# Grid size: number of polygons along the latitude / longitude axes.
n_lat, n_lon = 20, 20
# Extent of a single polygon along each axis, in degrees.
step_lat, step_lon = 0.01, 0.02
# Minimum latitude / longitude of the grid, in degrees.
min_lat, min_lon = 46.5, 6.6
# Absolute directory used to build the input path of the ingestion spec.
abs_dir = '/Users/taabepa1/Documents/jupyter/01_make_grid_data'
# Output file names: GeoJSON grid, CSV data, ingestion spec.
fname_geojson, fname_maps, fname_spec = 'grid.json', 'maps.csv', 'spec.json'

# GeoJSON file

This is the grid definition.

In [223]:
def _sector_feature(row, col):
    """Return the GeoJSON Feature for the grid cell at (row, col).

    `row` counts cells in the latitude direction and `col` in the longitude
    direction. The cell is a rectangle of step_lon x step_lat degrees whose
    lower-left corner is offset from (min_lon, min_lat).
    """
    sector_id = '%d-%d' % (row, col)
    south = min_lat + row * step_lat
    west = min_lon + col * step_lon
    north = south + step_lat
    east = west + step_lon
    # Closed ring: the first position is repeated as the last one.
    ring = [
        (west, south),
        (east, south),
        (east, north),
        (west, north),
        (west, south),
    ]
    return {
        'type': 'Feature',
        'geometry': {'type': 'Polygon', 'coordinates': [ring]},
        'properties': {'id': sector_id, 'name': 'Sector %s' % sector_id},
    }


# list of Features, row by row (latitude outer, longitude inner)
lf = [_sector_feature(y, x) for y in range(n_lat) for x in range(n_lon)]

In [224]:
# Wrap all sector Features into a single GeoJSON FeatureCollection.
fc = dict(type='FeatureCollection', features=lf)

In [225]:
# Serialize the FeatureCollection to the GeoJSON output file.
with open(fname_geojson, 'w') as geojson_file:
    geojson_file.write(json.dumps(fc))

# Random data

In [226]:
# One row per grid sector, indexed by the same 'row-col' id string that the
# GeoJSON features carry in their 'id' property.
index_list = ['%d-%d' % (y, x) for y in range(n_lat) for x in range(n_lon)]
n = len(index_list)
data = pd.DataFrame(
    {
        'random_stuff': np.random.rand(n),  # uniform random values
        'regular_stuff': np.linspace(0, n - 1, n),  # 0.0, 1.0, ..., n - 1
    },
    index=index_list,
)
# Every row gets the same constant date string.
data['timestamp'] = '2017-06-26'
data.head()

Unnamed: 0,random_stuff,regular_stuff,timestamp
0-0,0.003203,0.0,2017-06-26
0-1,0.953356,1.0,2017-06-26
0-2,0.389492,2.0,2017-06-26
0-3,0.305579,3.0,2017-06-26
0-4,0.220497,4.0,2017-06-26


In [227]:
# Write the index (sector id) followed by the data columns, with no header row.
data.to_csv(fname_maps, header=False, index=True)

# Ingestion spec

In [228]:
# Input location: a single static path pointing at the CSV inside abs_dir.
inputSpec = dict(type='static', paths='/'.join([abs_dir, fname_maps]))
ioConfig = dict(type='hadoop', inputSpec=inputSpec)

# Uniform, day-granularity segments over the interval 2017-06-26/2017-06-27,
# i.e. the day stamped on every row of the data.
granularitySpec = dict(
    type='uniform',
    segmentGranularity='day',
    queryGranularity='none',
    intervals=['2017-06-26/2017-06-27'],
)
# Column names for the header-less CSV, in file order. The CSV parser assigns
# these names by position, so every field written by
# `data.to_csv(fname_maps, header=False)` has to be listed: the index (sector
# id) first, then random_stuff, regular_stuff and timestamp. Leaving out
# 'regular_stuff' would attach the name 'timestamp' to the third field, which
# holds the regular_stuff numbers rather than the date.
# 'regular_stuff' is listed only to keep the positions aligned; it is not
# declared as a dimension or a metric elsewhere in the spec.
columns = [
    'id',
    'random_stuff',
    'regular_stuff',
    'timestamp',
]
# Remaining pieces of the ingestion task, assembled inside-out.
# `columns`, `granularitySpec` and `ioConfig` are defined by the statements
# earlier in this cell.

# How each CSV line is parsed: the timestamp is read from the 'timestamp'
# column and 'id' is the only dimension.
parser = {
    'type': 'string',
    'parseSpec': {
        'format': 'csv',
        'timestampSpec': {'format': 'auto', 'column': 'timestamp'},
        'columns': columns,
        'dimensionsSpec': {'dimensions': ['id']},
    },
}

# Target data source 'maps' with a single doubleSum metric over random_stuff.
dataSchema = {
    'dataSource': 'maps',
    'granularitySpec': granularitySpec,
    'parser': parser,
    'metricsSpec': [
        {
            'name': 'random_stuff',
            'type': 'doubleSum',
            'fieldName': 'random_stuff',
        },
    ],
}

tuningConfig = {
    'type': 'hadoop',
    'partitionsSpec': {'type': 'hashed', 'targetPartitionSize': 5000000},
    'jobProperties': {},
}

# Top-level task document.
spec = {
    'type': 'index_hadoop',
    'spec': {
        'ioConfig': ioConfig,
        'dataSchema': dataSchema,
        'tuningConfig': tuningConfig,
    },
}

In [229]:
# Persist the ingestion task definition as JSON.
with open(fname_spec, 'w') as spec_file:
    spec_file.write(json.dumps(spec))