# Goal

To test Metabase's feature for mapping data onto custom GeoJSON polygons, I'll create:

1. a GeoJSON file, containing a FeatureCollection of Polygon Features.
2. a CSV file, containing some data to map onto those polygons.

# Setup

In [39]:
import json
import pandas as pd
import numpy as np

In [40]:
# parameters

# Grid size: number of polygons along each axis.
n_lat = 20
n_lon = 20

# Extent of a single polygon along each axis, in degrees.
step_lat = 0.01
step_lon = 0.02

# South-west corner of the grid (smallest latitude / longitude), in degrees.
min_lat = 46.5
min_lon = 6.6

# Absolute directory referenced by the ingestion spec when locating the CSV.
abs_dir = '/Users/taabepa1/Documents/jupyter/01_make_grid_data'

# Output file names.
fname_geojson = 'grid.json'
fname_maps = 'maps.csv'
fname_spec = 'spec.json'

# GeoJSON file

This is the grid definition.

In [41]:
# Build one rectangular GeoJSON Polygon Feature per grid cell, row by row
# (latitude index y outermost, longitude index x innermost).
lf = []  # list of Features
for y in range(n_lat):
    # Both bounds are derived directly from the cell index rather than as
    # "lower bound + step".  The upper edge of cell i and the lower edge of
    # cell i + 1 are then the very same float expression, so neighbouring
    # polygons share exactly identical edge coordinates (no rounding slivers).
    min_y = min_lat + y * step_lat
    max_y = min_lat + (y + 1) * step_lat
    for x in range(n_lon):
        min_x = min_lon + x * step_lon
        max_x = min_lon + (x + 1) * step_lon

        # The id is 'row-column'; it is the key the CSV data is joined on.
        id_string = '%d-%d' % (y, x)
        name = 'Sector %s' % id_string

        # A single closed ring (first point repeated at the end), listed
        # counter-clockwise, with each position given as (longitude, latitude).
        coords = [[
            (min_x, min_y),
            (max_x, min_y),
            (max_x, max_y),
            (min_x, max_y),
            (min_x, min_y)
        ]]
        geo = {
            'type': 'Polygon',
            'coordinates': coords,
        }
        prop = {
            'id': id_string,
            'name': name
        }
        feat = {
            'type': 'Feature',
            'geometry': geo,
            'properties': prop
        }
        lf.append(feat)

In [42]:
# Wrap every grid feature in a single GeoJSON FeatureCollection.
fc = dict(type='FeatureCollection', features=lf)

# The same collection serialized to a JSON string.
fc_string = json.dumps(fc)

In [43]:
# Serialize the FeatureCollection and write it to the GeoJSON file.
with open(fname_geojson, 'w') as geojson_file:
    geojson_file.write(json.dumps(fc))

# Random data

In [44]:
# One row per grid cell, indexed by the same 'row-column' id string that the
# GeoJSON features carry in their 'id' property.
index_list = ['%d-%d' % (y, x) for y in range(n_lat) for x in range(n_lon)]
n = len(index_list)

# A uniform random value in [0, 1) for every cell.
data = pd.DataFrame({'random_stuff': np.random.rand(n)}, index=index_list)

# Every row gets the same constant date string.
data['timestamp'] = '2017-06-26'
data.head()

Unnamed: 0,random_stuff,timestamp
0-0,0.242508,2017-06-26
0-1,0.073794,2017-06-26
0-2,0.470214,2017-06-26
0-3,0.55882,2017-06-26
0-4,0.825802,2017-06-26


In [45]:
# Write the index (cell id) plus both columns, without a header row: the
# ingestion spec lists the column names explicitly instead.
data.to_csv(path_or_buf=fname_maps, header=False, index=True)

# Ingestion spec

In [46]:
# Batch ingestion spec for the maps CSV, assembled from its three sections.
# NOTE(review): 'index_hadoop' looks like an Apache Druid task type -- confirm
# against the target cluster.

# Where to read the input from: the CSV file located under abs_dir.
io_config = {
    'type': 'hadoop',
    'inputSpec': {
        'type': 'static',
        'paths': '%s/%s' % (abs_dir, fname_maps),
    },
}

# Time partitioning; the interval covers the single date written to the CSV.
granularity_spec = {
    'type': 'uniform',
    'segmentGranularity': 'day',
    'queryGranularity': 'none',
    'intervals': ['2017-06-26/2017-06-27'],
}

# How to read a CSV row: column order matches the file (index, value, date).
parse_spec = {
    'format': 'csv',
    'timestampSpec': {
        'format': 'auto',
        'column': 'timestamp',
    },
    'columns': ['id', 'random_stuff', 'timestamp'],
    'dimensionsSpec': {
        'dimensions': ['id', 'random_stuff'],
    },
}

data_schema = {
    'dataSource': 'maps',
    'granularitySpec': granularity_spec,
    'parser': {
        'type': 'string',
        'parseSpec': parse_spec,
    },
    'metricsSpec': [],
}

tuning_config = {
    'type': 'hadoop',
    'partitionsSpec': {
        'type': 'hashed',
        'targetPartitionSize': 5000000,
    },
    'jobProperties': {},
}

spec = {
    'type': 'index_hadoop',
    'spec': {
        'ioConfig': io_config,
        'dataSchema': data_schema,
        'tuningConfig': tuning_config,
    },
}

# The same spec serialized to a JSON string.
spec_string = json.dumps(spec)

In [47]:
# Write the ingestion spec to disk as a JSON object.
# The dict itself is dumped: passing the already-serialized spec_string to
# json.dump would encode it a second time, leaving the file with one quoted,
# backslash-escaped JSON string instead of the spec object.
with open(fname_spec, 'w') as outfile:
    json.dump(spec, outfile)