We'll estimate a density function over the features of the SA1 regions, then concatenate SA1s together.

In [25]:
import json, numpy

In [14]:
# Load the suburb regions. JSON is UTF-8 by specification, so the encoding is
# pinned rather than left to the platform's default text encoding.
with open('../app/data/suburb_regions.json', encoding='utf-8') as f:
    suburbs = json.load(f)

In [4]:
# Load the regions used to fit the density model. JSON is UTF-8 by
# specification, so the encoding is pinned rather than left to the platform
# default.
# NOTE(review): the variable is called `sa1s` but the file is
# `sa2_regions.json` -- confirm this is the intended input file.
with open('../app/data/sa2_regions.json', encoding='utf-8') as f:
    sa1s = json.load(f)

In [72]:
import sklearn.neighbors, sklearn.preprocessing

In [73]:
# Unfitted estimators: the scaler standardizes the feature columns and the
# kernel density estimate is later fitted on the standardized values.
# The arguments below are the library defaults, written out explicitly.
scaler = sklearn.preprocessing.StandardScaler(with_mean=True, with_std=True)
kde = sklearn.neighbors.KernelDensity(kernel='gaussian', bandwidth=1.0)

In [47]:
# Scalar attributes read from every region, in column order.
feature_names = [
    'rental_rate',
    'median_rent',
    'income',
    'religious',
    'population',
    'unemployment',
]

# One numeric row per region: the scalar features above followed by one
# column per zone type in the fixed order R, C, I, W, P (0 when a region has
# no entry for that zone type). NaNs are replaced via numpy.nan_to_num.
rows_and_columns = []
for region in sa1s:
    features = [region[name] for name in feature_names]
    features += [region['zoning'].get(code, 0) for code in "RCIWP"]
    rows_and_columns.append(numpy.nan_to_num(features))

In [75]:
# Fit the scaler on the feature rows and standardize them in one step; the
# fitted scaler is reused by sample_sa1 to map samples back with
# inverse_transform.
scaled_features = scaler.fit_transform(rows_and_columns)

In [78]:
# Fit the kernel density estimate on the standardized feature matrix.
# fit() is the last expression, so the cell displays the estimator's repr.
kde.fit(scaled_features)

KernelDensity(algorithm='auto', atol=0, bandwidth=1.0, breadth_first=True,
       kernel='gaussian', leaf_size=40, metric='euclidean',
       metric_params=None, rtol=0)

In [94]:
def sample_sa1(random_state=None):
    """Draw one synthetic SA1 region from the fitted kernel density estimate.

    A single point is sampled from ``kde`` in standardized feature space,
    mapped back to the original units with ``scaler.inverse_transform`` and
    post-processed so that it is a usable region record:

    * negative values are replaced by 0;
    * the five zoning columns (R, C, I, W, P) are rescaled so they sum to
      10000; if every zoning column is 0 they are left at 0 instead of
      dividing by zero;
    * ``income_level`` is 1 for incomes below 1261.91, 2 for incomes below
      1843 and 3 otherwise.

    Parameters
    ----------
    random_state : int, RandomState instance or None, optional
        Forwarded to ``kde.sample`` so that draws can be made reproducible.
        ``None`` (the default) keeps the previous non-deterministic behavior.

    Returns
    -------
    dict
        One float entry per name in ``feature_names``, a ``'zoning'`` dict
        keyed by zone type, and an integer ``'income_level'``.

    Raises
    ------
    AssertionError
        If the post-processed sample still contains a value that is not
        >= 0 (for example NaN).
    """
    zone_types = "RCIWP"
    n_zones = len(zone_types)
    zoning_total = 10000
    # Upper bounds (exclusive) of income levels 1 and 2; anything at or above
    # the last threshold is level 3.
    income_thresholds = numpy.array([1261.91, 1843.0])

    standardized = kde.sample(n_samples=1, random_state=random_state)
    values = scaler.inverse_transform(standardized)[0]
    # The density has support on negative values; none of the features can
    # meaningfully be negative, so floor them at zero.
    values = numpy.clip(values, 0, None)

    # zip() stops after the named scalar features, leaving the trailing
    # zoning columns to be handled separately below.
    sa1 = {name: float(value) for name, value in zip(feature_names, values)}

    zoning = numpy.clip(values[-n_zones:], 0, zoning_total)
    zoned_sum = zoning.sum()
    if zoned_sum > 0:
        zoning = zoning / zoned_sum * zoning_total
    # else: every zone was floored to 0 -- keep the zeros rather than
    # producing NaNs from 0 / 0.

    assert all(values >= 0) and all(zoning >= 0)

    sa1['zoning'] = {
        zone_type: float(amount) for zone_type, amount in zip(zone_types, zoning)
    }

    # side='right' means an income equal to a threshold falls in the higher
    # level, matching the strict `income < threshold` comparison.
    level = numpy.searchsorted(income_thresholds, sa1['income'], side='right')
    sa1['income_level'] = int(level) + 1
    return sa1

In [95]:
# Draw a single synthetic region and display it as a sanity check.
sample_sa1()

{'rental_rate': 0.0,
 'median_rent': 707.1792815044342,
 'income': 1576.4728714166088,
 'religious': 0.8175779011343542,
 'population': 364.2365097554806,
 'unemployment': 0.0240452738050246,
 'zoning': {'R': 6688.060795265839,
  'C': 1852.197497941839,
  'I': 0.0,
  'W': 35.998444923152995,
  'P': 1423.7432618691696},
 'income_level': 2}