In [361]:
import pandas as pd
from pandas.io.json import json_normalize

import numpy as np

In [362]:
# Load the raw listings and flatten nested JSON objects into dotted column
# names (the later cells rely on columns such as 'buildingData.city').
# NOTE(review): `pandas.io.json.json_normalize` is deprecated in favour of
# `pandas.json_normalize` on pandas >= 1.0 — confirm the installed version.
df = (
    json_normalize(
        pd.read_json('./all.json').to_dict('records')
    )
    .reset_index(drop=True)
)

In [363]:
# Filter out data
# Keep listings with fewer than 8 rooms and drop everything located in Järvenpää.
# The loaded frame is called `df`; the name `data` is not defined in any
# visible cell, so referring to it breaks a fresh Restart & Run All.
df = df[(df['rooms'] < 8) & (df['buildingData.city'] != 'Järvenpää')].copy()
# Plain column assignment replaces the column, so the int dtype really sticks;
# a full-column `.loc[:, 'rooms'] = ...` may instead write the values into the
# existing (e.g. float) column and leave room counts rendered as '2.0'.
df['rooms'] = df['rooms'].astype(int)

In [364]:
# Parse the price text into a float: remove non-breaking spaces (presumably a
# thousands separator — confirm against the data) and the euro sign.
# The column is replaced by plain assignment so that the float dtype is kept;
# `.loc[:, 'price'] = ...` may write into the existing object column in place.
df['price'] = (
    df['price']
    .astype(str)
    .str.replace('\xa0', '', regex=False)
    .str.replace('€', '', regex=False)
    .astype(float)
)

def norm_city(value):
    """Collapse a free-text city value onto a canonical city name.

    The match is a case-insensitive substring test, checked in this order:
    'espoo' -> 'Espoo', 'helsinki' -> 'Helsinki', then 'vantaa' or '01350'
    -> 'Vantaa'.

    Parameters
    ----------
    value : object
        Raw city value. Non-string values (e.g. None or NaN for a missing
        city) are returned unchanged instead of raising on ``.lower()``.

    Returns
    -------
    object
        The canonical city name when one matches, otherwise ``value`` as given.
    """
    if not isinstance(value, str):
        # Missing cities arrive as None/NaN; leave them for the caller to handle.
        return value

    v = value.lower()
    if 'espoo' in v:
        return 'Espoo'
    if 'helsinki' in v:
        return 'Helsinki'
    # '01350' is presumably a Vantaa postal code entered in the city field — TODO confirm.
    if 'vantaa' in v or '01350' in v:
        return 'Vantaa'

    return value
    
# Canonicalise the city and lower-case the district.
df['buildingData.city'] = df['buildingData.city'].apply(norm_city)
# A missing district can be None or NaN; NaN is truthy, so test the type rather
# than truthiness before calling .lower().
df['buildingData.district'] = df['buildingData.district'].apply(
    lambda v: v.lower() if isinstance(v, str) else ''
)
# Drop listings without a district. .copy() gives an independent frame, so the
# column assignments below do not write into a slice of the previous one.
df = df[df['buildingData.district'] != ''].copy()

# "<city>, <district>" label used as the grouping key further down.
df['neighborhood'] = [
    "{}, {}".format(city, district)
    for city, district in zip(df['buildingData.city'], df['buildingData.district'])
]

# Inside a row-wise apply, `item.size` resolves to Series.size (the number of
# fields in the row), not to the 'size' column, so the columns are read by
# label here.
# Assumes 'size' holds the numeric floor area in m² — TODO confirm against all.json.
df['price_per_sq_m'] = df['price'].astype(float) / df['size'].astype(float)
# Price rebuilt from the per-m² figure; NaN where the area is missing or zero.
df['avg_price'] = df['price_per_sq_m'] * df['size'].astype(float)

In [365]:
def percentile(n):
    """Return a one-argument callable computing the ``n``-th percentile.

    The callable forwards its argument to ``np.percentile`` together with
    ``n``, and its ``__name__`` is set to ``'p<n>'`` (for example ``'p90'``).
    """
    def nth_percentile(x):
        # ``n`` is captured from the enclosing call.
        return np.percentile(x, n)

    nth_percentile.__name__ = 'p%s' % n
    return nth_percentile

# Mean of avg_price for every (neighborhood, rooms) pair. Missing values are
# replaced with 0 before grouping, and the means are cast to int afterwards.
grouped = (
    df.fillna(0)
    .groupby(['neighborhood', 'rooms'])
    .agg({'avg_price': ['mean']})
    .astype(int)
)

In [369]:
import json

def format_groups(groups):
    """Reshape grouped mean prices into a JSON-friendly list of records.

    Parameters
    ----------
    groups : object
        Anything whose ``to_dict()`` returns a mapping with the key
        ``('avg_price', 'mean')`` pointing at
        ``{(city_key, rooms): avg_price, ...}``.

    Returns
    -------
    list of dict
        One ``{'city_key': ..., 'stats': [...]}`` entry per distinct
        ``city_key``, in first-seen order. Each ``stats`` item is
        ``{'rooms': str(rooms), 'avg_price': avg_price}``.
    """
    agg_key = ('avg_price', 'mean')
    # Read from the argument, not from the notebook-level `grouped` variable,
    # so the function works on whatever frame it is given.
    per_group = groups.to_dict()[agg_key]

    agg_stats = {}
    for (city_key, rooms), avg_price in per_group.items():
        agg_stats.setdefault(city_key, []).append(
            {'rooms': str(rooms), 'avg_price': avg_price}
        )

    return [{'city_key': city_key, 'stats': stats} for city_key, stats in agg_stats.items()]

# Write the formatted statistics to disk as indented JSON with sorted keys.
with open('./stats.json', mode='w') as stats_file:
    json.dump(
        format_groups(grouped),
        stats_file,
        indent=2,
        sort_keys=True,
    )