In [14]:
from multiprocessing import Pool
import time

import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon

from nbhd import data, geometry, utils

In [2]:
def get_translator(df, first_column="first", second_column="second"):
    graph = nx.from_pandas_edgelist(df, first_column, second_column)
    subgraphs = [graph.subgraph(c) for c in nx.connected_components(graph)]
    translator = {n: list(sorted(g.nodes))[0] for g in subgraphs for n in g.nodes}
    return translator

In [3]:
def sounds_institutional(local_type):

    words = [
        "Education",
        "Terminal",
        "Station",
        "Services",
        "Port",
        "Oil",
        "Hosp",
        "Heli",
        "Electric",
    ]
    return any([w.lower() in local_type.lower() for w in words])

In [4]:
def calculate_nonparametric_features(pixel: Polygon, db: data.Base):
    """Calculate non-parametric features.

    Parameters
    ----------
    pixel : Polygon
        pixel
    db : data.Base
        db
    """

    all_properties = db.knn("properties", "buildings", pixel)
    properties = all_properties.loc[all_properties.dist.eq(0)]
    properties = add_building_stats(properties)
    properties = find_stacked_properties(properties, pixel, db)
    properties = add_building_types(properties, pixel, db)
    properties = get_faceblock_stats(properties, pixel, db)

    return properties

In [5]:
def add_building_stats(dataframe: pd.DataFrame) -> pd.DataFrame:
    """add_building_stats.

    Parameters
    ----------
    dataframe : pd.DataFrame
        dataframe

    Returns
    -------
    pd.DataFrame

    """

    properties_per_building = dict(dataframe.buildings_id.value_counts())
    dataframe["num_properties_in_building"] = dataframe.buildings_id.apply(
        lambda x: properties_per_building.get(x)
    )
    dataframe["building_footprint"] = gpd.GeoSeries.from_wkb(
        dataframe.buildings_geometry
    ).area
    dataframe["footprint_per_property"] = (
        dataframe.building_footprint / dataframe.num_properties_in_building
    )
    return dataframe

In [6]:
def find_stacked_properties(properties_df: pd.DataFrame, 
        pixel: Polygon, db: data.Base):
    """find_stacked_properties.

    Parameters
    ----------
    properties_df : pd.DataFrame
        properties_df
    pixel : Polygon
        pixel
    db : data.Base
        db
    """

    nn_properties = db.knn("properties", "properties", pixel)
    stacked_properties = nn_properties.loc[nn_properties.dist.eq(0)]
    stacked_properties.columns = [
        "first",
        "first_geom",
        "second",
        "second_geom",
        "dist",
    ]
    translator = get_translator(stacked_properties)
    stacked_properties["stacked"] = stacked_properties["first"].apply(
        lambda x: translator.get(x)
    )
    stacked_dict = dict(zip(stacked_properties["first"], stacked_properties.stacked))
    stacked_counts = dict(stacked_properties.stacked.value_counts())
    pid_stacked_counts = dict(
        zip(
            stacked_properties["first"],
            stacked_properties.stacked.apply(lambda x: stacked_counts.get(x)),
        )
    )
    properties_df["stacked"] = properties_df.properties_id.apply(
        lambda x: stacked_dict.get(x)
    )
    properties_df["stacked_count"] = properties_df.properties_id.apply(
        lambda x: pid_stacked_counts.get(x, 0)
    )

    return properties_df

In [7]:
def add_building_types(dataframe: pd.DataFrame, pixel: Polygon, 
        db: data.Base):
    """add_building_types.

    Parameters
    ----------
    dataframe : pd.DataFrame
        dataframe
    pixel : Polygon
        pixel
    db : data.Base
        db
    """

    nn_bn = db.knn("names", "buildings", pixel, t1_columns=["local_type"])
    nn_bn["institution"] = nn_bn.local_type.apply(lambda x: sounds_institutional(x))
    institutional_buildings = dict(zip(nn_bn.buildings_id, nn_bn.institution))
    institution_type = dict(zip(nn_bn.buildings_id, nn_bn.local_type))
    dataframe["institution"] = dataframe.buildings_id.apply(
        lambda x: institutional_buildings.get(x, 0) * institution_type.get(x, 0)
    )
    dataframe.institution = dataframe.institution.apply(
        lambda x: None if x == 0 else x
    )
    return dataframe

In [8]:
# for given faceblock
# find number of neighbouring faceblocks
# find number of properties on neighbouring faceblocks

def count_neighbours(row, df):
    
    df = df.loc[~df.roads_id.duplicated()].copy()
    start, end = row.startNode, row.endNode
    df = df.loc[(df.startNode==start) | (df.startNode==end) | (df.endNode==start) | (df.endNode==end)]
    df = df.loc[df.roads_id!=row.roads_id]
    neighbouring_faceblocks = len(df)
    properties_on_neighbouring_faceblocks = df.num_properties_on_road.sum()
    
    return pd.Series({'roads_id':row.roads_id, 
                      'num_neighbouring_faceblocks': neighbouring_faceblocks,
           'num_properties_on_neighbouring_faceblocks': properties_on_neighbouring_faceblocks})

In [9]:
def get_faceblock_stats(properties, pixel, db):
    p_bdg = dict(zip(properties.properties_id, properties.buildings_id))
    roads = db.intersects("roads", pixel)
    roads = roads[['id', 'startNode', 'endNode', 'name1', 'length', 'road_function', 'geometry']]
    roads = roads.rename(columns={'id':'roads_id'})

    nn_pr = db.knn(
        "properties",
        "roads",
        polygon=pixel,
    )

    roads = roads.merge(nn_pr, on='roads_id', how='outer')

    property_counts_dict = dict(nn_pr.value_counts('roads_id'))
    roads['num_properties_on_road'] = roads.roads_id.apply(lambda x: property_counts_dict.get(x,0))
    roads['length_per_property'] = roads.length / roads.num_properties_on_road
    roads['log_length_per_property'] = np.log(roads.length_per_property)
    neighbours = roads.loc[~roads.roads_id.duplicated()].apply(axis=1, func=lambda row: count_neighbours(row,roads))

    roads = roads.merge(neighbours, on='roads_id', how='outer')

    roads['building'] = roads.properties_id.apply(lambda x: p_bdg.get(x, None))

    building_counts_dict = dict(roads.groupby('roads_id').building.nunique())
    roads['num_buildings_on_road'] = roads.roads_id.apply(lambda x: building_counts_dict.get(x,0))
    
    properties = properties.merge(roads, how='outer', on='properties_id')
    
    properties = properties[['properties_id', 'properties_geometry_x', 'stacked', 'stacked_count',
      'buildings_id', 'buildings_geometry', 'building_footprint', 'num_properties_in_building', 'footprint_per_property', 'institution',
      'roads_id', 'roads_geometry', 'dist_y', 'startNode', 'endNode', 'name1', 'length', 'road_function',
      'num_properties_on_road', 'length_per_property', 'log_length_per_property', 'num_neighbouring_faceblocks', 'num_properties_on_neighbouring_faceblocks', 'num_buildings_on_road']]
    
    properties = properties.rename(columns={'properties_geometry_x':'geometry',
                              'dist_y': 'dist_to_road_from_property'})
    
    properties = properties[[c for c in properties.columns if 'geometry' not in c]]
    
    return properties

In [10]:
def save_for_pixel(pixel_geom_and_id):
    try:
        pixel, pixel_id = pixel_geom_and_id
        print(f'{time.ctime()}: {pixel_id})
        print(f'Calculating fixed features for pixel {pixel_id}...')
        df = calculate_nonparametric_features(pixel, db)
        df['pixel'] = pixel_id
        print(f'Saving {len(df)} features...')
        df.to_sql('fixed_features', db.engine, if_exists='append',index=False)
        print(len(df))
        return (pixel_id, True, len(df))
    except Exception as e:
        print(Exception)
        return (pixel_id, False, e)

In [11]:
db = data.Base()
pixels = db.select('pixels')

Initializing database connection...
Database connected!


In [12]:
pd.options.mode.chained_assignment = None

In [12]:
with Pool(18) as pool:
    r = pool.map(save_for_pixel, pixels.values)

Calculating fixed features for pixel 135230...Calculating fixed features for pixel 135250...



Exception during reset or similar
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 2212, in run_callable
    return conn.run_callable(callable_, *args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1653, in run_callable
    return callable_(self, *args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/base.py", line 2794, in has_table
    return bool(cursor.first())
  File "/opt/conda/lib/python3.8/site-packages/sqlalchemy/engine/result.py", line 1358, in first
    return self._non_result(None)
  File "/opt/conda/lib/python3.8/site-packages/sqlalchemy/engine/result.py", line 1236, in _non_result
    util.raise_(
  File "/opt/conda/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
    raise exception
sqlalchemy.exc.ResourceClosedError: This result object does not return rows. It has been closed automatically.

Duri

Calculating fixed features for pixel 135270...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe["num_properties_in_building"] = dataframe.buildings_id.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe["building_footprint"] = gpd.GeoSeries.from_wkb(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe["footprint_per_property"] = (
A value is trying to be set on a co

Saving 287 features...
Saving 237 features...


In [13]:
r

[('135230',
  False,
  AttributeError("'DataFrame' object has no attribute 'properties_id'")),
 ('135250', True, 237),
 ('135270', True, 287)]