In [None]:
from collections import OrderedDict
from datetime import datetime
import logging
import os
from pprint import pprint

import geojson
import geopandas as gpd
import fiona
import matplotlib as plt
import pandas as pd
import pyodbc
from shapely.geometry import (
    LineString, mapping, shape
)
from shapely.ops import (
    split as shapely_split, transform
)

from python_gis.poc.landgrid_processor import LandgridProcessor
from python_gis.poc.io.pgsql import PgWriter
from python_gis.poc.io.mssql import MsWriter
from python_gis.poc.io.shapefile import (
    read_shapefile
)
from python_gis.poc.tools.spatial import (
    remove_holes, fix_anti_meridian
)
import python_gis.poc.util.log_config
from python_gis.poc.util.sql_config import (
    TX_BLOCKS, US_COUNTIES
)

# Logger named after the current module (``__name__``).
# NOTE(review): presumably configured by the python_gis.poc.util.log_config
# import above — confirm.
logger = logging.getLogger(__name__)

In [None]:
# List the contents of the directory referenced by DIML_HOME
# (presumably a quick check that the project root is reachable).
!ls $DIML_HOME

In [None]:
# MS SQL Server connection settings; every variable must be set in the
# environment (a missing one raises KeyError).
ms_driver, ms_host, ms_db, ms_user, ms_pwd = (
    os.environ[env_key]
    for env_key in (
        "MSSQL_DRIVER",
        "MSSQL_SERVER",
        "MSSQL_DATABASE",
        "MSSQL_UID",
        "MSSQL_PWD",
    )
)

# PostGIS connection settings.
pg_host, pg_db, pg_user, pg_pwd = (
    os.environ[env_key]
    for env_key in ("PG_SERVER", "PG_DATABASE", "PG_UID", "PG_PWD")
)

# File locations: data lives under DATA_DIR, project files under DIML_HOME.
# The landgrid geodatabase is in WGS84 (epsg:4326).
gdb_path = os.path.join(
    os.environ["DATA_DIR"], "landgrid", "DI_basemaps_WGS84.gdb"
)
ddl_path = os.path.join(
    os.environ["DIML_HOME"], "database", "mssql", "schema.sql"
)
out_path = os.path.join(os.environ["DATA_DIR"], "shapefile_out")
idx_path = os.path.join(os.environ["DIML_HOME"], "database", "indexes.sql")
test_path = os.path.join(os.environ["DATA_DIR"], "mssql_test")

## Dissolve Abstracts to Blocks

In [None]:
%%time

# Texas abstracts shapefile in the shapefile output directory.
tx_path = os.path.join(out_path, 'Texas_Abstracts.shp')

# TODO: May need to exclude NULL Township as it results in large
#  MultiPolygons. Check against reference (TWP: NULL, BLK: 1).
# Drop rows where both keys are null, then label the remaining nulls as
# 'Missing' so they can still be used as dissolve keys.
tx_df = (
    gpd.read_file(tx_path)
    .dropna(how='all', subset=['Township', 'Block'])
    .fillna({'Township': 'Missing', 'Block': 'Missing'})
)

# Merge the abstract geometries that share a (Township, Block) pair.
tx_blocks_df = (
    tx_df[['Township', 'Block', 'geometry']]
    .dissolve(by=['Township', 'Block'], aggfunc='first')
    .reset_index()
)

In [None]:
# Inspect the dissolved block that has no Township value and Block '1'.
tx_blocks_df[
    tx_blocks_df.Township.eq('Missing') & tx_blocks_df.Block.eq('1')
]

## Deaggregate MultiPolygons to Polygons

In [None]:
# Keep only the dissolved rows whose geometry is a MultiPolygon
# (copied so later edits do not touch tx_blocks_df).
tx_blocks_mp_df = tx_blocks_df[
    tx_blocks_df.geometry.geom_type.eq('MultiPolygon')
].copy()

In [None]:
# Expand each MultiPolygon into one column per member polygon, keeping the
# dissolved row's index; rows with fewer members are padded with missing
# values.
# The members are read through ``.geoms`` explicitly: multi-part geometries
# are no longer directly iterable in Shapely 2.0, so handing them to the
# DataFrame constructor as-is would produce a single column of unsplit
# MultiPolygons instead of one column per polygon.
df_polys = pd.DataFrame(
    [list(multi_poly.geoms) for multi_poly in tx_blocks_mp_df.geometry],
    index=tx_blocks_mp_df.index,
)

In [None]:
# Carry the original index along as an ``id`` column, then melt so that every
# polygon column becomes its own row (wide -> long).
df_polys = (
    df_polys
    .assign(id=df_polys.index)
    .melt(id_vars='id', value_name='SinglePolygon')
)

In [None]:
# Discard rows with no polygon and restore the original index values (``id``)
# as the index.
df_polys = (
    df_polys
    .dropna(subset=['SinglePolygon'])
    .set_index('id')
)

In [None]:
# Attach the single polygons to the dissolved rows by index. A row whose
# index appears several times in df_polys is repeated once per polygon.
df_deagg = (
    tx_blocks_df
    .join(df_polys, how='left')
    .copy()
)

In [None]:
# For every row still holding a MultiPolygon, overwrite the geometry with the
# single polygon that was joined onto that row.
df_deagg.loc[df_deagg.geometry.geom_type.eq('MultiPolygon'), ['geometry']] = (
    df_deagg.loc[df_deagg.geometry.geom_type.eq('MultiPolygon'), 'SinglePolygon']
)

In [None]:
# Remove the helper columns introduced by the melt/join steps.
df_deagg = df_deagg.drop(columns=['variable', 'SinglePolygon'])

In [None]:
# Inspect the deaggregated polygons for the block with no Township value and
# Block '1'. The mask is built from df_deagg itself so that it lines up
# row-for-row with the frame being filtered (df_deagg has repeated index
# labels, so a mask taken from another frame only works via index alignment).
df_deagg.loc[(df_deagg.Township == 'Missing') &
             (df_deagg.Block == '1'), :]

## Calculate Overlaps

### Cache Counties

In [None]:
%%time

# Load the county polygons from the shapefile output directory.
county_path = os.path.join(out_path, f'{US_COUNTIES}_shgrid.shp')
county_df = gpd.read_file(county_path)
# Touch the spatial index here — presumably so it is built and cached before
# the spatial join below is timed (TODO confirm).
county_df.sindex

# Preview the first few county rows.
county_df.head()

### Just slice Block 1
- All 580 of these records share the same index value because they were split out of a single MultiPolygon record; the index is reset below so that each polygon gets its own unique label.

In [None]:
# Select the polygons of the block with no Township value and Block '1'.
# Each polygon is its own row here, so a fresh 0..n-1 index gives every one
# a unique label.
olap_df = (
    df_deagg[df_deagg.Township.eq('Missing') & df_deagg.Block.eq('1')]
    .reset_index(drop=True)
)

In [None]:
# Display the re-indexed Block 1 polygons.
olap_df

### Spatial join with Counties

In [None]:
%%time

# Pair every Block 1 polygon with each county it intersects (inner join:
# polygons that touch no county are dropped).
# 'intersects' is sjoin's default predicate, so it is left implicit: the
# keyword naming it was renamed from ``op`` to ``predicate`` and ``op`` was
# removed in GeoPandas 1.0, whereas omitting it works on every version.
olaps_df = gpd.sjoin(olap_df, county_df, how='inner')

In [None]:
# Note: a polygon matched to more than one county appears once per match,
# so those rows share the same index value.
olaps_df

### Remove duplicate County Names for the same record

In [None]:
# Keep one row per (polygon index, county name) pair. The index is moved into
# a column first so that drop_duplicates compares it together with the name,
# then it is restored as the index.
olaps_pre_df = (
    olaps_df[['County_Nam']]
    .reset_index()
    .drop_duplicates()
    .set_index('index')
)

In [None]:
# Display the de-duplicated (polygon index, county name) pairs.
olaps_pre_df

### Do the equivalent of STRING_AGG

In [None]:
# Concatenate the county names of each polygon into one comma-separated
# string (column ``Colaps``), keeping groups in first-seen order.
olaps_join_df = (
    olaps_pre_df
    .groupby(level=0, sort=False)
    .agg(Colaps=('County_Nam', ','.join))  # named aggregation, pandas >= 0.25
)


In [None]:
# Display the comma-joined county names per polygon.
olaps_join_df

In [None]:
%%time

# Write the deaggregated blocks to a shapefile under test_path.
# The directory is created first: nothing earlier in the notebook creates it,
# and the write fails when the parent directory does not exist.
os.makedirs(test_path, exist_ok=True)
tx_block_path = os.path.join(test_path, f'{TX_BLOCKS}_deagg.shp')
df_deagg.to_file(tx_block_path)