In [1]:
import boto3
import json

In [2]:
import numpy as np
import pandas as pd
import psycopg2
import sqlalchemy as db
import matplotlib as plt

%matplotlib inline

  """)


In [4]:
!pip install geoalchemy2

Collecting geoalchemy2
  Downloading https://files.pythonhosted.org/packages/f7/b0/5730254ba89826c2835e15c7854b1e33c3bbe0433b00a1770a962cd21a7b/GeoAlchemy2-0.7.0-py2.py3-none-any.whl
Installing collected packages: geoalchemy2
Successfully installed geoalchemy2-0.7.0
[33mYou are using pip version 10.0.1, however version 20.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [5]:
from sqlalchemy import update 
from geoalchemy2 import Geometry
from sqlalchemy.dialects import postgresql

In [6]:
# Retrieve the stored secret that holds the database connection settings
# from AWS Secrets Manager.
DB_SECRET_ID = 'UrbanInstituteDevRDSParameter'
secretsmanager_client = boto3.client('secretsmanager')
dbSecretValues = secretsmanager_client.get_secret_value(
    SecretId=DB_SECRET_ID,
)

In [7]:
# The secret's payload is a JSON string; decode it so the individual
# connection settings can be looked up by key.
secret_payload = dbSecretValues['SecretString']
dbSecretValuesJson = json.loads(secret_payload)
# Uncomment the following line to confirm the database access information.
# Caution: doing so writes the credentials into the notebook output.
#dbSecretValuesJson

In [8]:
from urllib.parse import quote

from sqlalchemy import create_engine

# Postgres connection settings, all taken from the decoded Secrets Manager payload.
POSTGRES_ADDRESS = dbSecretValuesJson['POSTGRES_ADDRESS']
POSTGRES_PORT = int(dbSecretValuesJson['POSTGRES_PORT'])
POSTGRES_USERNAME = dbSecretValuesJson['POSTGRES_USERNAME']
POSTGRES_PASSWORD = dbSecretValuesJson['POSTGRES_PASSWORD']
POSTGRES_DBNAME = dbSecretValuesJson['POSTGRES_DBNAME']

# Connection URL for SQLAlchemy.
# - The username and password are percent-encoded so that reserved characters
#   (e.g. '@', ':', '/', '%') cannot corrupt the URL; SQLAlchemy decodes them again.
# - The port from the secret is included explicitly; previously it was read but
#   never used, so the driver's default port was always assumed.
postgres_str = (
    f"postgresql://{quote(POSTGRES_USERNAME, safe='')}:{quote(POSTGRES_PASSWORD, safe='')}"
    f"@{POSTGRES_ADDRESS}:{POSTGRES_PORT}/{POSTGRES_DBNAME}"
)

engine = create_engine(postgres_str)
# Long-lived connection kept at notebook scope.
connection = engine.connect()
metadata = db.MetaData()
# Reflect the table definition from the database. `autoload_with` on its own
# triggers reflection; the separate `autoload=True` flag is redundant and is
# deprecated in newer SQLAlchemy releases.
building_detailed_info_table = db.Table(
    'building_detailed_info_within_ajjits_selected_tracts',
    metadata,
    autoload_with=engine,
)

# Outliers
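Here are some outliers: buildings whose LiDAR height percentiles (`pct999`, `pct995`, `pct990`, `pct950`) differ most from the recorded `altitude_m`. Each error column is the relative difference, (percentile − `altitude_m`) / `altitude_m`; for example, for building 66936, (58.4936 − 22.29) / 22.29 ≈ 162%.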
bldgid	egid	altitude_m	rooftype	pct999	pct995	pct990	pct950	pct999 error	pct995 error	pct990 error	pct950 error
66936	DC00018902	22.29	Shed	58.4936	58.27	58.26	57.93	162%	161%	161%	160%

69617	DC00000154	21.25	Default	53.17935	53.17675	53.1735	53.1675	150%	150%	150%	150%

67072	DC00018620	21.68	Shed	54.19738	54.1869	54.18	54.149	150%	150%	150%	150%

67350	DC00000067	18.53	Shed	42.85913	42.85565	42.8513	42.8065	131%	131%	131%	131%

68274	DC00000155	24.54	Default	55.92356	55.91	55.8978	27.436	128%	128%	128%	12%

66884	DC00018619	26.39	Shed	59.21159	59.02	58.92	58.87	124%	124%	123%	123%

68176	DC00001032	21.21	Default	44.52783	44.52	44.5183	44.47	110%	110%	110%	110%

67221	DC00000047	20.69	Shed	42.86	42.85675	42.85	42.82	107%	107%	107%	107%

66392	DC00000303	20.92	Shed	43.26925	43.26625	43.2625	43.2525	107%	107%	107%	107%

70975	DC00001887	36.96	Default	23.45716	23.4458	23.4316	22.746	-37%	-37%	-37%	-38%

67279	DC00000068	36.61	Shed	23.104	22.275	21.07	17.52	-37%	-39%	-42%	-52%

70991	DC00001883	37.89	Default	21.13616	21.1116	21.0548	20.82	-44%	-44%	-44%	-45%

69540	DC00000075	73.48	Default	20.74856	20.7428	20.7356	20.446	-72%	-72%	-72%	-72%

69541	DC00000075	73.48	Default	20.74856	20.7428	20.7356	20.446	-72%	-72%	-72%	-72%


## Let's try to tackle the ones with high positive error percentage first
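Take a look at the LAS points of the four buildings with the largest positive error, listed below as bldgid, egid, and a coordinate pair (presumably longitude, latitude):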
66936 DC00018902 (-77.05051903134964,38.904188184028826) 
69617 DC00000154 (-77.04051684202682,38.89980029755603) 
67072 DC00018620 (-77.05079234475183,38.9039071578902)
67350 DC00000067 (-77.04775092724087,38.903232021664586)

In [16]:
# Building to inspect: 66936 is the first row of the outlier table above
# (largest positive error).
bldgid_to_inspect = 66936

# Select the LiDAR points matched to one building, skipping classifications 3, 4 and 5.
# NOTE(review): 3/4/5 are presumably the LAS vegetation classes (low/medium/high) — confirm.
# The building id is passed as a bound parameter (:bldgid) instead of being formatted
# into the SQL text, so the same statement can be reused for other buildings.
sql_string = """select las_grnd_coord, lidar_z, intensity, classification, scan_angle, synthetic, withheld from lidarbuildingmatchtest 
        where bldgid = :bldgid and classification <> ALL (ARRAY[3, 4, 5]) """
las_within_bldg_border_df = pd.read_sql_query(
    db.text(sql_string),
    engine,
    params={"bldgid": bldgid_to_inspect},
)

In [17]:
# Show the LiDAR points loaded by the query for the selected building
# (bare expression so the notebook renders the DataFrame as a table).
las_within_bldg_border_df

Unnamed: 0,las_grnd_coord,lidar_z,intensity,classification,scan_angle,synthetic,withheld
0,0101000080598F3D563F4353C0625A6851BD734340295C...,22.91,6700.0,6,-8.0,0,0
1,01010000802803105A3F4353C0B68E6DCFBC734340F628...,22.71,24959.0,6,-8.0,0,0
2,0101000080F42DA43A3F4353C08D516AEBBC734340F628...,22.71,25058.0,6,-8.0,0,0
3,0101000080D59FBC1A3F4353C0AEA32808BD734340C3F5...,22.76,21574.0,6,-8.0,0,0
4,01010000803DCA50FB3E4353C0F9652524BD7343405C8F...,22.86,32573.0,6,-8.0,0,0
5,0101000080C677EAE93E4353C06051963BBD73434090C2...,25.31,4288.0,6,-8.0,0,0
6,0101000080FE82EDDA3E4353C01647A541BD734340E17A...,22.88,23690.0,6,-8.0,0,0
7,0101000080DBA17ECA3E4353C045139357BD734340E17A...,25.38,27738.0,6,-8.0,0,0
8,0101000080E18F9FA93E4353C07A0F1375BD734340CDCC...,25.30,26264.0,6,-8.0,0,0
9,010100008002C542883E4353C012776A7DBD734340CDCC...,25.30,24959.0,6,-8.0,0,0
