# PV Rooftops

## 0. Build Database Connection

PyAthena is a Python DB API 2.0 (PEP 249) compliant client for Amazon Athena.
https://github.com/laughingman7743/PyAthena

In [1]:
from pyathena.connection import Connection
from pyathena.pandas_cursor import PandasCursor

In [15]:
# --- AWS / Athena connection settings ---
AWS_REGION_NAME = "us-west-2"
S3_STAGING_DIR = "s3://nrel-tests/pv-rooftops-staging"  # passed to Connection as s3_staging_dir

# --- Database and the four PV Rooftops tables queried below ---
DATABASE_NAME = "oedidb"
ASPECTS_TABLE_NAME = "pv_rooftops_aspects"
BUILDINGS_TABLE_NAME = "pv_rooftops_buildings"
PLANES_TABLE_NAME = "pv_rooftops_developable_planes"
RASD_TABLE_NAME = "pv_rooftops_rasd"

In [3]:
# Open a single Athena connection and derive both cursors from it, instead of
# building two identical Connection objects.
connection = Connection(region_name=AWS_REGION_NAME, s3_staging_dir=S3_STAGING_DIR)

# Default cursor: used for the metadata statements (DESCRIBE / SHOW PARTITIONS).
cursor = connection.cursor()
# Pandas cursor: used where the result is turned into a DataFrame via as_pandas().
pandas_cursor = connection.cursor(PandasCursor)

## 1. Retrieve Table Metadata

## 1.1 pv_rooftops_aspects

In [4]:
import pandas as pd

In [5]:
# Fetch the column definitions of the aspects table.
# The first element of every returned row is treated as a tab-separated
# string; it is split into stripped fields and shown as a table.
result = cursor.execute(f"DESCRIBE {DATABASE_NAME}.{ASPECTS_TABLE_NAME}")
columns = [list(map(str.strip, row[0].split("\t"))) for row in result.fetchall()]
pd.DataFrame(columns, columns=["NAME", "TYPE", "FROM"])

Unnamed: 0,NAME,TYPE,FROM
0,gid,bigint,
1,city,string,
2,state,string,
3,year,bigint,
4,bldg_fid,bigint,
5,aspect,bigint,
6,the_geom_96703,string,
7,the_geom_4326,string,
8,region_id,bigint,
9,__index_level_0__,bigint,


In [7]:
# Retrieve partition information for the aspects table and print one row per line.
result = cursor.execute(f"SHOW PARTITIONS {DATABASE_NAME}.{ASPECTS_TABLE_NAME}")
partition_rows = result.fetchall()
for row in partition_rows:
    print(row)

('city_year=detroit_mi_12',)
('city_year=lexington_ky_12',)
('city_year=sanbernardinoriverside_ca_12',)
('city_year=jacksonville_fl_10',)
('city_year=philadelphia_pa_07',)
('city_year=modesto_ca_10',)
('city_year=sandiego_ca_13',)
('city_year=bakersfield_ca_10',)
('city_year=lubbock_tx_08',)
('city_year=newark_nj_07',)
('city_year=reno_nv_07',)
('city_year=springfield_il_09',)
('city_year=concord_nh_09',)
('city_year=minneapolis_mn_07',)
('city_year=charlotte_nc_12',)
('city_year=houston_tx_10',)
('city_year=newyork_ny_05',)
('city_year=kansascity_mo_10',)
('city_year=denver_co_12',)
('city_year=miami_fl_09',)
('city_year=santafe_nm_09',)
('city_year=lansing_mi_13',)
('city_year=omaha_ne_13',)
('city_year=scranton_pa_08',)
('city_year=springfield_ma_13',)
('city_year=tampa_fl_08',)
('city_year=cleveland_oh_12',)
('city_year=hartford_ct_13',)
('city_year=indianapolis_in_12',)
('city_year=stlouis_mo_08',)
('city_year=ftbelvoir_dc_12',)
('city_year=newhaven_ct_13',)
('city_year=sarasota_f

## 1.2 pv_rooftops_buildings

In [16]:
# Fetch the column definitions of the buildings table.
# The first element of every returned row is treated as a tab-separated
# string; it is split into stripped fields and shown as a table.
result = cursor.execute(f"DESCRIBE {DATABASE_NAME}.{BUILDINGS_TABLE_NAME}")
columns = [list(map(str.strip, row[0].split("\t"))) for row in result.fetchall()]
pd.DataFrame(columns, columns=["NAME", "TYPE", "FROM"])

Unnamed: 0,NAME,TYPE,FROM
0,gid,bigint,
1,bldg_fid,bigint,
2,the_geom_96703,string,
3,the_geom_4326,string,
4,city,string,
5,state,string,
6,year,bigint,
7,region_id,bigint,
8,__index_level_0__,bigint,
9,city_year,string,


In [17]:
# Retrieve partition information for the buildings table and print one row per line.
result = cursor.execute(f"SHOW PARTITIONS {DATABASE_NAME}.{BUILDINGS_TABLE_NAME}")
partition_rows = result.fetchall()
for row in partition_rows:
    print(row)

('city_year=dover_de_09',)
('city_year=boulder_co_14',)
('city_year=salem_or_08',)
('city_year=dayton_oh_12',)
('city_year=newyork_ny_05',)
('city_year=oxnard_ca_10',)
('city_year=olympia_wa_10',)
('city_year=omaha_ne_07',)
('city_year=sanantonio_tx_13',)
('city_year=buffalo_ny_08',)
('city_year=chicago_il_08',)
('city_year=minneapolis_mn_12',)
('city_year=sarasota_fl_09',)
('city_year=charleston_wv_09',)
('city_year=flint_mi_09',)
('city_year=louisville_ky_06',)
('city_year=trenton_nj_08',)
('city_year=grandrapids_mi_13',)
('city_year=jeffersoncity_mo_08',)
('city_year=santafe_nm_09',)
('city_year=jackson_ms_07',)
('city_year=jacksonville_fl_10',)
('city_year=elpaso_tx_07',)
('city_year=pensacola_fl_09',)
('city_year=reno_nv_07',)
('city_year=tulsa_ok_08',)
('city_year=lincoln_ne_08',)
('city_year=worcester_ma_09',)
('city_year=syracuse_ny_08',)
('city_year=birmingham_al_08',)
('city_year=boise_id_13',)
('city_year=albuquerque_nm_12',)
('city_year=sanfrancisco_ca_13',)
('city_year=con

## 1.3 pv_rooftops_developable_planes

In [18]:
# Fetch the column definitions of the developable-planes table.
# The first element of every returned row is treated as a tab-separated
# string; it is split into stripped fields and shown as a table.
result = cursor.execute(f"DESCRIBE {DATABASE_NAME}.{PLANES_TABLE_NAME}")
columns = [list(map(str.strip, row[0].split("\t"))) for row in result.fetchall()]
pd.DataFrame(columns, columns=["NAME", "TYPE", "FROM"])

Unnamed: 0,NAME,TYPE,FROM
0,bldg_fid,bigint,
1,footprint_m2,double,
2,slope,bigint,
3,flatarea_m2,double,
4,slopeconversion,double,
5,slopearea_m2,double,
6,zip,string,
7,zip_perc,double,
8,aspect,bigint,
9,gid,bigint,


In [19]:
# Retrieve partition information for the developable-planes table and print one row per line.
result = cursor.execute(f"SHOW PARTITIONS {DATABASE_NAME}.{PLANES_TABLE_NAME}")
partition_rows = result.fetchall()
for row in partition_rows:
    print(row)

('city_year=montgomery_al_07',)
('city_year=orlando_fl_09',)
('city_year=reno_nv_07',)
('city_year=bridgeport_ct_06',)
('city_year=tucson_az_07',)
('city_year=columbus_oh_06',)
('city_year=saltlakecity_ut_12',)
('city_year=tampa_fl_08',)
('city_year=birmingham_al_08',)
('city_year=omaha_ne_13',)
('city_year=santafe_nm_09',)
('city_year=tulsa_ok_08',)
('city_year=greensboro_nc_09',)
('city_year=newyork_ny_13',)
('city_year=dayton_oh_06',)
('city_year=neworleans_la_08',)
('city_year=youngstown_oh_08',)
('city_year=neworleans_la_12',)
('city_year=stlouis_mo_08',)
('city_year=augusta_me_08',)
('city_year=omaha_ne_07',)
('city_year=boise_id_13',)
('city_year=sandiego_ca_13',)
('city_year=seattle_wa_11',)
('city_year=worcester_ma_09',)
('city_year=sanbernardinoriverside_ca_12',)
('city_year=springfield_ma_13',)
('city_year=ftbelvoir_dc_12',)
('city_year=augusta_ga_10',)
('city_year=dayton_oh_12',)
('city_year=pensacola_fl_09',)
('city_year=anaheim_ca_10',)
('city_year=bismarck_nd_08',)
('cit

## 1.4 pv_rooftops_rasd

In [20]:
# Fetch the column definitions of the rasd table.
# The first element of every returned row is treated as a tab-separated
# string; it is split into stripped fields and shown as a table.
result = cursor.execute(f"DESCRIBE {DATABASE_NAME}.{RASD_TABLE_NAME}")
columns = [list(map(str.strip, row[0].split("\t"))) for row in result.fetchall()]
pd.DataFrame(columns, columns=["NAME", "TYPE", "FROM"])

Unnamed: 0,NAME,TYPE,FROM
0,gid,bigint,
1,the_geom_96703,string,
2,the_geom_4326,string,
3,city,string,
4,state,string,
5,year,bigint,
6,region_id,bigint,
7,serial_id,bigint,
8,__index_level_0__,bigint,
9,city_year,string,


In [21]:
# Retrieve partition information for the rasd table and print one row per line.
result = cursor.execute(f"SHOW PARTITIONS {DATABASE_NAME}.{RASD_TABLE_NAME}")
partition_rows = result.fetchall()
for row in partition_rows:
    print(row)

('city_year=laguardiajfk_ny_07',)
('city_year=lancaster_pa_10',)
('city_year=carsoncity_nv_09',)
('city_year=fresno_ca_13',)
('city_year=augusta_me_08',)
('city_year=chicago_il_12',)
('city_year=montpelier_vt_09',)
('city_year=richmond_va_13',)
('city_year=jackson_ms_07',)
('city_year=bismarck_nd_08',)
('city_year=coloradosprings_co_06',)
('city_year=mobile_al_10',)
('city_year=providence_ri_12',)
('city_year=seattle_wa_11',)
('city_year=minneapolis_mn_07',)
('city_year=bridgeport_ct_13',)
('city_year=jeffersoncity_mo_08',)
('city_year=losangeles_ca_07',)
('city_year=springfield_il_09',)
('city_year=tucson_az_07',)
('city_year=lubbock_tx_08',)
('city_year=flint_mi_09',)
('city_year=modesto_ca_10',)
('city_year=olympia_wa_10',)
('city_year=birmingham_al_08',)
('city_year=mcallen_tx_08',)
('city_year=oklahomacity_ok_13',)
('city_year=kansascity_mo_12',)
('city_year=louisville_ky_06',)
('city_year=corpuschristi_tx_12',)
('city_year=albuquerque_nm_12',)
('city_year=shreveport_la_08',)
('ci

## 2. PV Rooftops Query

In [36]:
# Select every column of the aspects rows where city is 'Denver' and year is
# 2012, and load the result into a pandas DataFrame.
# NOTE(review): the recorded run of this cell ended in
# "OperationalError: GENERIC_INTERNAL_ERROR: integer overflow". The cause is
# not visible here; presumably narrowing the selected columns or filtering on
# the city_year partition would help -- TODO confirm against Athena.
denver_2012_sql = f"""
    SELECT *
    FROM {DATABASE_NAME}.{ASPECTS_TABLE_NAME}
    WHERE city = 'Denver' and year = 2012
    """
co_aspects = pandas_cursor.execute(denver_2012_sql).as_pandas()

# Preview the first rows of the result.
co_aspects.head()

OperationalError: GENERIC_INTERNAL_ERROR: integer overflow

## 3. Data Analytics & Visualization