In [None]:
import duckdb

# Open the connection to the project's DuckDB database file.
# Every query cell below reuses this single `con` object.
DB_PATH = "../data/analysis.duckdb"
con = duckdb.connect(database=DB_PATH)

In [None]:
# Zoning pressure per area for the rows where Year = '2023',
# ordered from the highest pressure to the lowest.
pressure_2023_sql = " ".join(
    [
        "SELECT area, zoning_pressure",
        "FROM annual_metrics",
        "WHERE Year = '2023'",
        "ORDER BY Zoning_Pressure DESC;",
    ]
)
df = con.execute(pressure_2023_sql).df()
print(df)

In [None]:
# Rank areas inside each year by structural_gap (largest gap gets rank 1),
# restricted to rows whose year compares greater than CAST(2014 AS VARCHAR),
# then preview the first ten rows of the year / rank ordering.
gap_rank_sql = (
    "SELECT year, area, structural_gap, "
    "RANK() OVER (PARTITION BY year ORDER BY structural_gap DESC) "
    "as gap_rank "
    "FROM cumulative_metrics "
    "WHERE year > CAST(2014 AS VARCHAR) "
    "ORDER BY year, gap_rank "
)
gap_rank_result = con.execute(gap_rank_sql)
df = gap_rank_result.df()
print(df.head(10))

In [None]:
# Average zoning pressure per area over all rows of annual_metrics,
# highest average first.
avg_pressure_clauses = (
    "SELECT area, AVG(zoning_pressure) AS avg_pressure",
    "FROM annual_metrics",
    "GROUP BY area",
    "ORDER BY avg_pressure DESC;",
)
df = con.execute(" ".join(avg_pressure_clauses)).df()
print(df)

In [None]:
# Print the schema of every table this notebook reads or builds.
#
# The base tables must already exist in the database, so a failure there is
# left to propagate. permits_metrics_qtr and quarterly_metrics, however, are
# only created by later cells (CREATE TABLE IF NOT EXISTS ...). On a database
# where those cells have not run yet, a bare DESCRIBE raises a catalog error
# and aborts "Restart & Run All", so a missing derived table is reported and
# skipped instead.
base_tables = (
    "annual_metrics",
    "cumulative_metrics",
    "wages_metrics",
    "permits_metrics",
)
derived_tables = (
    "permits_metrics_qtr",
    "quarterly_metrics",
)

for table_name in base_tables:
    print(con.execute(f"DESCRIBE {table_name};").df())

for table_name in derived_tables:
    try:
        print(con.execute(f"DESCRIBE {table_name};").df())
    except duckdb.CatalogException as exc:
        # Table not built yet: run the CREATE TABLE cells below, then re-run this cell.
        print(f"{table_name}: not available yet ({exc})")

In [None]:
# Sanity check: list every (Code, Year, Quarter) group in permits_metrics whose
# row count is neither 3 nor 0. Presumably each quarter should hold three
# monthly rows (the count is aliased months_in_qtr) -- TODO confirm.
incomplete_qtr_sql = (
    "SELECT Code, Year, Quarter, COUNT(*) AS months_in_qtr "
    "FROM permits_metrics "
    "GROUP BY Code, Year, Quarter "
    "HAVING COUNT(*) NOT IN (3, 0);"
)
incomplete_qtr_rel = con.sql(incomplete_qtr_sql)
df = incomplete_qtr_rel.df()
print(df)

In [None]:
# Build permits_metrics_qtr (a no-op when the table already exists): one row per
# (Code, Year, Quarter) holding the sum of "Total Permits" from permits_metrics.
permits_qtr_ddl = "".join(
    [
        'CREATE TABLE IF NOT EXISTS permits_metrics_qtr AS ',
        'SELECT ',
        '  Code,',
        '  Year,',
        '  Quarter,',
        '  SUM("Total Permits") AS total_permits_qtr ',
        'FROM permits_metrics ',
        'GROUP BY Code, Year, Quarter;',
    ]
)
# `df` receives whatever frame DuckDB returns for the statement; it is not shown here.
df = con.execute(permits_qtr_ddl).df()

In [None]:
# Peek at the first five rows of the quarterly permits table.
permits_qtr_preview = con.sql("SELECT * from permits_metrics_qtr LIMIT 5;")
df = permits_qtr_preview.df()
print(df)

In [None]:
# Build quarterly_metrics (a no-op when the table already exists) by joining
# quarterly wages to quarterly permit totals on (Code, Year, Quarter).
# This is an inner join, so only keys present in both tables are kept.
#
# The select list previously ended with a dangling comma right before FROM.
# DuckDB tolerates that, but it is not standard SQL and reads like a dropped
# column, so the comma is removed. The line-continuation backslashes were
# redundant inside the parentheses and are gone as well.
df = con.execute(
    'CREATE TABLE IF NOT EXISTS quarterly_metrics AS '
    'SELECT '
    '  w.Area AS area, '
    '  w.Code AS code, '
    '  w.Year AS year, '
    '  w.Quarter AS quarter, '
    '  w."Total Wages" AS total_wages, '
    '  p.total_permits_qtr as total_permits '
    'FROM wages_metrics AS w '
    'JOIN permits_metrics_qtr AS p '
    'ON w.Code = p.Code '
    'AND w.Year = p.Year '
    'AND w.Quarter = p.Quarter '
).df()

In [None]:
# Load the whole quarterly_metrics table into a DataFrame and print it.
quarterly_rel = con.sql("SELECT * FROM quarterly_metrics;")
df = quarterly_rel.df()
print(df)

In [None]:
# Join quarterly wages to the monthly permits rows on (Code, Year, Quarter) and
# show the first 12 rows ordered by area, year and month, all descending.
# To inspect a single code, insert a filter such as `WHERE w.Code = 47900`
# between the join conditions and the ORDER BY clause.
monthly_join_sql = "".join(
    [
        # projected columns
        'SELECT ',
        '  w.Area AS area, ',
        '  w.Code AS code, ',
        '  w.Year AS year, ',
        '  w.Quarter AS quarter, ',
        '  w."Total Wages" AS total_wages, ',
        '  p.Month AS month, ',
        '  p."Total Permits" AS total_permits ',
        # join of wages to monthly permits
        'FROM wages_metrics AS w ',
        'JOIN permits_metrics as p ',
        'ON w.Code = p.Code ',
        'AND w.Year = p.Year ',
        'AND w.Quarter = p.Quarter ',
        # ordering and row cap
        'ORDER BY ',
        '  area DESC, ',
        '  year DESC, ',
        '  month DESC ',
        'LIMIT 12;',
    ]
)
monthly_join_rel = con.sql(monthly_join_sql)
df = monthly_join_rel.df()
print(df)

In [None]:
# Close the DuckDB connection opened in the first cell; cells that use `con`
# must not be run after this one without re-running the connect cell.
con.close()