In [2]:
import duckdb

# connect to duckdb database
con = duckdb.connect("../data/analysis.duckdb")

In [2]:
df = con.execute("SELECT area, zoning_pressure " \
"FROM annual_metrics " \
"WHERE Year = '2023' " \
"ORDER BY Zoning_Pressure DESC;").df()
print(df)

                              Area  Zoning_pressure
0          Seattle-Tacoma-Bellevue         1.505018
1  Washington-Arlington-Alexandria         1.467216
2  Minneapolis-St Paul-Bloomington         1.411690
3     Austin-Round Rock-Georgetown         1.203556
4     Portland-Vancouver-Hillsboro         1.144236


In [3]:
df = con.execute("SELECT year, area, structural_gap, " \
"RANK() OVER (PARTITION BY year ORDER BY structural_gap DESC) " \
"as gap_rank " \
"FROM cumulative_metrics " \
"WHERE year > CAST(2014 AS VARCHAR) " \
"ORDER BY year, gap_rank " ).df()
print(df.head(10))

   Year                             Area  Structural_Gap  gap_rank
0  2015  Washington-Arlington-Alexandria        1.000000         1
1  2015          Seattle-Tacoma-Bellevue        1.000000         1
2  2015     Portland-Vancouver-Hillsboro        1.000000         1
3  2015  Minneapolis-St Paul-Bloomington        1.000000         1
4  2015     Austin-Round Rock-Georgetown        1.000000         1
5  2016          Seattle-Tacoma-Bellevue        1.068484         1
6  2016  Washington-Arlington-Alexandria        1.030361         2
7  2016     Austin-Round Rock-Georgetown        1.025338         3
8  2016     Portland-Vancouver-Hillsboro        0.942739         4
9  2016  Minneapolis-St Paul-Bloomington        0.841181         5


In [4]:
df = con.execute("SELECT area, AVG(zoning_pressure) AS avg_pressure " \
"FROM annual_metrics " \
"GROUP BY area " \
"ORDER BY avg_pressure DESC;").df()
print(df)

                              Area  avg_pressure
0          Seattle-Tacoma-Bellevue      1.080820
1     Portland-Vancouver-Hillsboro      1.068923
2  Washington-Arlington-Alexandria      1.038183
3     Austin-Round Rock-Georgetown      1.027821
4  Minneapolis-St Paul-Bloomington      0.997981


In [8]:
df = con.execute("DESCRIBE annual_metrics;").df()
print(df)
df = con.execute("DESCRIBE cumulative_metrics;").df()
print(df)
df = con.execute("DESCRIBE wages_metrics;").df()
print(df)   
df = con.execute("DESCRIBE permits_metrics;").df()
print(df)
df = con.execute("DESCRIBE permits_metrics_qtr;").df()  
print(df)


         column_name column_type null   key default extra
0               Area     VARCHAR  YES  None    None  None
1               Code     VARCHAR  YES  None    None  None
2               Year     VARCHAR  YES  None    None  None
3        Total Wages      BIGINT  YES  None    None  None
4   Real_Total_Wages      DOUBLE  YES  None    None  None
5   Change_Real_Wage      DOUBLE  YES  None    None  None
6      Total Permits      DOUBLE  YES  None    None  None
7      Change_permit      DOUBLE  YES  None    None  None
8         Wage_Index      DOUBLE  YES  None    None  None
9       Permit_Index      DOUBLE  YES  None    None  None
10   Zoning_pressure      DOUBLE  YES  None    None  None
          column_name column_type null   key default extra
0                Area     VARCHAR  YES  None    None  None
1                Year     VARCHAR  YES  None    None  None
2    Real_Total_Wages      DOUBLE  YES  None    None  None
3       Total Permits      DOUBLE  YES  None    None  None
4    Cumu

In [17]:
df = con.sql(
    "SELECT Code, Year, Quarter, COUNT(*) AS months_in_qtr " \
    "FROM permits_metrics " \
    "GROUP BY Code, Year, Quarter " \
    "HAVING COUNT(*) NOT IN (3, 0);" \
).df()
print(df)

Empty DataFrame
Columns: [Code, Year, Quarter, months_in_qtr]
Index: []


In [9]:
df = con.execute(
'CREATE TABLE IF NOT EXISTS permits_metrics_qtr AS ' \
'SELECT ' \
'  Code,' \
'  Year,' \
'  Quarter,' \
'  SUM("Total Permits") AS total_permits_qtr ' \
'FROM permits_metrics ' \
'GROUP BY Code, Year, Quarter;' ).df()

In [14]:
df = con.sql("SELECT * from permits_metrics_qtr LIMIT 5;").df()
print(df)

    Code  Year Quarter  total_permits_qtr
0  42660  2014       2             6227.0
1  42660  2018       2             6600.0
2  42660  2022       4             5207.0
3  38900  2016       4             3130.0
4  33460  2017       2             3826.0


In [None]:
df = con.execute(
    'CREATE TABLE IF NOT EXISTS quarterly_metrics AS ' \
    'SELECT ' \
    '  w.Area AS area, ' \
    '  w.Code AS code, ' \
    '  w.Year AS year, ' \
    '  w.Quarter AS quarter, ' \
    '  w."Total Wages" AS total_wages, ' \
    '  p.total_permits_qtr as total_permits, ' \
    'FROM wages_metrics AS w ' \
    'JOIN permits_metrics_qtr AS p ' \
    'ON w.Code = p.Code ' \
    'AND w.Year = p.Year ' \
    'AND w.Quarter = p.Quarter ' \
).df()

In [25]:
df = con.sql("SELECT * FROM quarterly_metrics;").df()
print(df)

                                area   code  year quarter  total_wages  \
0            Seattle-Tacoma-Bellevue  42660  2014       1  28345432328   
1            Seattle-Tacoma-Bellevue  42660  2014       2  26468223145   
2            Seattle-Tacoma-Bellevue  42660  2014       3  30379404154   
3            Seattle-Tacoma-Bellevue  42660  2014       4  29625276201   
4            Seattle-Tacoma-Bellevue  42660  2015       1  29545105623   
..                               ...    ...   ...     ...          ...   
215  Washington-Arlington-Alexandria  47900  2023       4  78819191513   
216  Washington-Arlington-Alexandria  47900  2024       1  81656822892   
217  Washington-Arlington-Alexandria  47900  2024       2  76447010380   
218  Washington-Arlington-Alexandria  47900  2024       3  75297861409   
219  Washington-Arlington-Alexandria  47900  2024       4  82518142845   

     total_permits  
0           3891.0  
1           6227.0  
2           6466.0  
3           5178.0  
4     

In [None]:
df = con.sql('' \
'SELECT ' \
'  w.Area AS area, ' \
'  w.Code AS code, ' \
'  w.Year AS year, ' \
'  w.Quarter AS quarter, ' \
'  w."Total Wages" AS total_wages, ' \
'  p.Month AS month, ' \
'  p."Total Permits" AS total_permits ' \
'FROM wages_metrics AS w ' \
'JOIN permits_metrics as p ' \
'ON w.Code = p.Code ' \
'AND w.Year = p.Year ' \
'AND w.Quarter = p.Quarter ' \
#'WHERE w.Code = 47900 ' \
'ORDER BY ' \
'  area DESC, ' \
'  year DESC, ' \
'  month DESC ' \
'LIMIT 12;').df()
print(df)

                            area   code  year quarter  total_wages month  \
0   Austin-Round Rock-Georgetown  12420  2024       4  29697229153    12   
1   Austin-Round Rock-Georgetown  12420  2024       4  29697229153    11   
2   Austin-Round Rock-Georgetown  12420  2024       4  29697229153    10   
3   Austin-Round Rock-Georgetown  12420  2024       3  27007129079    09   
4   Austin-Round Rock-Georgetown  12420  2024       3  27007129079    08   
5   Austin-Round Rock-Georgetown  12420  2024       3  27007129079    07   
6   Austin-Round Rock-Georgetown  12420  2024       2  26725691149    06   
7   Austin-Round Rock-Georgetown  12420  2024       2  26725691149    05   
8   Austin-Round Rock-Georgetown  12420  2024       2  26725691149    04   
9   Austin-Round Rock-Georgetown  12420  2024       1  29205025839    03   
10  Austin-Round Rock-Georgetown  12420  2024       1  29205025839    02   
11  Austin-Round Rock-Georgetown  12420  2024       1  29205025839    01   

    total_p

In [26]:
con.close()