In [7]:
import duckdb

# create a connection to a file called 'test_database.db'
con = duckdb.connect("../data/database_jupyter.db")

In [12]:
con.sql("SHOW TABLES")

┌────────────────────────────────────────┐
│                  name                  │
│                varchar                 │
├────────────────────────────────────────┤
│ crosswalk_zillow_cbsa                  │
│ crosswalk_zip_tract                    │
│ svi                                    │
│ tracts_per_zip_silver                  │
│ zillow_zhvi_sfr_zip_bronze             │
│ zillow_zhvi_sfr_zip_silver             │
│ zillow_zhvi_sfr_zip_with_svi_silver    │
│ zillow_zhvi_sfr_zip_with_tracts_silver │
└────────────────────────────────────────┘

In [14]:
con.sql(
    "SELECT * FROM zillow_zhvi_sfr_zip_with_svi_silver WHERE StateName = 'MD' ORDER BY avg_rpl_themes "
)

┌────────────┬───────────┬───────┬────────────┬────────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┐
│ RegionName │ StateName │  zip  │    date    │        zhvi        │ avg_rpl_theme1 │ avg_rpl_theme2 │ avg_rpl_theme3 │ avg_rpl_theme4 │ avg_rpl_themes │
│   int64    │  varchar  │ int64 │  varchar   │       double       │     double     │     double     │     double     │     double     │     double     │
├────────────┼───────────┼───────┼────────────┼────────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┤
│      21777 │ MD        │ 21777 │ 2020-02-29 │  359302.7342629391 │         0.0826 │           0.08 │         0.3739 │         0.0097 │         0.0102 │
│      21777 │ MD        │ 21777 │ 2020-07-31 │  363144.7122922807 │         0.0826 │           0.08 │         0.3739 │         0.0097 │         0.0102 │
│      21777 │ MD        │ 21777 │ 2020-05-31 │  362513.0849595755 │        

In [16]:
zillow_svi_df = con.sql(
    """
SELECT 
    StateName,
    zip::string as zip,
    year(date::DATE) as year, 
    avg(zhvi) as avg_zhvi, 
    avg(avg_rpl_theme1) as avg_rpl_theme1, 
    avg(avg_rpl_theme2) as avg_rpl_theme2, 
    avg(avg_rpl_theme3) as avg_rpl_theme3, 
    avg(avg_rpl_theme4) as avg_rpl_theme4, 
    avg(avg_rpl_themes) as avg_rpl_themes 
FROM zillow_zhvi_sfr_zip_with_svi_silver 
GROUP BY StateName, zip, year(date::DATE)
ORDER BY avg_zhvi"""
).df()
zillow_svi_df.head(1)

Unnamed: 0,StateName,zip,year,avg_zhvi,avg_rpl_theme1,avg_rpl_theme2,avg_rpl_theme3,avg_rpl_theme4,avg_rpl_themes
0,MI,48505,2020,15658.766859,0.850417,0.7281,0.859856,0.447839,0.782972


In [19]:
con.sql(
    """
    SELECT * FROM (
    SELECT 
        StateName,
        zip::string as zip,
        year(date::DATE) as year, 
        avg(zhvi) as avg_zhvi, 
        avg(avg_rpl_theme1) as avg_rpl_theme1, 
        avg(avg_rpl_theme2) as avg_rpl_theme2, 
        avg(avg_rpl_theme3) as avg_rpl_theme3, 
        avg(avg_rpl_theme4) as avg_rpl_theme4, 
        avg(avg_rpl_themes) as avg_rpl_themes 
    FROM zillow_zhvi_sfr_zip_with_svi_silver 
    GROUP BY StateName, zip, year(date::DATE)
    ORDER BY avg_zhvi)
    WHERE avg_zhvi is not null AND avg_rpl_themes IS NOT NULL AND StateName = 'DC'
"""
)

┌───────────┬─────────┬───────┬────────────────────┬─────────────────────┬──────────────────────┬─────────────────────┬─────────────────────┬─────────────────────┐
│ StateName │   zip   │ year  │      avg_zhvi      │   avg_rpl_theme1    │    avg_rpl_theme2    │   avg_rpl_theme3    │   avg_rpl_theme4    │   avg_rpl_themes    │
│  varchar  │ varchar │ int64 │       double       │       double        │        double        │       double        │       double        │       double        │
├───────────┼─────────┼───────┼────────────────────┼─────────────────────┼──────────────────────┼─────────────────────┼─────────────────────┼─────────────────────┤
│ DC        │ 20032   │  2020 │ 345486.65958388924 │  0.7532769230769231 │   0.6916769230769231 │  0.9451923076923077 │   0.790623076923077 │  0.8422230769230771 │
│ DC        │ 20019   │  2020 │  364626.5045981947 │  0.7525761904761906 │   0.7269095238095237 │  0.9736380952380955 │  0.8071714285714284 │  0.8450952380952382 │
│ DC        │ 20

In [None]:
from dash import Dash, dcc, html, Input, Output, callback
import plotly.express as px
from plotly.express import data

# Create sample app
app = Dash(__name__)

years = zillow_svi_df.year.unique()
years.sort()
states = zillow_svi_df.StateName.unique()
states.sort()
themes = [
    "avg_rpl_theme1",
    "avg_rpl_theme2",
    "avg_rpl_theme3",
    "avg_rpl_theme4",
    "avg_rpl_themes",
]


# dcc.Dropdown(df.nation.unique(), id='pandas-dropdown-2'),
# html.Div(id='pandas-output-container-2')

# Create layout with dropdown and graph
app.layout = html.Div(
    [
        html.Div(
            [
                dcc.Dropdown(years, id="pandas-dropdown-1", value=2020),
                html.Div(id="pandas-output-container-1"),
            ]
        ),
        html.Div(
            [
                dcc.Dropdown(states, id="pandas-dropdown-2"),
                html.Div(id="pandas-output-container-2"),
            ]
        ),
        html.Div(
            [
                dcc.Dropdown(themes, id="pandas-dropdown-3", value="avg_rpl_themes"),
                html.Div(id="pandas-output-container-3"),
            ]
        ),
    ]
)


def update_graph():
    pass


@callback(
    Output("pandas-output-container-3", "children"),
    Input("pandas-dropdown-1", "value"),
    Input("pandas-dropdown-2", "value"),
    Input("pandas-dropdown-3", "value"),
)
def update_output(year, state, rpl_theme):
    render_df = con.sql(
        f"""
        SELECT * FROM (
        SELECT 
            StateName,
            zip::string as zip,
            year(date::DATE) as year, 
            avg(zhvi) as avg_zhvi, 
            avg(avg_rpl_theme1) as avg_rpl_theme1, 
            avg(avg_rpl_theme2) as avg_rpl_theme2, 
            avg(avg_rpl_theme3) as avg_rpl_theme3, 
            avg(avg_rpl_theme4) as avg_rpl_theme4, 
            avg(avg_rpl_themes) as avg_rpl_themes 
        FROM zillow_zhvi_sfr_zip_with_svi_silver 
        GROUP BY StateName, zip, year(date::DATE)
        ORDER BY avg_zhvi)
        WHERE avg_zhvi is not null AND {rpl_theme} IS NOT NULL"""
    ).df()

    render_df = render_df.loc[zillow_svi_df["year"] == year]
    render_df = render_df.loc[zillow_svi_df["StateName"] == state]
    render_df["idx"] = range(1, len(render_df) + 1)

    # Create figure with subplots
    fig = make_subplots(rows=2, cols=1)
    fig.update_layout(height=1000, bargap=0)
    # Add first bar chart
    fig.add_trace(
        go.Bar(
            x=render_df.zip,
            y=render_df.avg_zhvi,
            name="Average ZHVI",
        ),
        row=1,
        col=1,
    )

    # Add second bar chart
    fig.add_trace(
        go.Bar(
            x=render_df.zip,
            y=render_df[rpl_theme],
            name="Average SVI Theme",
        ),
        row=2,
        col=1,
    )

    fig_scatter = px.scatter(
        render_df,
        x="idx",
        y=rpl_theme,
        trendline="ols",
        trendline_color_override="black",
    )
    trendline = fig_scatter.data[1]
    trendline.x = list(render_df.zip)
    fig.add_trace(trendline, row=2, col=1)
    return dcc.Graph(figure=fig)


if __name__ == "__main__":
    app.run_server(debug=True, port=1222)

---------------------------------------------------------------------------
InvalidInputException                     Traceback (most recent call last)
Cell In[20], line 62, in update_output(
    year=2020,
    state=None,
    rpl_theme='avg_rpl_themes'
)
     38 @callback(
     39     Output('pandas-output-container-3', 'children'),
     40     Input('pandas-dropdown-1', 'value'),
   (...)
     44 )
     45 def update_output(year, state, rpl_theme):
     46     render_df= con.sql(
     47         f"""
     48         SELECT * FROM (
     49         SELECT 
     50             StateName,
     51             zip::string as zip,
     52             year(date::DATE) as year, 
     53             avg(zhvi) as avg_zhvi, 
     54             avg(avg_rpl_theme1) as avg_rpl_theme1, 
     55             avg(avg_rpl_theme2) as avg_rpl_theme2, 
     56             avg(avg_rpl_theme3) as avg_rpl_theme3, 
     57             avg(avg_rpl_theme4) as avg_rpl_theme4, 
     58             avg(avg_rpl_th

In [None]:
con.close()