In [None]:
!pip install -r ../requirements.txt

In [9]:
import duckdb
import os
# create a connection to a file called 'test_database.db'
# Use full path or relative path based on your working directory
print(os.getcwd()) 


d:\NASA_Interview_Fresh\housing_data\jupyter_notebooks


In [10]:

con = duckdb.connect("../data/medallion.db")

In [11]:
con.sql("SHOW TABLES")

┌──────────────────────────┐
│           name           │
│         varchar          │
├──────────────────────────┤
│ svi                      │
│ tracts_per_zip           │
│ zhvi_sfr_zip             │
│ zhvi_sfr_zip_with_svi    │
│ zhvi_sfr_zip_with_tracts │
│ zip_tract                │
└──────────────────────────┘

In [12]:
con.sql(
    "SELECT * FROM zhvi_sfr_zip_with_svi WHERE StateName = 'MD' ORDER BY avg_rpl_themes "
)

┌────────────┬───────────┬───────┬────────────┬────────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┐
│ RegionName │ StateName │  zip  │    date    │        zhvi        │ avg_rpl_theme1 │ avg_rpl_theme2 │ avg_rpl_theme3 │ avg_rpl_theme4 │ avg_rpl_themes │
│   int64    │  varchar  │ int64 │  varchar   │       double       │     double     │     double     │     double     │     double     │     double     │
├────────────┼───────────┼───────┼────────────┼────────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┤
│      21777 │ MD        │ 21777 │ 2022-02-28 │  457127.3972036655 │         0.0714 │         0.1038 │         0.4599 │         0.0057 │         0.0108 │
│      21777 │ MD        │ 21777 │ 2022-08-31 │  475167.2715428457 │         0.0714 │         0.1038 │         0.4599 │         0.0057 │         0.0108 │
│      21777 │ MD        │ 21777 │ 2022-01-31 │  452250.0584766461 │        

In [13]:
zillow_svi_df = con.sql(
    """
SELECT 
    StateName,
    zip::string as zip,
    year(date::DATE) as year, 
    avg(zhvi) as avg_zhvi, 
    avg(avg_rpl_theme1) as avg_rpl_theme1, 
    avg(avg_rpl_theme2) as avg_rpl_theme2, 
    avg(avg_rpl_theme3) as avg_rpl_theme3, 
    avg(avg_rpl_theme4) as avg_rpl_theme4, 
    avg(avg_rpl_themes) as avg_rpl_themes 
FROM zhvi_sfr_zip_with_svi 
GROUP BY StateName, zip, year(date::DATE)
ORDER BY avg_zhvi"""
).df()
zillow_svi_df.head(1)

Unnamed: 0,StateName,zip,year,avg_zhvi,avg_rpl_theme1,avg_rpl_theme2,avg_rpl_theme3,avg_rpl_theme4,avg_rpl_themes
0,MI,48505,2022,25628.925296,0.832889,0.736356,0.84855,0.451722,0.770572


In [14]:
con.sql(
    """
    SELECT * FROM (
    SELECT 
        StateName,
        zip::string as zip,
        year(date::DATE) as year, 
        avg(zhvi) as avg_zhvi, 
        avg(avg_rpl_theme1) as avg_rpl_theme1, 
        avg(avg_rpl_theme2) as avg_rpl_theme2, 
        avg(avg_rpl_theme3) as avg_rpl_theme3, 
        avg(avg_rpl_theme4) as avg_rpl_theme4, 
        avg(avg_rpl_themes) as avg_rpl_themes 
    FROM zhvi_sfr_zip_with_svi 
    GROUP BY StateName, zip, year(date::DATE)
    ORDER BY avg_zhvi)
    WHERE avg_zhvi is not null AND avg_rpl_themes IS NOT NULL AND StateName = 'DC'
"""
)

┌───────────┬─────────┬───────┬────────────────────┬─────────────────────┬──────────────────────┬─────────────────────┬────────────────────┬─────────────────────┐
│ StateName │   zip   │ year  │      avg_zhvi      │   avg_rpl_theme1    │    avg_rpl_theme2    │   avg_rpl_theme3    │   avg_rpl_theme4   │   avg_rpl_themes    │
│  varchar  │ varchar │ int64 │       double       │       double        │        double        │       double        │       double       │       double        │
├───────────┼─────────┼───────┼────────────────────┼─────────────────────┼──────────────────────┼─────────────────────┼────────────────────┼─────────────────────┤
│ DC        │ 20006   │  2022 │  257210.8715989442 │ 0.42169999999999996 │ 0.007266666666666669 │  0.5806333333333334 │ 0.9794333333333333 │ 0.42863333333333337 │
│ DC        │ 20032   │  2022 │ 362699.57341268007 │  0.7566538461538461 │   0.6499153846153848 │  0.9460769230769231 │ 0.7854846153846152 │  0.8297923076923078 │
│ DC        │ 20036   

In [15]:
from dash import Dash, dcc, html, Input, Output, callback
import plotly.express as px
from plotly.express import data
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Create sample app
app = Dash(__name__)

years = zillow_svi_df.year.unique()
years.sort()
states = zillow_svi_df.StateName.unique()
states.sort()
themes = [
    "avg_rpl_theme1",
    "avg_rpl_theme2",
    "avg_rpl_theme3",
    "avg_rpl_theme4",
    "avg_rpl_themes",
]


# dcc.Dropdown(df.nation.unique(), id='pandas-dropdown-2'),
# html.Div(id='pandas-output-container-2')

# Create layout with dropdown and graph
app.layout = html.Div(
    [
        html.Div(
            [
                dcc.Dropdown(years, id="pandas-dropdown-1", value=2020),
                html.Div(id="pandas-output-container-1"),
            ]
        ),
        html.Div(
            [
                dcc.Dropdown(states, id="pandas-dropdown-2"),
                html.Div(id="pandas-output-container-2"),
            ]
        ),
        html.Div(
            [
                dcc.Dropdown(themes, id="pandas-dropdown-3", value="avg_rpl_themes"),
                html.Div(id="pandas-output-container-3"),
            ]
        ),
    ]
)


def update_graph():
    pass


@callback(
    Output("pandas-output-container-3", "children"),
    Input("pandas-dropdown-1", "value"),
    Input("pandas-dropdown-2", "value"),
    Input("pandas-dropdown-3", "value"),
)
def update_output(year, state, rpl_theme):
    render_df = con.sql(
        f"""
        SELECT * FROM (
        SELECT 
            StateName,
            zip::string as zip,
            year(date::DATE) as year, 
            avg(zhvi) as avg_zhvi, 
            avg(avg_rpl_theme1) as avg_rpl_theme1, 
            avg(avg_rpl_theme2) as avg_rpl_theme2, 
            avg(avg_rpl_theme3) as avg_rpl_theme3, 
            avg(avg_rpl_theme4) as avg_rpl_theme4, 
            avg(avg_rpl_themes) as avg_rpl_themes 
        FROM zhvi_sfr_zip_with_svi 
        GROUP BY StateName, zip, year(date::DATE)
        ORDER BY avg_zhvi)
        WHERE avg_zhvi is not null AND {rpl_theme} IS NOT NULL"""
    ).df()

    render_df = render_df.loc[zillow_svi_df["year"] == year]
    render_df = render_df.loc[zillow_svi_df["StateName"] == state]
    render_df["idx"] = range(1, len(render_df) + 1)

    # Create figure with subplots
    fig = make_subplots(rows=2, cols=1)
    fig.update_layout(height=1000, bargap=0)
    # Add first bar chart
    fig.add_trace(
        go.Bar(
            x=render_df.zip,
            y=render_df.avg_zhvi,
            name="Average ZHVI",
        ),
        row=1,
        col=1,
    )

    # Add second bar chart
    fig.add_trace(
        go.Bar(
            x=render_df.zip,
            y=render_df[rpl_theme],
            name="Average SVI Theme",
        ),
        row=2,
        col=1,
    )

    fig_scatter = px.scatter(
        render_df,
        x="idx",
        y=rpl_theme,
        trendline="ols",
        trendline_color_override="black",
    )
    trendline = fig_scatter.data[1]
    trendline.x = list(render_df.zip)
    fig.add_trace(trendline, row=2, col=1)
    return dcc.Graph(figure=fig)


if __name__ == "__main__":
    app.run_server(debug=True, port=1222)

---------------------------------------------------------------------------
InvalidInputException                     Traceback (most recent call last)
Cell In[15], line 79, in update_output(
    year=2020,
    state=None,
    rpl_theme='avg_rpl_themes'
)
     55 @callback(
     56     Output("pandas-output-container-3", "children"),
     57     Input("pandas-dropdown-1", "value"),
   (...)
     60 )
     61 def update_output(year, state, rpl_theme):
     62     render_df = con.sql(
     63         f"""
     64         SELECT * FROM (
     65         SELECT 
     66             StateName,
     67             zip::string as zip,
     68             year(date::DATE) as year, 
     69             avg(zhvi) as avg_zhvi, 
     70             avg(avg_rpl_theme1) as avg_rpl_theme1, 
     71             avg(avg_rpl_theme2) as avg_rpl_theme2, 
     72             avg(avg_rpl_theme3) as avg_rpl_theme3, 
     73             avg(avg_rpl_theme4) as avg_rpl_theme4, 
     74             avg(avg_rpl_t

In [16]:
# con.close()