# Interactive Dashboard App

**Project Goal:** To build a dynamic web application that will allow users to choose their own features, build a model, and evaluate its performance through a graphic user interface - a tool that will allow anyone to build a model without code.

**Specific Goals:**
- Import and clean dataset
- Build web application layout
- Build interactive elements
- Launch Application.


Build Application:
- Application Layout
- Variance bar chart: Call back, decorator
- K-Means slider
- PCA scatter plot

In [77]:
import pandas as pd 
import plotly.express as px
from dash import Input, Output, dcc, html
from jupyter_dash import JupyterDash
from dash import Dash
from scipy.stats.mstats import trimmed_var 
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

### Prepare Data

Import

- Creating a wrangle function to import the data:

In [78]:
def wrangle(filepath):

    """Read SCF data file into ``DataFrame``.

    Returns only credit fearful households whose net worth is less than $2 million.

    Parameters
    ----------
    filepath : str
        Location of CSV file.
    """
    df = pd.read_csv(filepath)
    
    mask = (df["TURNFEAR"]==1) & (df["NETWORTH"]< 2e6)
    df = df[mask]
    
    return df

- Using the wrangle function to import the data:

In [79]:
df=wrangle("data/customer.csv")
print("df type:", type(df))
print("df shape:", df.shape)
df.head()

df type: <class 'pandas.core.frame.DataFrame'>
df shape: (4418, 351)


Unnamed: 0,YY1,Y1,WGT,HHSEX,AGE,AGECL,EDUC,EDCL,MARRIED,KIDS,...,NWCAT,INCCAT,ASSETCAT,NINCCAT,NINC2CAT,NWPCTLECAT,INCPCTLECAT,NINCPCTLECAT,INCQRTCAT,NINCQRTCAT
5,2,21,3790.476607,1,50,3,8,2,1,3,...,1,2,1,2,1,1,4,4,2,2
6,2,22,3798.868505,1,50,3,8,2,1,3,...,1,2,1,2,1,1,4,3,2,2
7,2,23,3799.468393,1,50,3,8,2,1,3,...,1,2,1,2,1,1,4,4,2,2
8,2,24,3788.076005,1,50,3,8,2,1,3,...,1,2,1,2,1,1,4,4,2,2
9,2,25,3793.066589,1,50,3,8,2,1,3,...,1,2,1,2,1,1,4,4,2,2


### Build Dashboard

#### Application Layout

Instatntiate Application

In [80]:
app = Dash(__name__)

print("app type:", type(app))

app type: <class 'dash.dash.Dash'>


Variant Bar Chart

Bar Chart: Presentation Layer

A function that returns the 5 highest variance features in the df.

- Step 1 is to create a get_high_var_features function that returns the five highest-variance features in a DataFrame. 

In [81]:
def get_high_var_features(trimmed=True, return_feat_names=True):

    """Returns the five highest-variance features of ``df``.

    Parameters
    ----------
    trimmed : bool, default=True
        If ``True``, calculates trimmed variance, removing bottom and top 10%
        of observations.

    return_feat_names : bool, default=False
        If ``True``, returns feature names as a ``list``. If ``False``
        returns ``Series``, where index is feature names and values are
        variances.
    """
    # Calculate variance
    if trimmed:
        top_five_features = (
        df.apply(trimmed_var).sort_values().tail(5)
        )
    else:
        top_five_features = df.var().sort_values().tail(5)
    
    # Extract names
    if return_feat_names:
        top_five_features = top_five_features.index.tolist()
    
    return top_five_features

In [82]:
get_high_var_features()

['DEBT', 'NETWORTH', 'HOUSES', 'NFIN', 'ASSET']

In [83]:
get_high_var_features(trimmed=False)

['NHNFIN', 'HOUSES', 'NETWORTH', 'NFIN', 'ASSET']

In [84]:
get_high_var_features(return_feat_names=False)

DEBT        3.089865e+09
NETWORTH    3.099929e+09
HOUSES      4.978660e+09
NFIN        8.456442e+09
ASSET       1.175370e+10
dtype: float64

Bar Chart: Service Layer

- We need to create a serve_bar_chart function that returns a plotly express bar chart of the five highest-variance features.

In [85]:
@app.callback(
    Output("bar-chart", "figure"), Input("trim-button", "value")
)
def serve_bar_chart(trimmed=True):

    """Returns a horizontal bar chart of five highest-variance features.

    Parameters
    ----------
    trimmed : bool, default=True
        If ``True``, calculates trimmed variance, removing bottom and top 10%
        of observations.
    """
    # Get features
    top_five_features = get_high_var_features(trimmed=trimmed, return_feat_names=False)
    
    #Build bar chart
    fig = px.bar(
        x=top_five_features,
        y=top_five_features.index,
        orientation="h"
    )
    fig.update_layout(
        xaxis_title="Variance",
        yaxis_title="Feature"
    )
    
    return fig

K-Means Slider: Business Layer

In [86]:
def get_model_metrics(trimmed=True, k=2, return_metrics=False):

    """Build ``KMeans`` model based on five highest-variance features in ``df``.

    Parameters
    ----------
    trimmed : bool, default=True
        If ``True``, calculates trimmed variance, removing bottom and top 10%
        of observations.

    k : int, default=2
        Number of clusters.

    return_metrics : bool, default=False
        If ``False`` returns ``KMeans`` model. If ``True`` returns ``dict``
        with inertia and silhouette score.

    """
    # Get high var features
    features = get_high_var_features(trimmed=trimmed, return_feat_names=True)
    
    # Create feature metrics
    X = df[features]
    
    # Build Model
    model = make_pipeline(
        StandardScaler(), KMeans(n_clusters=k, random_state=42)
    )
    model.fit(X)
    
    if return_metrics:
        # Calculate Inertia
        i = model.named_steps["kmeans"].inertia_
        # Calculate silhoutte score
        ss = silhouette_score(X, model.named_steps["kmeans"].labels_)
        # Put results into a dict
        metrics = {
            "inertia": round(i),
            "silhouette": round(ss, 3)
        }
        # Return the dict to user
        return metrics
            
    return model

In [87]:
get_model_metrics(trimmed=True, k=5, return_metrics=True)

{'inertia': 5010, 'silhouette': 0.656}

In [88]:
get_model_metrics(trimmed=True, k=5, return_metrics=False)

K-Means Slider: Service Layer

In [89]:
@app.callback(
    Output("metrics", "children"), 
    Input("trim-button", "value"),
    Input("K-slider", "value")
)
def serve_metrics(trimmed=True, k=2):

    """Returns list of ``H3`` elements containing inertia and silhouette score
    for ``KMeans`` model.

    Parameters
    ----------
    trimmed : bool, default=True
        If ``True``, calculates trimmed variance, removing bottom and top 10%
        of observations.

    k : int, default=2
        Number of clusters.
    """
    # Get metrics
    metrics = get_model_metrics(trimmed=trimmed, k=k, return_metrics=True)
    
    # Add metrics to HTML elements
    text = [
        html.H3(f"Inertia: {metrics['inertia']}"),
        html.H3(f"Silhouette Score: {metrics['silhouette']}")
    ]
    return text

PCA Scatter Plot: Business Layer

In [92]:
def get_pca_labels(trimmed=True, k=2):

    """
    ``KMeans`` labels.

    Parameters
    ----------
    trimmed : bool, default=True
        If ``True``, calculates trimmed variance, removing bottom and top 10%
        of observations.

    k : int, default=2
        Number of clusters.
    """
    # Create feature matrix
    features = get_high_var_features(trimmed=trimmed, return_feat_names=True)
    X = df[features]
    
    # Build Transformer
    transformer = PCA(n_components=2, random_state=42)
    
    # Transform data
    X_t = transformer.fit_transform(X)
    X_pca = pd.DataFrame(X_t, columns=["PC1", "PC2"])
    
    # Add labels
    model = get_model_metrics(trimmed=trimmed, k=k, return_metrics=False)
    
    X_pca["labels"] = model.named_steps["kmeans"].labels_.astype(str)
    
    X_pca.sort_values("labels", inplace=True)
                         
    return X_pca

In [93]:
get_pca_labels().tail()

Unnamed: 0,PC1,PC2,labels
1570,-229796.419844,-14301.836873,1
1571,-229805.583716,-14250.840322,1
1572,-229814.747589,-14199.843771,1
1611,-213724.57142,-39060.460885,1
4417,334191.956229,-186450.064242,1


PCA Scatter Plot: Service Layer

Layout

In [90]:
app.layout = html.Div(
    [
        # Application Title
        html.H1("Survey of Consumer Finances"),
        
        # Bar Chart Element
        html.H2("High Variance Features"), 

        # Bar chart
        dcc.Graph(id="bar-chart"),
        dcc.RadioItems(
            options=[
                {"label": "trimmed Variance", "value": True},
                {"label": "not trimmed", "value": False}
            ],
            value=True,
            id="trim-button"
        ),
        # K- means slider
        html.H2("K-means Clustering"),
        html.H3("Number of Cluster(K)"),
        dcc.Slider(min=2, max=12, step=1, value=2, id="K-slider"),
        html.Div(id="metrics"),

        # PCA scatter
        dcc.Graph(id="pca-scatter")
    ]

)

### Application Deployment

In [91]:
app.run_server(debug=True)