In [20]:
# modelling imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
from joblib import dump, load

# Voila app imports
from IPython.display import display, HTML
import ipywidgets as widgets
from ipywidgets import GridBox, Layout

## Dataset

y: has the client subscribed a term deposit? (binary: 'yes','no')<br>
poutcome: outcome of the previous marketing campaign (categorical: 'failure','nonexistent','success','unknown')<br>
default: has credit in default? (categorical: 'no','yes',)<br>
housing: has housing loan? (categorical: 'no','yes')<br>
marital: self-explanatory<br>
job: self-explanatory

In [72]:
df = pd.read_csv("bank-full.csv", skiprows=0, delimiter=";")
df = df[["job", "marital", "default", "housing", "poutcome", "y"]]

In [73]:
le = LabelEncoder()
label = le.fit_transform(df["y"])
df.drop("y", axis=1, inplace=True)
df["y"] = label

In [74]:
df["default"].unique()

array(['no', 'yes'], dtype=object)

## Sorting widgets

In [5]:
ALL = "ALL"


def unique_values_ALL(array):
    unique = array.unique().tolist()
    unique.sort()
    unique.insert(0, ALL)  # insert ALL option
    return unique

In [6]:
# widgets

output = widgets.Output()

plot_output = widgets.Output()

job_dropdown = widgets.Dropdown(
    options=unique_values_ALL(df["job"]), value="ALL", description="client job"
)
outcome_dropdown = widgets.Dropdown(
    options=unique_values_ALL(df["poutcome"]),
    value="ALL",
    description="outcomes",
)

# ------------event handlers-----------------------


def job_dropdown_handler(change):
    shared_filter(change.new, outcome_dropdown.value)


def outcome_dropdown_handler(change):
    shared_filter(job_dropdown.value, change.new)


# -----------filter--------------------------------


def shared_filter(job, outcome):
    output.clear_output(wait=True)
    plot_output.clear_output()

    if (job == "ALL") and (outcome == "ALL"):
        filter = df

    elif job == "ALL":
        filter = df[df.poutcome == outcome]

    elif outcome == "ALL":
        filter = df[df.job == job]

    else:
        filter = df[(df.job == job) & (df.poutcome == outcome)]

    with output:
        display(filter)

    with plot_output:
        sns.kdeplot(filter["y"].astype(float), shade=True)
        plt.show()


# --------------------------------------------------

# binding
job_dropdown.observe(job_dropdown_handler, names="value")
outcome_dropdown.observe(outcome_dropdown_handler, names="value")

## Dashboard

In [7]:
item_layout = widgets.Layout(margin="0 0 25px 0", width="500px")

In [8]:
input_widgets = widgets.HBox(
    [job_dropdown, outcome_dropdown], layout=item_layout
)

tab = widgets.Tab([output, plot_output], layout=item_layout)
tab.set_title(0, "Dataset Exploration")
tab.set_title(1, "KDE Plot")

dashboard = widgets.VBox([input_widgets, tab])
display(dashboard)

VBox(children=(HBox(children=(Dropdown(description='client job', options=('ALL', 'admin.', 'blue-collar', 'ent…

## Model

In [75]:
def process(df):
    d = {}
    for i, key in enumerate(df["job"].unique()):
        d[key] = i + 1

    numeric_var = {
        "poutcome": {"success": 4, "failure": 3, "other": 2, "unknown": 1},
        "job": d,
    }
    df = df.replace(numeric_var)
    housing = le.fit_transform(df["housing"])
    marital = le.fit_transform(df["marital"])
    default = le.fit_transform(df["default"])

    df.drop(["housing", "marital", "default"], axis=1, inplace=True)
    df = df.assign(housing=housing, marital=marital, default=default)

    return df

In [79]:
data = process(df)
X = data.drop("y", axis=1)
y = data["y"]

In [83]:
print(X.shape, y.shape)

(45211, 5) (45211,)


In [84]:
# split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [88]:
svc = SVC(kernel="linear")
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

0.8793541966161672


In [89]:
dump(svc, "linear_svm.joblib")

['linear_svm.joblib']

## Plotter function

In [26]:
def plotly_hist(df, jobtype, poutcome, prediction=1):

    plot1 = df[df.job]["y"].astype(int)
    plot2 = df[(df.poutcome == poutcome) & (df.job == jobtype)]["y"].astype(
        int
    )

    fig = go.Figure()

    fig.add_trace(
        go.Histogram(
            x=plot_series1,
            name="All Fields",
            histnorm="percent",
            xbins=dict(size=1000),
        )
    )
    fig.add_trace(
        go.Histogram(
            x=plot_series2,
            name=majorfield,
            histnorm="percent",
            xbins=dict(size=1000),
        )
    )
    fig.add_vline(
        x=prediction,
        line_dash="dash",
        annotation_text=f"Predicted: ${round(prediction, 2):,}",
    )

    fig.update_traces(opacity=0.55)
    return fig

## user input widgets

In [24]:
jobs = sorted(list(df["job"].unique()))
marital = sorted(list(df["marital"].unique()))
default = sorted(list(df["default"].unique()))
housing = sorted(list(df["housing"].unique()))
poutcome = sorted(list(df["poutcome"].unique()))

In [36]:
job_widget = widgets.Dropdown(
    options=jobs,
    value="admin.",
    description="job:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="80%"),
)
marital_widget = widgets.Dropdown(
    options=marital,
    value="married",
    description="marital status:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="80%"),
)
default_widget = widgets.Dropdown(
    options=default,
    value="no",
    description="defaults:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="80%"),
)
housing_widget = widgets.Dropdown(
    options=housing,
    value="yes",
    description="housing loan:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="80%"),
)
poutcome_widget = widgets.Dropdown(
    options=poutcome,
    value="unknown",
    description="marital status:",
    style={"description_width": "initial"},
    layout=widgets.Layout(width="80%"),
)

## predictions

In [49]:
def results(job, marital, default, housing, poutcome):
    df_new = pd.DataFrame(
        [[job, marital, default, housing, poutcome]],
        columns=["job", "marital", "default", "housing", "poutcome"],
    )
    df_new_train = pd.get_dummies(
        df_new, columns=["job", "marital", "default", "housing", "poutcome"]
    )
    [[prediction]] = svc.predict(df_new_train)
    pred = prediction if prediction > 0 else np.nan
    return pred

In [59]:
results("technician", "single", "no", "yes", "success")

ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- default_yes
- housing_no
- job_admin.
- job_blue-collar
- job_entrepreneur
- ...


In [42]:
out = widgets.interactive_output(
    results,
    {
        "job": job_widget,
        "marital": marital_widget,
        "default": default_widget,
        "housing": housing_widget,
        "poutcome": poutcome_widget,
    },
)

In [58]:
df.poutcome

0        unknown
1        unknown
2        unknown
3        unknown
4        unknown
          ...   
45206    unknown
45207    unknown
45208    success
45209    unknown
45210      other
Name: poutcome, Length: 45211, dtype: object