In [None]:
#######################
# standard code block #
#######################

# see https://ipython.readthedocs.io/en/stable/interactive/magics.html
%pylab inline

# sets backend to render higher res images
%config InlineBackend.figure_formats = ['retina']

# uses custom metis style sheet for notebooks
from IPython.core.display import HTML
HTML("""<link rel="stylesheet" href="https://soph.info/metis/nb.css" type="text/css"/>""")

#######################
#       imports       #
#######################
import pandas as pd
import seaborn as sns
# import sklearn

# seaborn.set_style("whitegrid")

In [None]:
from sklearn import datasets, model_selection

# fixed seed: without it the shuffle differs on every run, so the partition
# (and anything computed from it) is not reproducible
SPLIT_SEED = 42

# load in the iris dataset
iris_data = datasets.load_iris()

x = iris_data.data
y = iris_data.target

# stratify=y asks for the class proportions of y to be kept in both partitions
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, stratify=y, random_state=SPLIT_SEED
)

In [None]:
from sklearn import linear_model

# configure the classifier, then train it on the training partition
lr_model = linear_model.LogisticRegression(
    class_weight="balanced",
    solver='lbfgs',
    multi_class='auto',
)
lr_model.fit(x_train, y_train)

# score each partition once, then report both values as percentages
train_score = lr_model.score(x_train, y_train)
test_score = lr_model.score(x_test, y_test)
print(f"""
train score: {train_score:%}
test score: {test_score:%}
""")

In [None]:
# display the class labels that ship with the loaded dataset
iris_data.target_names

In [None]:
# display the feature (column) labels that ship with the loaded dataset
iris_data.feature_names

In [None]:
# let's store some of these handy bits of data within the model
# (plain attribute assignment on the estimator object; plot_ovr_coefs reads
# both attributes back from whichever model it is given)

lr_model.feature_names = iris_data.feature_names
lr_model.target_names = iris_data.target_names

In [None]:
def plot_ovr_coefs(model, ax=None):
    """Plot a fitted linear classifier's coefficients as grouped horizontal bars.

    One bar is drawn for every (row of ``model.coef_``, feature) pair; bars are
    grouped by feature name and coloured by coefficient row.

    Parameters
    ----------
    model : fitted estimator
        Must expose a 2-D ``coef_`` array together with two attached
        attributes: ``feature_names`` (one label per column of ``coef_``) and
        ``target_names`` (class labels, assumed to be listed in the same
        order as the estimator's classes).
    ax : matplotlib Axes, optional
        Axes to draw on, forwarded to ``sns.barplot``. When omitted, seaborn
        chooses the axes itself, exactly as before this parameter existed.

    Returns
    -------
    None
        The function only draws; nothing is returned.

    Notes
    -----
    Every series is labelled ``"<class> vs rest"`` whether or not the
    estimator was actually fit one-vs-rest.
    """
    n_models, n_coef = model.coef_.shape

    # A binary fit stores a single coefficient row, and that row describes the
    # SECOND class; labelling it with target_names[0] would name the wrong one.
    if n_models == 1 and len(model.target_names) == 2:
        row_labels = [model.target_names[1]]
    else:
        row_labels = model.target_names

    # Long-format records: one per (coefficient row, feature) pair.
    records = [
        {
            "OVR Model": f"{row_labels[i]} vs rest",
            "Coefficient Name": f"{model.feature_names[j]}",
            "Coefficient Value": model.coef_[i, j],
        }
        for i in range(n_models)
        for j in range(n_coef)
    ]
    coef_df = pd.DataFrame(records)

    sns.barplot(
        y="Coefficient Name",
        x="Coefficient Value",
        hue="OVR Model",
        orient="h",
        data=coef_df,
        ax=ax,
    )

In [None]:
# let's plot the coefficients now, as a baseline: drawing the same figure again
# from the reloaded model demonstrates that the model and what it has learned
# are preserved through pickling

plot_ovr_coefs(lr_model)

In [None]:
import os
import pickle

MODEL_PATH = "predictor/lr.pkl"

# open(..., "wb") does not create missing parent folders, so make sure the
# target directory exists before writing (a no-op when it is already there)
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# serialise the fitted estimator, including the name attributes attached to it
with open(MODEL_PATH, "wb") as f:
    pickle.dump(lr_model, f)

I'm going to test our pickling by deleting the model and showing that the figure above can no longer be drawn; then I'll load the model back from the pickle file and show that it can.

In [None]:
# Drop the in-memory model. Guarded so that re-running this cell after the
# name is already gone does not fail on the `del` itself.
try:
    del lr_model
except NameError:
    pass  # already deleted by an earlier run of this cell

# The failure IS the demonstration, so catch and show it: an uncaught
# exception would halt "Run All" before the reload cells below ever execute.
try:
    plot_ovr_coefs(lr_model)
except NameError as err:
    print(f"plotting failed as expected: {err!r}")

In [None]:
# Rebuild the estimator from the pickle file and bind it to a NEW name, so it
# is clear that everything below uses the reloaded copy.
# NOTE(review): pickle.load can execute arbitrary code; only unpickle files
# from a source you trust.
with open("predictor/lr.pkl", "rb") as f:
    lr_model2 = pickle.load(f)

In [None]:
# same figure as before the deletion, now drawn from the unpickled copy
plot_ovr_coefs(lr_model2)

We can also predict on synthetic data

In [None]:
# predicted label for one hand-written sample of four feature values
lr_model2.predict([[0,0,0,1]])

In [None]:
# per-class probabilities for the same hand-written sample
lr_model2.predict_proba([[0,0,0,1]])

In [None]:
# predict_proba returns one row per input sample; take the single row as a
# plain 1-D array (instead of a `.flat` iterator object) so it can be sorted
# and indexed like any other ndarray
pred_probs = lr_model2.predict_proba([[0,0,0,1]])[0]

We can find the most likely classes

In [None]:
# class indices from most to least probable (argsort sorts ascending, hence the reversal)
np.argsort(pred_probs)[::-1]

Finally, we can pair the predicted probabilities with each candidate class

In [None]:
# one "<class name>: <probability>" fragment per class, most probable first
fragments = []
for class_i in np.argsort(pred_probs)[::-1]:
    fragments.append(
        f"\n    {lr_model2.target_names[class_i]}: {pred_probs[class_i]:g}\n    "
    )

# glue the fragments together in ranking order and show the result
pred_str = "".join(fragments)
print(pred_str)