In [1]:
import sklearn
import numpy as np
import pandas as pd
import xgboost as xgb
import tensorflow as tf


Wine data set

https://scikit-learn.org/stable/datasets/toy_dataset.html#wine-dataset

In [2]:
from sklearn import datasets

# Load the scikit-learn wine toy dataset, then pull the feature matrix and the
# class-label vector out of the returned bunch.
wine = datasets.load_wine()
X, y = wine.data, wine.target

In [3]:
# Feature matrix dimensions as (rows, columns).
X.shape

(178, 13)

In [4]:
# Label vector dimensions; the row count should match X.
y.shape

(178,)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_validation, y_train, y_validation = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Softmax Regression With SKLearn

In [104]:
from sklearn.linear_model import LogisticRegression

# Multinomial (softmax) logistic regression with an L2 penalty, fitted on the
# unscaled training split.
lr_model = LogisticRegression(
    penalty='l2',
    multi_class='multinomial',
    solver='newton-cg',
    tol=1e-4,
    max_iter=1000,
    verbose=1,
).fit(X_train, y_train)


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s finished


In [105]:
from sklearn.metrics import log_loss
import math

# NOTE(review): scored on the full dataset (X, y), which includes the rows the
# model was fitted on, so this is not a held-out estimate.
y_prob = lr_model.predict_proba(X)
mlogloss = log_loss(y, y_prob)

print(f"Logloss: {mlogloss}")


Logloss: 0.03898258791110504


In [106]:
from sklearn.metrics import classification_report, confusion_matrix
# Class labels; passed as `labels=` to confusion_matrix to fix the class order.
CATEGORIES = [0, 1, 2]


In [107]:
# Hard class predictions for every row of X (training and validation rows alike).
y_hat = lr_model.predict(X)

In [108]:
# Per-class precision / recall / F1 on the full dataset, to four decimal places.
print(classification_report(y_true=y, y_pred=y_hat, digits=4))

              precision    recall  f1-score   support

           0     1.0000    0.9831    0.9915        59
           1     0.9861    1.0000    0.9930        71
           2     1.0000    1.0000    1.0000        48

    accuracy                         0.9944       178
   macro avg     0.9954    0.9944    0.9948       178
weighted avg     0.9945    0.9944    0.9944       178



In [109]:
# Confusion matrix over the full dataset, with classes ordered as in CATEGORIES.
cm = confusion_matrix(y_true=y, y_pred=y_hat, labels=CATEGORIES)

In [110]:
# Display the matrix (scikit-learn convention: rows are true labels, columns are predictions).
cm

array([[58,  1,  0],
       [ 0, 71,  0],
       [ 0,  0, 48]])

# Gradient Boosted Trees with XGBoost

In [35]:
from xgboost import XGBClassifier

# Gradient-boosted tree classifier for the three wine classes.
my_model = XGBClassifier(
    # learning task
    objective='multi:softmax',
    num_class=3,
    base_score=0.5,
    booster='gbtree',
    tree_method='hist',

    # boosting schedule and tree shape
    n_estimators=100,
    learning_rate=0.3,
    max_depth=3,
    min_child_weight=1,
    gamma=0,

    # row / column subsampling
    subsample=0.8,
    colsample_bytree=0.8,
    colsample_bynode=1,

    # regularisation
    reg_alpha=0,
    reg_lambda=1,

    # NOTE(review): `random_state` and `seed` are both supplied, with different
    # values; confirm which one the installed xgboost version honours.
    random_state=0,
    seed=1234,

    missing=None,
    verbosity=1,
)

# Show the parameters that will be handed to the underlying booster.
print(my_model.get_xgb_params())

# Track both splits during boosting so the training and validation curves can
# be compared in the log.
trained = my_model.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_validation, y_validation)],
    # the last metric is used for early stopping
    eval_metric=["merror", "mlogloss"],
    early_stopping_rounds=5,
    verbose=True,
)


{'objective': 'multi:softmax', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': None, 'colsample_bynode': 1, 'colsample_bytree': 0.8, 'gamma': 0, 'gpu_id': None, 'interaction_constraints': None, 'learning_rate': 0.3, 'max_delta_step': None, 'max_depth': 3, 'min_child_weight': 1, 'monotone_constraints': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 0, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': None, 'subsample': 0.8, 'tree_method': 'hist', 'validate_parameters': None, 'verbosity': 1, 'num_class': 3, 'seed': 1234}
[0]	validation_0-merror:0.03521	validation_0-mlogloss:0.76320	validation_1-merror:0.05556	validation_1-mlogloss:0.77069
[1]	validation_0-merror:0.00704	validation_0-mlogloss:0.55670	validation_1-merror:0.00000	validation_1-mlogloss:0.56952
[2]	validation_0-merror:0.00704	validation_0-mlogloss:0.41889	validation_1-merror:0.00000	validation_1-mlogloss:0.43899
[3]	validation_0-merror:0.00000	validation_0-mlogloss:0.31685	validation_1-merror

# Softmax Regression In Neural Network Style

In [37]:
import tensorflow as tf
from tensorflow import keras as keras
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Reshape, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import SparseCategoricalAccuracy



In [89]:
def build_softmax_regression(n_features=13, n_classes=3):
    """Build a single-layer softmax-regression model with Keras.

    The network is one Dense layer with a softmax activation applied directly
    to the inputs, i.e. multinomial logistic regression expressed as a
    neural network.

    Args:
        n_features: Number of input features per sample. Defaults to 13.
        n_classes: Number of output classes (units of the softmax layer).
            Defaults to 3.

    Returns:
        The uncompiled Keras ``Model``. Its architecture summary is printed
        as a side effect.
    """
    inputs = Input(shape=(n_features,))
    outputs = Dense(n_classes, activation="softmax", name='fc')(inputs)
    model = Model(inputs=inputs, outputs=outputs)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line after the table.
    model.summary()
    return model

def model_train(X_train, y_train, X_val, y_val, model,
                learning_rate=0.1, batch_size=178, epochs=1000):
    """Compile ``model`` and fit it with early stopping and LR decay.

    The model is compiled with the Adam optimizer and the
    ``sparse_categorical_crossentropy`` loss, and sparse categorical accuracy
    is tracked as a metric. Both callbacks monitor ``val_loss``.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels.
        model: Keras model to compile and train; it is modified in place.
        learning_rate: Initial Adam learning rate. Defaults to 0.1.
        batch_size: Samples per gradient step. The default of 178 equals the
            row count of the full wine dataset, so the training split fits in
            a single batch.
        epochs: Upper bound on training epochs; early stopping usually ends
            training sooner. Defaults to 1000.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.
    """
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line after the table.
    model.summary()

    # Stop once val_loss has not improved by at least 1e-5 for 3 epochs, and
    # roll back to the best weights seen.
    early_stop = EarlyStopping(
        monitor="val_loss", min_delta=1e-5, patience=3,
        verbose=0, mode="auto", baseline=None, restore_best_weights=True
    )
    # Multiply the learning rate by 0.8 after 2 stagnant epochs, never going
    # below 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=2, min_lr=1e-4)

    callbacks_list = [early_stop, reduce_lr]

    # optimizer: gradient descent implementation
    optimizer = Adam(learning_rate=learning_rate)

    # compile model with loss and optimizer; loss function needs to match the
    # intention and the output of the last layer
    model.compile(
        loss="sparse_categorical_crossentropy", optimizer=optimizer,
        metrics=[SparseCategoricalAccuracy()]
    )

    # kick off model training
    return model.fit(
        x=X_train, y=y_train, validation_data=(X_val, y_val), batch_size=batch_size,
        epochs=epochs, verbose=1, shuffle=True,
        callbacks=callbacks_list)


In [91]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)

In [92]:
X_train_scaled = scaler.transform(X_train)
X_validation_scaled = scaler.transform(X_validation)

In [111]:

my_tf_model = build_softmax_regression()

model_train(X_train_scaled, y_train, X_validation_scaled, y_validation, my_tf_model)

Model: "model_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_17 (InputLayer)        [(None, 13)]              0         
_________________________________________________________________
fc (Dense)                   (None, 3)                 42        
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________
None
Model: "model_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_17 (InputLayer)        [(None, 13)]              0         
_________________________________________________________________
fc (Dense)                   (None, 3)                 42        
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000

<keras.callbacks.History at 0x7fd749cf3690>