### Online Binary Classification with Time Series Dataset

- Objective: modify David Ziganto's introduction to online learning to learn with time series dataset
- Dataset source: https://archive.ics.uci.edu/ml/datasets/bank+marketing
- Original Post: https://dziganto.github.io/data%20science/online%20learning/python/scikit-learn/An-Introduction-To-Online-Machine-Learning/
- Referenced Notebook: https://github.com/dziganto/dziganto.github.io/blob/master/_notebooks/Online_Learning.ipynb

In [1]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('bank-additional-full.csv', sep=';')

Cross Validation with Time Series Split on the Training Set

In [7]:
#Creating unseen dataset with last 50 instances to simulate future data instances to demo online learning
unseen_data = df.iloc[len(df)-50:len(df),:]
seen_data = df.iloc[:len(df)-50,:]

In [8]:
#Simplifying df to a few numeric features to keep demo simple
X = seen_data[['age', 'duration', 'campaign', 'pdays']]
#Binarizing y 
y = [1 if i == 'yes' else 0 for i in seen_data['y'] ]
X = np.array(X)
y = np.array(y)

In [15]:
#Showing first five rows of x and y
print('First five rows of X')
print(X[:5,:])
print('\n')
print('First five rows of y')
print(y[:5])
print('\n')

First five rows of X
[[ 56 261   1 999]
 [ 57 149   1 999]
 [ 37 226   1 999]
 [ 40 151   1 999]
 [ 56 307   1 999]]


First five rows of y
[0 0 0 0 0]




In [16]:
#Replicating X and y for unseen future data
X_new = unseen_data[['age', 'duration', 'campaign', 'pdays']]
#Binarizing y 
y_new = [1 if i == 'yes' else 0 for i in unseen_data['y'] ]

X_new = np.array(X_new)
y_new = np.array(y_new)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X,y, 
                                                    test_size = 0.33, 
                                                    random_state=2, 
                                                    shuffle = None)

In [18]:
print('X Train Shape: ', X_train.shape)
print('y Train Shape: ', y_train.shape)
print('X Test Shape: ', X_test.shape)
print('y Test Shape: ', y_test.shape)

X Train Shape:  (27562, 4)
y Train Shape:  (27562,)
X Test Shape:  (13576, 4)
y Test Shape:  (13576,)


Scaling Training and Test Sets:

In [19]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)



Nested CV with Time Series Split

In [20]:
estimator = SGDClassifier()

In [21]:
#Evaluating both online SVM (hinge) and Logistic Regression (huber) 
#Notice target classes are highly imbalanced
param_search = {'loss' : ['hinge','huber'], 
                'alpha':[0.25,0.5,0.75], 
                'penalty':['l2'],
                'shuffle':[True], 
                'learning_rate':['optimal'],
                'class_weight':['balanced'],
                'verbose':[1], 
               'max_iter':[100], 
               'tol': [1e-3], 
               'random_state':[22]}

In [22]:
cv = TimeSeriesSplit(n_splits=2)

In [23]:
gs = GridSearchCV(estimator=estimator, cv=cv, param_grid=param_search, scoring = 'accuracy')

In [24]:
gs.fit(X_train,y_train)

-- Epoch 1
Norm: 0.73, NNZs: 4, Bias: -0.573634, T: 9188, Avg. loss: 0.425316
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 0.74, NNZs: 4, Bias: -0.558219, T: 18376, Avg. loss: 0.390935
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 0.73, NNZs: 4, Bias: -0.557949, T: 27564, Avg. loss: 0.396620
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 0.74, NNZs: 4, Bias: -0.559139, T: 36752, Avg. loss: 0.397546
Total training time: 0.01 seconds.
-- Epoch 5
Norm: 0.73, NNZs: 4, Bias: -0.561580, T: 45940, Avg. loss: 0.398684
Total training time: 0.01 seconds.
-- Epoch 6
Norm: 0.74, NNZs: 4, Bias: -0.560068, T: 55128, Avg. loss: 0.396829
Total training time: 0.01 seconds.
-- Epoch 7
Norm: 0.74, NNZs: 4, Bias: -0.560469, T: 64316, Avg. loss: 0.396647
Total training time: 0.01 seconds.
Convergence after 7 epochs took 0.01 seconds
-- Epoch 1
Norm: 0.75, NNZs: 4, Bias: -0.554143, T: 18375, Avg. loss: 0.400382
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 0.75, NNZs: 4, Bias: -0.557

GridSearchCV(cv=TimeSeriesSplit(max_train_size=None, n_splits=2),
       error_score='raise-deprecating',
       estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=None,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'loss': ['hinge', 'huber'], 'alpha': [0.25, 0.5, 0.75], 'penalty': ['l2'], 'shuffle': [True], 'learning_rate': ['optimal'], 'class_weight': ['balanced'], 'verbose': [1], 'max_iter': [100], 'tol': [0.001], 'random_state': [22]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [25]:
print('Best Estimator Hyperparameters:')
gs.best_estimator_

Best Estimator Hyperparameters:


SGDClassifier(alpha=0.75, average=False, class_weight='balanced',
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=100,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=22, shuffle=True, tol=0.001,
       validation_fraction=0.1, verbose=1, warm_start=False)

In [26]:
#Predicting on Test set
y_pred = gs.best_estimator_.predict(X_test)

In [27]:
print('Accuracy on Test Set:', accuracy_score(y_test,y_pred))

Accuracy on Test Set: 0.8992339422510313


Creating Final Model 

In [28]:
final_estimator = SGDClassifier(alpha=0.75, average=False, class_weight='balanced',
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=100,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, shuffle=True, tol=0.001,
       validation_fraction=0.1, verbose=1, warm_start=False, random_state = 22)

In [29]:
#Training the final model on the entire seen dataset
final_estimator.fit(X, y)

-- Epoch 1
Norm: 0.21, NNZs: 4, Bias: 3.372936, T: 41138, Avg. loss: 454.982334
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 0.17, NNZs: 4, Bias: 3.366401, T: 82276, Avg. loss: 12.507060
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 0.15, NNZs: 4, Bias: 3.362384, T: 123414, Avg. loss: 7.452187
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 0.15, NNZs: 4, Bias: 3.359697, T: 164552, Avg. loss: 5.310327
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 0.14, NNZs: 4, Bias: 3.357477, T: 205690, Avg. loss: 4.294072
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 0.14, NNZs: 4, Bias: 3.355670, T: 246828, Avg. loss: 3.550125
Total training time: 0.04 seconds.
-- Epoch 7
Norm: 0.14, NNZs: 4, Bias: 3.354170, T: 287966, Avg. loss: 3.044687
Total training time: 0.04 seconds.
-- Epoch 8
Norm: 0.13, NNZs: 4, Bias: 3.352866, T: 329104, Avg. loss: 2.778474
Total training time: 0.05 seconds.
-- Epoch 9
Norm: 0.13, NNZs: 4, Bias: 3.351684, T: 370242, Avg. loss: 2.396802
Total tr

Norm: 0.09, NNZs: 4, Bias: 3.330331, T: 3208764, Avg. loss: 0.686489
Total training time: 0.41 seconds.
-- Epoch 79
Norm: 0.09, NNZs: 4, Bias: 3.330205, T: 3249902, Avg. loss: 0.695707
Total training time: 0.41 seconds.
-- Epoch 80
Norm: 0.09, NNZs: 4, Bias: 3.330080, T: 3291040, Avg. loss: 0.685401
Total training time: 0.42 seconds.
-- Epoch 81
Norm: 0.09, NNZs: 4, Bias: 3.329958, T: 3332178, Avg. loss: 0.691703
Total training time: 0.43 seconds.
-- Epoch 82
Norm: 0.09, NNZs: 4, Bias: 3.329838, T: 3373316, Avg. loss: 0.684070
Total training time: 0.44 seconds.
-- Epoch 83
Norm: 0.09, NNZs: 4, Bias: 3.329718, T: 3414454, Avg. loss: 0.670122
Total training time: 0.45 seconds.
-- Epoch 84
Norm: 0.09, NNZs: 4, Bias: 3.329600, T: 3455592, Avg. loss: 0.683122
Total training time: 0.45 seconds.
-- Epoch 85
Norm: 0.09, NNZs: 4, Bias: 3.329481, T: 3496730, Avg. loss: 0.670157
Total training time: 0.46 seconds.
-- Epoch 86
Norm: 0.09, NNZs: 4, Bias: 3.329365, T: 3537868, Avg. loss: 0.670999
Tot



SGDClassifier(alpha=0.75, average=False, class_weight='balanced',
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=100,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=22, shuffle=True, tol=0.001,
       validation_fraction=0.1, verbose=1, warm_start=False)

In [30]:
final_estimator.coef_

array([[-0.0018559 ,  0.00575112, -0.08175267, -0.00336232]])

In [31]:
final_estimator.fit(X_new, y_new)

-- Epoch 1
Norm: 23.85, NNZs: 4, Bias: 0.346484, T: 50, Avg. loss: 24592.327698
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 25.08, NNZs: 4, Bias: 0.306976, T: 100, Avg. loss: 3624.928276
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 19.28, NNZs: 4, Bias: 0.266718, T: 150, Avg. loss: 1811.045660
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 14.64, NNZs: 4, Bias: 0.245833, T: 200, Avg. loss: 1478.066820
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 12.57, NNZs: 4, Bias: 0.229612, T: 250, Avg. loss: 1110.732229
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 8.20, NNZs: 4, Bias: 0.216060, T: 300, Avg. loss: 828.798631
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 10.45, NNZs: 4, Bias: 0.198915, T: 350, Avg. loss: 1048.322157
Total training time: 0.00 seconds.
-- Epoch 8
Norm: 7.92, NNZs: 4, Bias: 0.189090, T: 400, Avg. loss: 669.862176
Total training time: 0.00 seconds.
-- Epoch 9
Norm: 7.93, NNZs: 4, Bias: 0.179680, T: 450, Avg. loss: 616.754742
Total 

SGDClassifier(alpha=0.75, average=False, class_weight='balanced',
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=100,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=22, shuffle=True, tol=0.001,
       validation_fraction=0.1, verbose=1, warm_start=False)

In [32]:
final_estimator.coef_

array([[-2.85931351,  1.14843782, -0.24904998, -0.52056329]])