In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import argparse
import pickle
import h5py

In [8]:
# Notebook configuration (a stand-in for command-line arguments):
#   db    - path of the HDF5 file that is opened for features and labels
#   model - path the fitted classifier is pickled to
#   jobs  - forwarded to GridSearchCV as n_jobs (-1 = use all processors)
args = dict(
    db='myFeatures_new.hdf5',
    model='animals.cpickle',
    jobs=-1,
)

In [9]:
# Open the HDF5 feature database read-only. The rows are assumed to have been
# shuffled when the file was written, so the train/test boundary is simply the
# row index 75% of the way through the labels: rows [:i] are used for
# training and rows [i:] for testing.
db = h5py.File(args['db'], mode='r')
num_labels = db['labels'].shape[0]
i = int(num_labels * 0.75)

In [10]:
# Grid-search the regularisation strength C of a logistic-regression
# classifier using 3-fold cross-validation, fitting only on the training rows
# (everything before split index i).
print('[INFO] tuning hyperparameters')
params = {'C': [0.1, 1.0, 10.0, 100.0, 1000.0, 10_000.0]}
base_estimator = LogisticRegression(solver="lbfgs", multi_class='auto')
model = GridSearchCV(
    estimator=base_estimator,
    param_grid=params,
    cv=3,
    n_jobs=args['jobs'],
)
model.fit(db['features'][:i], db['labels'][:i])
print(f'[INFO] Best Hyperparameters are: {model.best_params_}')

[INFO] tuning hyperparameters
[INFO] Best Hyperparameters are: {'C': 100.0}


In [16]:
# Evaluate the tuned model on the held-out rows (split index i onwards).
# The stored label names are decoded from UTF-8 bytes so the report shows
# readable class names.
target_names = [raw.decode('utf8') for raw in db['label_names'][:]]

print('[INFO] evaluating the model......')
preds = model.predict(db['features'][i:])
test_labels = db['labels'][i:]
report = classification_report(test_labels, preds, target_names=target_names)
print(report)

[INFO] evaluating the model......
              precision    recall  f1-score   support

        cats       0.98      1.00      0.99       248
        dogs       1.00      0.97      0.98       250
       panda       0.99      1.00      1.00       252

    accuracy                           0.99       750
   macro avg       0.99      0.99      0.99       750
weighted avg       0.99      0.99      0.99       750



In [17]:
# Serialise the best estimator found by the grid search to disk, then release
# the HDF5 handle.
#
# The estimator is pickled into memory *before* the output file is opened:
# opening with mode 'wb' truncates the file, so pickling first means a
# serialisation error cannot wipe out a previously saved model.
serialised_model = pickle.dumps(model.best_estimator_)

# The context manager guarantees the file is flushed and closed even if the
# write raises (the manual open()/close() pair leaked the handle on error).
with open(args['model'], mode='wb') as f:
    f.write(serialised_model)

db.close()