In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pickle
import h5py

In [None]:
# Notebook configuration, read by the cells below:
#   db     - path of the HDF5 feature database that gets opened for reading
#   models - path the pickled best estimator is written to
#   jobs   - value forwarded to GridSearchCV's n_jobs
args = dict(
    db='output/caltech-101/features.hdf5',
    models='caltech-101.cpickle',
    jobs=1,
)

In [None]:
# Open the HDF5 feature database read-only, then compute the row index
# that separates the training portion (first 75% of the rows) from the
# testing portion (the remainder). A positional cut like this is only a
# fair split if the rows were already shuffled when the database was
# written -- that is assumed here, not checked.

db = h5py.File(args["db"], mode="r")
i = int(0.75 * db["labels"].shape[0])

In [None]:
# define the set of parameters that we want to tune then start a
# grid search where we evaluate our model for each value of C
print("[INFO] tuning hyperparameters...")
params = {"C": [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]}
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument; it is kept as-is so behaviour on the version this notebook was
# written for does not change -- confirm against the installed version.
model = GridSearchCV(
    LogisticRegression(solver="lbfgs", max_iter=800, multi_class="auto"),
    params,
    cv=3,
    n_jobs=args["jobs"],
)
# fit on the training rows only (everything before the split index)
model.fit(db["features"][:i], db["labels"][:i])
print("[INFO] best hyperparameters: {}".format(model.best_params_))


# evaluate the model on the held-out rows (everything from the split index on)
print("[INFO] evaluating...")
preds = model.predict(db["features"][i:])

# h5py >= 3 hands stored strings back as bytes, which classification_report
# cannot format; decode them to str first. Values that are already str
# (older h5py) pass through unchanged.
target_names = [
    name.decode("utf-8") if isinstance(name, bytes) else str(name)
    for name in db["label_names"][:]
]
print(classification_report(db["labels"][i:], preds,
                            target_names=target_names))

In [None]:
# serialize the best estimator found by the grid search to disk
print("[INFO] saving model ...")
try:
    # the context manager closes the output file even if pickling raises
    with open(args['models'], 'wb') as f:
        pickle.dump(model.best_estimator_, f)
finally:
    # close db whether or not the save succeeded
    db.close()