In [None]:
# organize imports
from __future__ import print_function

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import numpy as np
import h5py
import os
import json
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Central settings for this notebook: feature-extractor description, input/output
# locations, and train/test split parameters.
config = {
    # feature-extractor description
    "model": "inceptionv3",
    "weights": "imagenet",
    "include_top": False,

    # inputs (pre-computed features/labels) and outputs (report, pickled classifier)
    "features_path": "../input/output/IOdev/inceptionv3/features.h5",
    "labels_path": "../input/output/IOdev/inceptionv3/labels.h5",
    "results": "output/IOdev/inceptionv3/results.txt",
    "classifier_path": "output/IOdev/inceptionv3/classifier.pickle",
    "model_path": "../input/output/IOdev/inceptionv3/model",

    # train/test split and problem size
    "test_size": 0.15,
    "seed": 9,
    "num_classes": 14,
}

In [None]:
# config variables
# Unpack the settings used by the following cells from `config`.
# (`classifier_path` was previously assigned twice; it is now read once.)
test_size       = config["test_size"]
seed            = config["seed"]
features_path   = config["features_path"]
labels_path     = config["labels_path"]
results         = config["results"]
classifier_path = config["classifier_path"]
num_classes     = config["num_classes"]

# Class names. Despite the variable name this is a list of label strings,
# not a filesystem path; the name is kept because a later cell reads it.
train_path = [
    'Effusion', 'Atelectasis', 'Edema', 'Hernia', 'Mass', 'Nodule', 'Fibrosis',
    'Emphysema', 'Cardiomegaly', 'Consolidation', 'Infiltration', 'Pneumonia',
    'Pneumothorax', 'Pleural_Thickening',
]

In [None]:
# import features and labels
# Read 'dataset_1' from each HDF5 file fully into memory as a NumPy array.
# The `with` blocks guarantee both files are closed even if a dataset is
# missing or the read raises, which the manual open/close pairing did not.
with h5py.File(features_path, 'r') as h5f_data:
    features_string = h5f_data['dataset_1']
    features = np.array(features_string)

with h5py.File(labels_path, 'r') as h5f_label:
    labels_string = h5f_label['dataset_1']
    labels = np.array(labels_string)

# NOTE: `features_string` / `labels_string` now refer to datasets of closed
# files (as they did after the original close() calls); the names stay bound
# because a later cell deletes them explicitly.

In [None]:
# Sanity check: report the dimensions of the loaded feature and label arrays,
# one message per line.
print(
    "[INFO] features shape: {}".format(features.shape),
    "[INFO] labels shape: {}".format(labels.shape),
    sep="\n",
)


In [None]:
print("[INFO] training started...")

# Split the data into training and held-out test sets. `test_size` is the
# fraction reserved for testing and `seed` makes the shuffle reproducible.
# `features` and `labels` are already NumPy arrays (built when the HDF5 files
# were read), so they are passed directly: wrapping them in np.array() again
# would allocate a full extra copy of each without changing the result.
(trainData, testData, trainLabels, testLabels) = train_test_split(
    features,
    labels,
    test_size=test_size,
    random_state=seed,
)

print("[INFO] splitted train and test data...")
print("[INFO] train data  : {}".format(trainData.shape))
print("[INFO] test data   : {}".format(testData.shape))
print("[INFO] train labels: {}".format(trainLabels.shape))
print("[INFO] test labels : {}".format(testLabels.shape))

In [None]:
# Unbind the HDF5 dataset handles and the full feature/label arrays now that
# the train/test splits exist (presumably to reduce memory use).
# Re-running the split cell after this requires reloading the data first.
del features_string
del labels_string
del features
del labels

In [None]:
# Fit a logistic-regression classifier on the training split.
# `seed` is passed as random_state so repeated runs use the same seed.
print("[INFO] creating model...")
model = LogisticRegression(random_state=seed)
model.fit(X=trainData, y=trainLabels)


In [None]:
# Make sure the directories that will receive the results file and the pickled
# classifier exist. The locations are derived from the configured output paths
# rather than hard-coded, and existing directories are left untouched so this
# cell can be re-run without raising an "already exists" error.
for output_file in (results, classifier_path):
    output_dir = os.path.dirname(output_file)
    # An empty dirname means the file lives in the current working directory.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

In [None]:
# Start of evaluation: open the results file for writing and zero the
# rank-1 / rank-5 counters.
# NOTE(review): `f` stays open until a later cell closes it, and the two
# counters are not read anywhere in the visible cells.
print("[INFO] evaluating model...")
f = open(results, "w")
rank_1 = rank_5 = 0

In [None]:
# Predict a class label for every sample in the held-out test split.
preds = model.predict(X=testData)



In [None]:
# write the classification report to file
# `f` is the results file opened in an earlier cell. Using it as a context
# manager closes it on exit even if building or writing the report raises,
# so the handle is never left dangling.
with f:
    f.write("{}\n".format(classification_report(testLabels, preds)))

In [None]:
# dump classifier to file
print("[INFO] saving model...")
# Open the target inside a `with` block so the pickle is flushed and the file
# handle is closed deterministically (the bare open() call was never closed).
with open(classifier_path, 'wb') as classifier_file:
    pickle.dump(model, classifier_file)


In [None]:
#  display the confusion matrix
print("[INFO] confusion matrix")

# get the sorted list of training labels
# NOTE(review): these names are not applied as tick labels because the visible
# code does not show how the values in `testLabels` map to them; confirm the
# encoding before passing them as xticklabels/yticklabels.
labels = sorted(train_path)

# Rows of the matrix are the true classes, columns are the predicted classes.
cm = confusion_matrix(testLabels, preds)

# plot the confusion matrix
# fmt="d" prints the integer counts in full; seaborn's default ".2g" would
# abbreviate larger counts to scientific notation (e.g. 1.2e+03).
ax = sns.heatmap(cm,
                 annot=True,
                 fmt="d",
                 cmap="Set2")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix")
plt.show()