In [3]:
import pandas as pd
import keras
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from tensorflow.keras.metrics import AUC
from glob import glob
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Conv2D, GlobalMaxPool2D, GlobalAveragePooling2D,
                                     Dropout, Flatten, Dense, Activation, Dropout,
                                     BatchNormalization, MaxPooling2D)
from tensorflow.keras.applications.mobilenet import MobileNet
from keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import (ModelCheckpoint, LearningRateScheduler, 
                                        EarlyStopping, CSVLogger, TerminateOnNaN,
                                       ReduceLROnPlateau)
from tensorflow.keras.models import load_model
from tensorflow.keras.metrics import AUC
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
pd.set_option('display.notebook_repr_html', True)

In [4]:
test_model = load_model(r'C:\Users\Yaroslavel\ZalevskiyDiplom\models\mobile_net_cnn1.h5')

In [6]:
final_dx = pd.read_pickle('ZalevskiyDiplom/dataframes/fixed_final_df.pkl')
train_set = pd.read_pickle('ZalevskiyDiplom/sample/new_train_set.pkl')
valid_set = pd.read_pickle('ZalevskiyDiplom/sample/new_valid_set.pkl')
train_set.sample(5)

Unnamed: 0,filename,disease,followup,patientID,age,gender,viewposition,width,height,x,path,Atelectasis,Consolidation,Infiltration,Pneumothorax,Edema,Emphysema,Fibrosis,Effusion,Pneumonia,Pleural_Thickening,Cardiomegaly,Nodule,Mass,Hernia,Crossed Disease,array_disease
32594,00008522_017.png,No Finding,17,8522,60,F,AP,2500,2048,0.168,K:/DiplomZalevskiy/data/images/00008522_017.png,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
36493,00009616_001.png,No Finding,1,9616,65,M,AP,2500,2048,0.168,K:/DiplomZalevskiy/data/images/00009616_001.png,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
13447,00003495_000.png,No Finding,0,3495,47,F,PA,2021,2010,0.194311,K:/DiplomZalevskiy/data/images/00003495_000.png,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
90855,00022624_000.png,Fibrosis,0,22624,61,F,PA,2516,3056,0.139,K:/DiplomZalevskiy/data/images/00022624_000.png,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]"
54946,00013762_000.png,No Finding,0,13762,37,F,AP,2500,2048,0.168,K:/DiplomZalevskiy/data/images/00013762_000.png,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [95]:
path_to_images = 'K:/DiplomZalevskiy/data/images/'
disease_labels = ['Atelectasis',
       'Consolidation', 'Infiltration', 'Pneumothorax', 'Edema', 'Emphysema',
       'Fibrosis', 'Effusion', 'Pneumonia', 'Pleural_Thickening',
       'Cardiomegaly', 'Nodule', 'Mass', 'Hernia']

image_generator = ImageDataGenerator(rescale=1./255.,
                         samplewise_center=True,
                         samplewise_std_normalization=True,
                         horizontal_flip=False,
                         vertical_flip=False,
                         height_shift_range=0.1,
                         width_shift_range=0.1,
                         rotation_range=5,
                         shear_range=0.1,
                         fill_mode='reflect',
                         zoom_range=0.15)




valid_generator = image_generator.flow_from_dataframe(
    dataframe=valid_set,
    directory=path_to_images,
    x_col="filename",
    y_col=disease_labels,
    classes=disease_labels,
    target_size=(224, 224),
    seed=42,
    batch_size=256,
    class_mode='raw',
    color_mode='rgb',
    shuffle=False)

x_train, y_train = next(valid_generator)




Found 10958 validated image filenames.


In [11]:
# Коллбеки
model_checkpoint = ModelCheckpoint(
    filepath = 'mobilenet_cnn_{epoch:02d}_{val_loss:.2f}.hdf5',
    verbose = 1,
    monitor = 'val_loss',
    save_best_only= True)

reduce_lron = ReduceLROnPlateau(monitor='val_loss', 
                                factor=0.1, 
                                patience=3,
                                verbose=1, mode="min", 
                                min_lr=1e-5)

auc = AUC()
#model_checkpoint = ModelCheckpoint(..) # Описан ранее
#reduce_lron = ReduceLROnPlateau(...) # Описан ранее
ter_by_nan = TerminateOnNaN() 
logger = CSVLogger('mobienet_best_logger.cvs')
callbacks_lst = [reduce_lron,ter_by_nan,logger]

In [12]:
from keras.applications.mobilenet import MobileNet
from keras.applications.densenet import DenseNet
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten
from keras.models import Sequential
from keras import optimizers, callbacks, regularizers
mobile_net_cnn = Sequential()
base_mobilenet_cnn = MobileNet(input_shape = (128,128,3),
                         include_top = False,
                         weights = None,pooling='avg')
mobile_net_cnn.add(base_mobilenet_cnn)
mobile_net_cnn.add(Dropout(0.5))
mobile_net_cnn.add(Dense(512))
mobile_net_cnn.add(Dropout(0.5))
mobile_net_cnn.add(Dense(14, activation = 'sigmoid'))
mobile_net_cnn.compile(optimizer='adam', loss = 'binary_crossentropy',
                      metrics=['categorical_accuracy', auc])
mobile_net_cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenet_1.00_128 (Functio  (None, 1024)             3228864   
 nal)                                                            
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense (Dense)               (None, 512)               524800    
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 14)                7182      
                                                                 
Total params: 3,760,846
Trainable params: 3,738,958
Non-trainable params: 21,888
_________________________________________

In [102]:
# обучение
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
mobilenet_history = mobile_net_cnn.fit_generator(train_generator, epochs= 20,
                                                steps_per_epoch = STEP_SIZE_TRAIN,
                                                validation_data = valid_generator,
                                                validation_steps = STEP_SIZE_VALID,
                                                callbacks = callbacks_lst,
                                                shuffle = False)

  mobilenet_history = mobile_net_cnn.fit_generator(train_generator, epochs= 20,


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
 295/1677 [====>.........................] - ETA: 16:39 - loss: 0.1500 - categorical_accuracy: 0.2250 - auc_4: 0.8342

KeyboardInterrupt: 

In [13]:
# # saving model
# import os
# number_obvervation = len(os.listdir('ZalevskiyDiplom/models'))  
# mobile_net_cnn.save(f'ZalevskiyDiplom/models/mobile_net_cnn{number_obvervation}.h5')
    

NameError: name 'model' is not defined

In [154]:
# Algorithm Prediction per Class - batch size = 1024
validation_generator = image_generator.flow_from_dataframe(
    dataframe=valid_set,
    directory=path_to_images,
    x_col="filename",
    y_col=disease_labels,
    classes=disease_labels,
    target_size=(128, 128),
    seed=42,
    batch_size=1024,
    class_mode='raw',
    color_mode='rgb',
    shuffle=False)



Found 32874 validated image filenames.


In [169]:
from sklearn.metrics import precision_recall_curve, f1_score, auc, roc_curve, confusion_matrix, classification_report
import seaborn as sns
valX, valY = validation_generator.next()
y_predict = np.argmax(mobile_net_cnn.predict(valX),axis=1)
matrix = confusion_matrix(np.argmax(valY,axis=1), y_predict)



In [170]:
matrix

array([[ 84,   0, 502,  19,   0,   0,   0,  43,   0,   0,   4,   4,  21,
          0],
       [  1,   0,  17,   1,   0,   0,   0,   4,   0,   0,   0,   0,   2,
          0],
       [  6,   0, 107,   3,   0,   0,   0,  14,   0,   0,   0,   0,   3,
          0],
       [  3,   0,  16,   6,   0,   0,   0,   6,   0,   0,   0,   0,   3,
          0],
       [  1,   0,   9,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0],
       [  1,   0,   7,   3,   0,   0,   0,   0,   0,   0,   0,   0,   1,
          0],
       [  1,   0,   5,   0,   0,   0,   0,   0,   0,   0,   0,   0,   2,
          0],
       [  2,   0,  22,   2,   0,   0,   0,  16,   0,   0,   0,   0,   0,
          0],
       [  0,   0,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0],
       [  4,   0,  11,   4,   0,   0,   0,   1,   0,   0,   0,   0,   0,
          0],
       [  0,   0,   8,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,
          0],
       [  0,   0,  27,   1,   0,   0,   0, 