# In [None]:
import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import keras as ks

from keras.models import Sequential, Model
from keras.layers import Conv2D, Dense, MaxPooling2D, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
from tensorflow.keras.applications.densenet import preprocess_input




# Load the competition metadata tables. Every column is read as a string,
# which the later `== '1'` / `== '0'` comparisons on "target" rely on.
train_original = pd.read_csv(
    "../input/siim-isic-melanoma-classification/train.csv",
    dtype=str,
)
test_original = pd.read_csv(
    "../input/siim-isic-melanoma-classification/test.csv",
    dtype=str,
)




# Build a 1:2 training table: every row labelled '1' plus a random sample of
# rows labelled '0' that is twice as large. No seed is set, so the sampled
# rows differ from run to run.
part_true = train_original.loc[train_original["target"].eq('1')]
part_false = train_original.loc[train_original["target"].eq('0')].sample(
    n=2 * len(part_true)
)
train_balanced = pd.concat([part_true, part_false], axis=0)




# `t` is an alias of train_balanced (not a copy), so the "file_name" column
# added here also appears on train_balanced.
t = train_balanced
t["file_name"] = t["image_name"] + ".png"

# Random 70/30 split (unseeded): 70% of the rows go to training, and every
# row whose image_name was not drawn goes to validation.
train_set = t.sample(n=round(0.7 * len(t)))
val_set = t.loc[~t["image_name"].isin(train_set["image_name"])]

# test_set is the same object as test_original; the column is added in place.
test_original["file_name"] = test_original["image_name"] + ".png"
test_set = test_original





# Run configuration shared by the generators, the model and the training call.
img_width = img_height = 224  # square target size passed to the generators
batch_size = 128
epochs = 10
nb_validation_samples = len(val_set)






# Training pipeline: scale pixel values by 1/255 and apply light augmentation
# (zoom, small rotations, horizontal flips).
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=5,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Stream batches from disk, using "file_name" as the image path and "target"
# as a binary label. validate_filenames=False skips the up-front check of the
# listed files (presumably to save start-up time; a missing file would then
# only fail when its batch is loaded).
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_set,
    directory="../input/siic-isic-224x224-images/train",
    x_col="file_name",
    y_col="target",
    class_mode="binary",
    target_size=(img_width, img_height),
    batch_size=batch_size,
    validate_filenames=False,
)








# Validation pipeline: only the 1/255 rescaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# The validation images are read from the same "train" image directory; the
# rows come from val_set.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_set,
    directory="../input/siic-isic-224x224-images/train",
    x_col="file_name",
    y_col="target",
    class_mode="binary",
    target_size=(img_width, img_height),
    batch_size=batch_size,
    validate_filenames=False,
)








# Test pipeline: only the 1/255 rescaling, and no labels (class_mode=None).
test_datagen = ImageDataGenerator(rescale=1. / 255)

# shuffle=False is essential here: flow_from_dataframe shuffles by default,
# and the predictions are later paired positionally with
# test_set["image_name"]. With shuffling on, every prediction would be
# attached to the wrong image. With shuffling off, `seed` no longer affects
# the order; it is kept only so the call signature stays as before.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_set,
    directory="../input/siic-isic-224x224-images/test",
    x_col="file_name",
    y_col=None,
    batch_size=1,
    seed=420,
    shuffle=False,
    class_mode=None,
    target_size=(img_width, img_height),
    validate_filenames = False
)







# DenseNet169 convolutional backbone without its classification top,
# initialised from a local weights file.
base_model = ks.applications.densenet.DenseNet169(
    weights='../input/densenet-keras/DenseNet-BC-169-32-no-top.h5',
    include_top=False,
    input_shape=(img_width, img_height, 3),
)

# Freeze the backbone so only layers added on top of it are trained.
base_model.trainable = False



# Classification head stacked on top of the frozen backbone's feature map.
add_model = Sequential()
# The input shape belongs on the FIRST layer of a Sequential model; it
# describes the backbone's output feature map (batch dimension dropped).
add_model.add(Conv2D(32, kernel_size=(3,3), activation='relu',
                     input_shape=base_model.output_shape[1:]))
add_model.add(MaxPooling2D(pool_size=(2, 2)))
add_model.add(Flatten())
add_model.add(Dense(256, activation='relu'))
add_model.add(Dropout(0.2))
# A single output unit needs a sigmoid for binary classification. Softmax
# over one unit always yields 1.0, so the model could never learn or predict
# anything other than the positive class.
add_model.add(Dense(1, activation='sigmoid'))



# Chain the backbone and the head into one end-to-end model: the head is
# applied to the backbone's output tensor.
model = Model(
    inputs=base_model.input,
    outputs=add_model(base_model.output),
)

# Binary cross-entropy matches the single-unit binary target.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)









# Train on the augmented training stream. Validation runs for a whole number
# of batches, so a final partial batch of validation samples is not
# evaluated in a given epoch.
history = model.fit_generator(
    generator=train_generator,
    validation_data=val_generator,
    validation_steps=nb_validation_samples // batch_size,
    epochs=epochs,
)






# Score every test image.
predict = model.predict_generator(generator=test_generator)

# Predictions are paired with test_set rows purely by position, so this is
# only correct if test_generator yields images in dataframe order.
submission = test_set[["image_name"]].assign(target=predict.flatten())
submission.to_csv('submission_file.csv', index=False)


