In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import keras as ks

from keras.models import Sequential, Model
from keras.layers import Conv2D, Dense, MaxPooling2D, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

        
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read both competition metadata tables; every column is kept as a string.
csv_options = {"dtype": str}
train_original = pd.read_csv(
    "../input/siim-isic-melanoma-classification/train.csv", **csv_options
)
test_original = pd.read_csv(
    "../input/siim-isic-melanoma-classification/test.csv", **csv_options
)

In [None]:
# Number of rows in the training metadata.
train_original.shape[0]

In [None]:
# Peek at the first training row to see the available columns.
train_original.iloc[:1]

In [None]:
# Peek at the first test row to see the available columns.
test_original.iloc[:1]

In [None]:
# Full-resolution JPEG of one test image, as shipped with the competition data.
image = Image.open("../input/siim-isic-melanoma-classification/jpeg/test/ISIC_0052060.jpg")
fig, ax = plt.subplots()
imgplot = ax.imshow(image)
plt.show()

In [None]:
# The same test image taken from the pre-resized PNG dataset.
image = Image.open("../input/siic-isic-224x224-images/test/ISIC_0052060.png")
fig, ax = plt.subplots()
imgplot = ax.imshow(image)
plt.show()

In [None]:
# Render the training metadata frame as this cell's output.
train_original

In [None]:
# Keep every row with target == '1' and undersample the target == '0' rows to
# twice that count. The fixed random_state makes the balanced set identical on
# every "Restart & Run All"; without it each run trains on different negatives.
part_true = train_original[train_original["target"] == '1']
part_false = train_original[train_original["target"] == '0'].sample(
    n=len(part_true) * 2,
    random_state=420,
)

In [None]:
# Row counts of the target == '1' and target == '0' subsets, one per line.
print(len(part_true), len(part_false), sep="\n")

In [None]:
# Stack the two subsets row-wise (original index labels are kept).
train_balanced = pd.concat([part_true, part_false], axis=0)
train_balanced.shape[0]

In [None]:
# Preview of a 70/30 split: `a` is a random 70% sample, `b` is every row whose
# image_name was not drawn into `a`.
n_sampled = round(len(train_balanced) * 0.7)
a = train_balanced.sample(n_sampled)
in_a = train_balanced["image_name"].isin(a["image_name"])
b = train_balanced[~in_a]
print(len(a), len(b), sep="\n")

In [None]:
# t = train_balanced.sample(n=300)
t = train_balanced
# The generators below read images by file name, so derive it from image_name.
t["file_name"] = t["image_name"] + ".png"


# 70/30 train/validation split. The fixed random_state keeps the split stable
# across reruns so that validation scores are comparable between runs.
train_set = t.sample(round(len(t) * 0.7), random_state=420)
val_set = t[~t["image_name"].isin(train_set["image_name"])]


# val_set_mock = t[190:200]
# val_set_mock_file_names = np.array("../input/siic-isic-224x224-images/train/" + val_set_mock["file_name"])
# val_set_mock_images = np.array([np.array(Image.open(fname)) for i, fname in enumerate(val_set_mock_file_names)])
# val_set_mock_labels = val_set_mock["diagnosis"]


# NOTE(review): only 500 test rows are sampled, so everything derived from
# test_set (predictions, submission) covers just this subset -- confirm intended.
test_set = test_original.sample(n=500, random_state=420)
test_set["file_name"] = test_set["image_name"] + ".png"


def _load_image(path):
    """Read one image file into a NumPy array and close the file handle immediately."""
    with Image.open(path) as img:
        return np.array(img)


images = np.array("../input/siic-isic-224x224-images/test/" + test_set["file_name"])
test_set_images = np.array([_load_image(fname) for fname in images])

In [None]:
# Row counts of the train, validation and test subsets, one per line.
# (val_set_mock is disabled above, so its size is not printed.)
for subset in (train_set, val_set, test_set):
    print(len(subset))

In [None]:
# Distinct values of the "diagnosis" column in the training split
# (the number of categories is derived from this array in the next cell).
truth = pd.unique(train_set["diagnosis"])
truth

In [None]:
# Image geometry fed to the network.
img_width = 224
img_height = 224

# Dataset sizes used when configuring training.
nb_train_samples = len(train_set)
nb_validation_samples = len(val_set)

# Training schedule.
epochs = 10
batch_size = 64

# Count of distinct "diagnosis" values found in the training split.
n_classes = len(truth)

In [None]:

# Keyword arguments shared by the training and validation iterators. Labels come
# from the binary "target" column; the disabled alternative in the original cell
# was y_col="diagnosis" with class_mode="categorical".
shared_flow_options = dict(
    directory="../input/siic-isic-224x224-images/train",
    x_col="file_name",
    y_col="target",
    class_mode="binary",
    target_size=(img_width, img_height),
    batch_size=batch_size,
    validate_filenames=False,
)

# Training pipeline: scale pixel values by 1/255 and apply light augmentation.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    zoom_range=0.2,
    rotation_range=5,
    horizontal_flip=True,
)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_set,
    **shared_flow_options,
)

# Validation pipeline: rescaling only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1. / 255)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_set,
    **shared_flow_options,
)

In [None]:
# Unlabelled iterator over the sampled test images (class_mode=None yields only
# image batches). Rescaling matches the training/validation pipelines.
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_set,
    directory="../input/siic-isic-224x224-images/test",
    x_col="file_name",
    y_col=None,
    batch_size=1,
    seed=420,
    # shuffle defaults to True. Predictions are later paired with
    # test_set["image_name"] by position, so the iterator must yield images in
    # dataframe order or every prediction is attached to the wrong image.
    shuffle=False,
    class_mode=None,
    target_size=(img_width, img_height),
    validate_filenames = False
)

In [None]:
# DenseNet169 convolutional backbone without its classification top, initialised
# from a local weights file.
densenet_weights = '../input/densenet-keras/DenseNet-BC-169-32-no-top.h5'
base_model = ks.applications.densenet.DenseNet169(
    weights=densenet_weights,
    include_top=False,
    input_shape=(img_width, img_height, 3),
)

# Leave the backbone trainable so it is fine-tuned together with the new head.
base_model.trainable = True

In [None]:
# Classification head stacked on top of the backbone's feature maps.
add_model = Sequential()
add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
add_model.add(Dense(256, activation='relu'))
add_model.add(Dropout(0.2))
# A single-unit softmax always outputs 1.0, so the network could never express a
# probability. The generators use class_mode="binary" (one 0/1 label per image),
# which calls for one sigmoid unit.
add_model.add(Dense(1, activation='sigmoid'))

model = Model(inputs = base_model.input, outputs = add_model(base_model.output))
# binary_crossentropy is the loss that matches a single sigmoid output with 0/1
# labels; categorical_crossentropy expects one-hot targets over several units.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the combined model.
model.summary()

In [None]:
# Train for `epochs` passes over train_generator, validating after each epoch.
# validation_steps is taken from the iterator's own length (its number of
# batches) instead of `nb_validation_samples // batch_size`: the floor division
# dropped the final partial batch and evaluates to 0 when the validation set is
# smaller than one batch.
history = model.fit_generator(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=len(val_generator))

In [None]:
# Evaluate on the validation iterator. score holds the loss followed by the
# compiled metrics (accuracy). These are validation figures, not test figures:
# the test set has no labels in this notebook, so the printed labels say so.
score = model.evaluate_generator(val_generator)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

In [None]:
# Run the trained model over every batch produced by the test iterator.
predict = model.predict_generator(test_generator)

In [None]:
# Pair each sampled test image name with its flattened prediction (positional
# pairing: row i of test_set gets prediction i), then write the CSV without the
# index column.
submission = test_set[["image_name"]].assign(target=predict.flatten())
submission.to_csv('sSubmission.csv', index=False)

In [None]:
# Show the raw prediction array as this cell's output.
predict

In [None]:
# Show the submission frame as this cell's output.
submission

In [None]:
# prediction = model.predict(val_set_mock_images)

In [None]:
# p = np.round(prediction)
# indexes = np.where(p == 1)[1]
# comparisons = (truth[indexes]) == val_set_mock_labels
# true_predictions = np.sum(comparisons)
# accuracy = true_predictions * 100 / len(val_set_mock_labels)
# accuracy