In [None]:
!pip install -q focal_loss
# import os
import numpy as np
import pandas as pd
from keras.models import load_model, Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from pylab import histogram, plot, show, interp, xlabel, ylabel
from cv2 import resize, imread, cvtColor, inpaint, threshold, morphologyEx, INTER_AREA, MORPH_BLACKHAT, \
	getStructuringElement, \
	THRESH_BINARY, INPAINT_TELEA, COLOR_BGR2GRAY
import random
from focal_loss import BinaryFocalLoss
# from keras.losses import BinaryCrossentropy
from pickle import load, dump

In [None]:

# Central run configuration; every later cell reads its settings from here.
# (A BinaryCrossentropy( label_smoothing=0.7 ) loss from keras.losses was the
# earlier alternative to the focal loss below.)
cfg = dict(
	# --- image preprocessing ---
	IM_SIZE=(288, 192),  # size tuple handed to cv2 resize() in rmhair()
	NBR_BINS=256,  # not read by any visible cell; histeq() has its own default of 256
	# --- random architecture search: each value is drawn from MAIN +/- RANGE ---
	EPOCH_MAIN=100,
	EPOCH_RANGE=95,
	NEURON_MAIN=100,
	NEURON_RANGE=60,
	LAYER_MAIN=7,
	LAYER_RANGE=5,
	# --- training ---
	LOSS=BinaryFocalLoss( gamma=3, label_smoothing=0.7 ),
	CALLBACK=EarlyStopping( monitor='loss', patience=5 ),
	TEST_DATA_PERCENTAGE=10,  # share of the samples held out for evaluation
	ITERATIONS=5,  # number of candidate models trained by the search loop
	# --- caching / debugging switches ---
	USE_OLD_DATA=False,  # try to load 'traindata.pickle' instead of preprocessing
	SAVE_NEW_DATA=False,  # let preprocess() pickle its result
	OFFLINE=False,  # True restricts training data to a handful of fixed images
)

print( 'Imported Successfully!' )

In [None]:


# Dataset locations, relative to the notebook's working directory.
f = "../input/siim-isic-melanoma-classification/"
fimtr = "../input/melanoma-merged-external-data-512x512-jpeg/512x512-dataset-melanoma/512x512-dataset-melanoma/"
fimte = "../input/melanoma-merged-external-data-512x512-jpeg/512x512-test/512x512-test/"

# Columns kept from the CSV files; stored column-wise as plain Python lists.
_META_COLUMNS = ["image_name", "sex", "age_approx", "anatom_site_general_challenge"]

traindata = pd.read_csv( f+"train.csv" )
testdata = pd.read_csv( f+"test.csv" )
trainData = { k: list( traindata[k] ) for k in _META_COLUMNS }
testData = { k: list( testdata[k] ) for k in _META_COLUMNS }
y = list( traindata['target'] )  # labels, row-aligned with trainData
# The DataFrames are no longer needed once the columns are copied out.
del traindata
del testdata
print( 'Defined main variables successfully!' )
rm = []
old = cfg['USE_OLD_DATA']
save = cfg['SAVE_NEW_DATA']

if cfg['OFFLINE']:
	# Debug mode: keep only a few fixed images so the pipeline runs quickly.
	pics = ['ISIC_0015719', 'ISIC_0052212', 'ISIC_0068279', 'ISIC_0074268', 'ISIC_0074311']
	x = { k: [] for k in trainData.keys() }
	ytmp = []

	for pic in pics:
		# list.index raises ValueError if an image id is missing from train.csv.
		i = trainData['image_name'].index( pic )
		for k in trainData.keys():
			x[k].append( trainData[k][i] )
		ytmp.append( y[i] )

	trainData = x
	y = ytmp

In [None]:

def _known_categories( values ):
    """Return the distinct, non-missing entries of `values` in a stable order.

    Missing entries (float NaN / None, or the literal string 'nan') are left
    out, so encode() maps them to its fallback code 0. Sorting makes the
    position of each category, and therefore its integer code, reproducible;
    iterating a set of strings directly can give a different order on every
    interpreter start.
    """
    return sorted( { v for v in values if not pd.isna( v ) and v != 'nan' }, key=str )


uniqueanatom = _known_categories( trainData['anatom_site_general_challenge'] )
# `unique` is kept as an alias of the same list, as in the original assignment.
uniquesex = unique = _known_categories( trainData['sex'] )

def encode( d ):
    """Return a copy of `d` with the categorical columns replaced by integer codes.

    Parameters:
        d: dict mapping column name to a list of values; must contain the
           'anatom_site_general_challenge' and 'sex' columns.

    Returns:
        A new dict with the same keys. Each value of the two categorical
        columns becomes its position in the module-level `uniqueanatom` /
        `uniquesex` list, or 0 when the value is not in that list. All other
        columns are shallow-copied unchanged.

    The input dict and its lists are left untouched, so the same raw data can
    be encoded more than once with the same result.

    NOTE(review): code 0 is shared by "unknown value" and by the first entry of
    each category list.
    """
    rd = { k: list( v ) for k, v in d.items() }
    for column, categories in (('anatom_site_general_challenge', uniqueanatom),
                               ('sex', uniquesex)):
        rd[column] = [categories.index( v ) if v in categories else 0 for v in d[column]]
    return rd


def rmrubbish( trainData ):
	"""Report rows holding a zero/empty value and return the data unchanged.

	Parameters:
		trainData: dict mapping column name to a list of values.

	Returns:
		The very same dict object, with every row still present.

	A row is flagged when any of its columns holds 0 or another falsy value.
	Flagged rows are only counted, never dropped: the original implementation
	also returned its input unfiltered, and the label list `y` is not filtered
	anywhere, so dropping rows here would misalign samples and labels.
	NOTE(review): if real filtering is wanted, the labels must be filtered in
	the same step.
	"""
	# Row count, not len( trainData ), which would be the number of columns.
	n_rows = max( (len( column ) for column in trainData.values()), default=0 )
	print( 'rmrubbish():\n\tInput Length:', n_rows, end=', ' )

	# A set gives constant-time de-duplication of the flagged row indices.
	flagged = {
		i
		for column in trainData.values()
		for i, value in enumerate( column )
		if value == 0 or not value
	}

	print( '\tOutput:', n_rows, '(flagged but kept:', str( len( flagged ) )+')' )
	return trainData


def histeq( im, nbr_bins=256 ):
	"""Equalise the intensity histogram of a grayscale image.

	Parameters:
		im: array of pixel intensities.
		nbr_bins: number of histogram bins (default 256).

	Returns:
		An array shaped like `im` whose values are the rescaled cumulative
		histogram, looked up by linear interpolation.
	"""
	counts, edges = histogram( im, nbr_bins )
	running_total = counts.cumsum()
	# Rescale the cumulative counts so that the last entry equals 255.
	scaled_cdf = 255*running_total/running_total[-1]
	# Interpolate against the left edge of every bin.
	left_edges = edges[:-1]
	return interp( im, left_edges, scaled_cdf )


def rmhair( image ):
	"""Load an image as grayscale, shrink it and inpaint thin dark structures.

	Going by the function name, the dark structures are meant to be hairs.

	Parameters:
		image: path of the image file to read.

	Returns:
		The inpainted single-channel image at size cfg['IM_SIZE'].
	"""
	# Sizes tried previously: (200,133), (128, 85), (192, 128), (500, 300)
	gray = imread( image, 0 )  # flag 0: read as single-channel grayscale
	src = resize( gray, cfg['IM_SIZE'], interpolation=INTER_AREA )

	# Black-hat filtering with a 17x17 element (shape code 1, cv2's cross shape).
	kernel = getStructuringElement( 1, (17, 17) )
	blackhat = morphologyEx( src, MORPH_BLACKHAT, kernel )

	# Responses above 10 become the 255-valued mask of pixels to repaint.
	mask = threshold( blackhat, 10, 255, THRESH_BINARY )[1]

	# Fill the masked pixels from their surroundings (radius 1, Telea method).
	return inpaint( src, mask, 1, INPAINT_TELEA )


def _to_int_or_zero( value ):
	"""Convert `value` to int, mapping missing entries (NaN / None) to 0."""
	return 0 if pd.isna( value ) else int( value )


def preprocess( ind, fim, r=True ):
	"""Turn metadata plus image files into one flat feature vector per sample.

	Parameters:
		ind: dict of columns with 'image_name', 'sex', 'age_approx' and
		     'anatom_site_general_challenge'.
		fim: path prefix of the image directory; each file is read from
		     fim + image_name + '.jpg'.
		r:   when True, pass the encoded data through rmrubbish().

	Returns:
		A list with one 1-D uint8 array per sample: the flattened output of
		rmhair() followed by the sex code, the age and the anatomical-site code.

	Side effects:
		When the module-level `save` flag is set, the result is pickled to
		'data.pickle' on a best-effort basis; a failure is only reported.
	"""
	print( 'Preprocessing' )

	D = encode( ind )

	if r:
		D = rmrubbish( D )

	d = []
	for i, name in enumerate( D['image_name'] ):
		# Keep the pixels in a numpy array; expanding every image into a Python
		# list of float scalars costs many times the memory of the array.
		pixels = np.asarray( rmhair( fim+name+'.jpg' ) ).ravel()
		# Missing metadata becomes 0 instead of crashing int() on NaN.
		meta = np.array( [
			_to_int_or_zero( D['sex'][i] ),
			_to_int_or_zero( D['age_approx'][i] ),
			_to_int_or_zero( D['anatom_site_general_challenge'][i] ),
		] )
		d.append( np.concatenate( (pixels, meta) ).astype( np.uint8 ) )

	if save:
		try:
			with open( 'data.pickle', 'wb' ) as fh:
				dump( d, fh )
			print( 'Saved data!' )
		except Exception as err:  # best effort: never lose the computed data over a failed save
			print( 'Couldn\'t save data:', err )
	return d


print( 'Defined functions successfully!\n' )

In [None]:

# Load the cached feature vectors when requested; otherwise, or when the cache
# cannot be read, build them from the images.
traindata = None
if old:
	# NOTE(security): unpickling runs code stored in the file, so only load a
	# cache that this notebook produced.
	# NOTE(review): preprocess() writes 'data.pickle', whereas this cell reads
	# 'traindata.pickle' -- confirm which file name is intended.
	try:
		with open( 'traindata.pickle', 'rb' ) as fh:
			traindata = load( fh )
	except Exception as err:
		print( 'Could not load traindata.pickle:', err )

if traindata is None:
	traindata = preprocess( trainData, fimtr )

print( 'Defined traindata sucessfully!\n' )

In [None]:
l = len( y )
# Hold-out size: TEST_DATA_PERCENTAGE percent of the labels, rounded down, plus one.
s = int( (cfg['TEST_DATA_PERCENTAGE']/100)*l )+1

# Everything except the last `s` samples is used for training. The consumed
# slots are overwritten with None so that `traindata` and `y` stop referencing
# the objects that were handed over.
n_train_x = max( len( traindata )-s, 0 )
x_train = traindata[:n_train_x]
traindata[:n_train_x] = [None]*n_train_x

n_train_y = max( len( y )-s, 0 )
y_train = y[:n_train_y]
y[:n_train_y] = [None]*n_train_y
print( 'Defined x_train and y_train successfully!\n' )

In [None]:

# The last `s` samples form the evaluation set, taken from the end backwards
# (last sample first). Samples and labels are walked in the same order, so they
# stay aligned. Consumed slots are blanked with None.
tail = range( 1, s+1 )

x_test = [traindata[-k] for k in tail]
for k in tail:
	traindata[-k] = None

y_test = [y[-k] for k in tail]
for k in tail:
	y[-k] = None

# Every label must have ended up in exactly one of the two sets.
assert len( y_train )+len( y_test ) == l
del l
print( 'Defined x_test and y_test successfully!\n' )

In [None]:

# Stack the per-sample vectors into 2-D arrays, converting to float32 in a
# single step instead of building an intermediate array first.
x_train = np.array( x_train, dtype=np.float32 )
y_train = np.array( y_train )
x_test = np.array( x_test, dtype=np.float32 )
y_test = np.array( y_test )

# Best-so-far bookkeeping for the search loop. A model saved by an earlier run
# sets the accuracy that new candidates have to beat.
main_model = None
acc = 0
history = None
epochs = None
neurons = None
try:
	main_model = load_model( 'melanoma.h5' )
	acc = main_model.evaluate( x_test, y_test, verbose=0 )[1]
	print( 'Loaded accuracy:', acc )
except Exception as err:
	# No usable saved model: start the search from scratch.
	main_model = None
	acc = 0
	print( 'No previous model loaded:', err )
neuronsL, layersL, accL = ([], [], [])

# Detect hardware, return appropriate distribution strategy
# NOTE(review): this import belongs in the import cell at the top of the notebook.
import tensorflow as tf

try:
	# TPU detection. No parameters necessary if TPU_NAME environment
	# variable is set. On Kaggle this is always the case.
	tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
	print( 'Running on TPU ', tpu.master() )
except ValueError:
	tpu = None

if tpu:
	tf.config.experimental_connect_to_cluster( tpu )
	tf.tpu.experimental.initialize_tpu_system( tpu )
	strategy = tf.distribute.experimental.TPUStrategy( tpu )
else:
	# default distribution strategy in Tensorflow. Works on CPU and single GPU.
	strategy = tf.distribute.get_strategy()

print( "REPLICAS: ", strategy.num_replicas_in_sync )

In [None]:

def _build_dense_model( n_neurons, n_hidden ):
    """Return a compiled fully-connected binary classifier.

    The network has one Dense(relu) input layer, `n_hidden` further Dense(relu)
    layers and a single sigmoid output unit. Every layer is a separate
    instance; multiplying a one-element list would repeat the same layer object.
    """
    layers = [Dense( n_neurons, activation="relu" ) for _ in range( n_hidden+1 )]
    layers.append( Dense( 1, activation="sigmoid" ) )
    net = Sequential( layers )
    net.compile( optimizer=RMSprop( learning_rate=0.002 ), loss=cfg['LOSS'],
                 metrics=["accuracy"] )
    return net


# Random search: each iteration draws a width, depth and epoch budget, trains a
# dense network and keeps the one with the best hold-out accuracy.
#
# The earlier Conv2D/MaxPool2D stack was removed because it could never run:
# those layer classes are not imported, it declared a 2-D input although the
# samples are flat vectors, and it had no output layer. Its failure was
# swallowed and left an uncompiled model for fit().
strat = True
for i in range( cfg['ITERATIONS'] ):
    print( 'Model '+str( i )+' :' )
    mneurons = random.randint( cfg['NEURON_MAIN']-cfg['NEURON_RANGE'], cfg['NEURON_MAIN']+cfg['NEURON_RANGE'] )
    mepochs = random.randint( cfg['EPOCH_MAIN']-cfg['EPOCH_RANGE'], cfg['EPOCH_MAIN']+cfg['EPOCH_RANGE'] )
    mlayers = random.randint( cfg['LAYER_MAIN']-cfg['LAYER_RANGE'], cfg['LAYER_MAIN']+cfg['LAYER_RANGE'] )
    print( '\tNeurons:', mneurons )
    print( '\tLayers:', mlayers )

    # Prefer building under the distribution strategy. If that fails once, fall
    # back to a plain build for this and all later iterations.
    model = None
    if strat:
        try:
            with strategy.scope():
                model = _build_dense_model( mneurons, mlayers )
        except Exception as err:
            print( '\tStrategy build failed:', err )
            strat = False
            model = None
    if model is None:
        model = _build_dense_model( mneurons, mlayers )
    print( '\tStrategy:', strat )

    try:
        hist = model.fit( x_train, y_train, epochs=mepochs, callbacks=[cfg['CALLBACK']], verbose=0 ).history
    except KeyboardInterrupt:
        # Allow a manual stop without losing results of earlier iterations.
        break

    macc = model.evaluate( x_test, y_test, verbose=0 )[1]
    print( '\tMacc: ', macc )

    # Append this run to the on-disk log, starting a new log when none can be read.
    try:
        with open( 'lists.pickle', 'rb' ) as fh:
            neuronsL, layersL, accL = load( fh )
        saved_label = '\tSaved:'
    except Exception:
        neuronsL, layersL, accL = ([], [], [])
        saved_label = '\tFirst Saved:'
    neuronsL.append( mneurons )
    layersL.append( mlayers )
    accL.append( macc )
    with open( 'lists.pickle', 'wb' ) as fh:
        dump( (neuronsL, layersL, accL), fh )
    print( saved_label, neuronsL[-1], layersL[-1], accL[-1] )
    neuronsL, layersL, accL = ([], [], [])

    if macc > acc:
        # Keep the best model in memory even when writing it to disk fails.
        main_model = model
        try:
            model.save( "melanoma.h5" )
            print( 'Saved model!' )
        except Exception as err:
            print( 'Model Not saved', err )
        acc = macc
        epochs = mepochs
        neurons = mneurons
        history = hist
# Release the training arrays.
x_train, y_train, x_test, y_test = (None, None, None, None)

In [None]:

# Training-accuracy curve of the best model found in this session, if any.
if history:
	plot( range( len( history['accuracy'] ) ), history['accuracy'], 'r-' )
	xlabel( 'Epochs' )
	ylabel( 'Accuracy' )
	show()

# Scatter of every logged run: x = layers, y = neurons, colour from blue (low
# accuracy) to red (high accuracy).
try:
	with open( 'lists.pickle', 'rb' ) as fh:
		neuronsL, layersL, accL = load( fh )
	for n_layers, n_neurons, accuracy in zip( layersL, neuronsL, accL ):
		# Colour channels must lie in [0, 1]; 1-acc+0.3 exceeds 1 when acc < 0.3.
		r = min( max( accuracy, 0.0 ), 1.0 )
		b = min( max( 1-accuracy+0.3, 0.0 ), 1.0 )
		plot( n_layers, n_neurons, '.', color=(r, 0, b) )
		# Labels now name the value actually printed next to them.
		print( 'Layers:', n_layers, '; Neurons:', n_neurons, '; Acc:', accuracy )
	xlabel( 'Layers' )
	ylabel( 'Neurons' )
	show()
except Exception as err:
	print( 'Couldn\'t plot network data', err )

print( 'Successfuly trained and saved model!\n' )

In [None]:
# Fetch a previously trained melanoma.h5 into the working directory.
# NOTE(review): the URL embeds what looks like a signed access token; such links
# presumably expire -- consider attaching the model as a dataset input instead.
# NOTE(review): without -O, wget is expected to keep an existing melanoma.h5 and
# write melanoma.h5.1 instead -- confirm which file the next cell should load.
!wget 'https://www.kaggleusercontent.com/kf/39822866/eyJhbGciOiJkaXIiLCJlbmMiOiJBMTI4Q0JDLUhTMjU2In0..32KTzOldgMygZ6SMLxfTow._yP8dD8YDBYQwZQrETlQQkX9Kh7-w-Z7CM6RfK-6mbKkPIsiTcSbVVfEop7BjRwzjZSwHEOqcQ8f2Crr3EfcUF_-lsHm2hsnbdEdVNB8S_BeR6XaiXHI8NR0d2xbyCSv3iHeNJlkgpboILcWKH84J7hVeb34Cc_-RtjTI3WYS4U97YO9dugSvyX7jegXTwYzDivU5viMi4aFnYm2IlYknCHlWSmvncP0-7xoydeGdL0jVALTSGgevXIwVGfyukxvrpwLOSZmqiu8KNAB0H4IKZ_CLBIPfOV4gtiSftl8OuV6c_STOli4dKGqmSkdNsZtJJhV_IdU97TRFZhz-DVonqSnRKf92BOH073BIo6dXU-3xRsJG6x0wwvw2G2wMKYOSddG0puW3mFGZct37VAhC9PxD0iI23dg5VEwPfm-JiWcQwQDrqnaBD8CqYghVS-Fml8zmskeRUJrp2xCXAhkiTcnQWra1ymKueA-bbKu8VLVbVIGZj05PcQhvmdLHjJgiIg4Q4Soj-xpXmhn36lVDvyCcLJG6IbYMUoWlovgf4WnOUKd6vQ4BBYET4AjuRiwevuG6vYDKE6xopgGMhy4KHQrBSihZBI--5AHVQTfLp3cqKPdP7Gs5vn9QJzPCDWxyuBeqGkb3rePFCJvwMW7MP3S9X6tTU6pZ6Tmtbu63ds.BA0SNZO0Em-3KbA8x18Lnw/melanoma.h5'

In [None]:
# Build submission.csv: one predicted melanoma probability per test image.

# Inference only, so the training configuration (loss, optimizer) stored in the
# file does not need to be restored.
model = load_model( 'melanoma.h5', compile=False )

# Predict on the test set. The original cell used trainData / fimtr here and
# left testData / fimte unused.
# NOTE(review): confirm that test-set predictions are what is wanted.
data = testData
# r=False keeps every row so predictions stay aligned with data['image_name'];
# float32 matches the dtype the models were trained on.
predictiondata = np.array( preprocess( data, fimte, False ) ).astype( np.float32 )

# Use the model loaded above rather than `main_model`, which only exists when
# the training cells have run in this session.
predicted = list( model.predict( predictiondata ) )

# Each prediction row holds a single sigmoid output.
predictions = [row[0] for row in predicted]

df = pd.DataFrame( data={ 'image_name': data['image_name'], 'target': predictions } )
df.to_csv( 'submission.csv', sep=',', index=False )