In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os
# Print one sample file per directory under the Kaggle input mount.
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        # Only the first entry of each directory is shown to keep the output short.
        print(os.path.join(dirname, filenames[0]))

In [None]:
import matplotlib.pyplot as plt
import seaborn
import cv2
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
import tensorflow as tf
import keras
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,BatchNormalization
from keras.models import Sequential,Model
from tqdm import tqdm

In [None]:
# Side length in pixels that every image is resized to; also the VGG16 input height/width.
reshape_size =90
# Channel count used for the VGG16 input shape and passed to batchprocessing().
channel = 3
# Number of image paths handed to preprocessing_images() per chunk in batchprocessing();
# model.fit() uses its own hard-coded batch_size.
batch_size = 16

In [None]:
# Load the competition's training metadata table.
df = pd.read_csv('../input/siim-isic-melanoma-classification/train.csv')

In [None]:
# Summarize columns, dtypes and non-null counts of the training metadata.
df.info()

# Value counts

In [None]:
# Number of rows per value of the `target` label (shows the class imbalance).
df.target.value_counts()

In [None]:
# Number of rows per `diagnosis` value.
df.diagnosis.value_counts()

In [None]:
# Number of rows per `sex` value (missing values are not counted by default).
df.sex.value_counts()

In [None]:
# Number of rows per anatomical site (missing values are not counted by default).
df.anatom_site_general_challenge.value_counts()

In [None]:
# Partition the metadata by label: df0 holds the target == 0 rows, df1 the target == 1 rows.
df0 = df.loc[df["target"] == 0]
df1 = df.loc[df["target"] == 1]

In [None]:
# Report (rows, columns) of each label subset, target 0 first, one per line.
print(df0.shape, df1.shape, sep="\n")

In [None]:
# Oversampled candidate set: the first 2336 target-0 rows plus four copies of every target-1 row.
# NOTE(review): sdf1 is not referenced again in the visible notebook — confirm it is still needed.
sdf1 = pd.concat([df0.iloc[0:2336,:],df1,df1,df1,df1])

In [None]:
# Smaller working set: the first 550 target-0 rows (in file order, not randomly sampled)
# followed by all target-1 rows.
sdf2 = pd.concat([df0.iloc[0:550,:],df1])

In [None]:
# Work on a copy so that columns added later do not modify sdf2.
new_df  =sdf2.copy()

In [None]:
def labelfullpath(df,train=True):
    """Attach a ``fullpath`` column holding the JPEG path of every image.

    Args:
        df: DataFrame with an ``image_name`` column; it is modified in place.
        train: When equal to True the paths point into the ``train`` folder,
            otherwise into the ``test`` folder.

    Returns:
        The same DataFrame object, now carrying the ``fullpath`` column.
    """
    split_dir = "train" if train == True else "test"
    image_dir = os.path.join("../input/siim-isic-melanoma-classification/jpeg", split_dir)
    df['fullpath'] = [os.path.join(image_dir, name + ".jpg") for name in df.image_name]
    return df

In [None]:
# Add the train-folder image path for every row, then preview the first rows.
new_df = labelfullpath(new_df)
new_df.head()

In [None]:
# (rows, columns) of the working set.
new_df.shape

In [None]:
# Half the number of rows in the working set (integer division).
# NOTE(review): sample_img is not used anywhere else in the visible notebook.
sample_img=new_df.shape[0]//2

In [None]:
# new_df['gender'] = [float(new_df['sex'].values[i]=='female') for i in range(new_df.shape[0])]
# dict_anatom = {'oral/genital':0,'palms/soles':0.20,'head/neck':0.40,'upper extremity':0.60,'lower extremity':0.80,'torso':1.0}
# new_df['anatom_site'] = [dict_anatom[new_df['anatom_site_general_challenge'].values[i]] for i in range(new_df.shape[0])]
# new_df['age'] = [(new_df['age_approx'].values[i])/100.0 for i in range(new_df.shape[0])]
# x = new_df[['gender','anatom_site','age']].values

# Plot some Images

In [None]:
# Inspect the first generated image path.
new_df.fullpath.values[0]

In [None]:
# Hold out 10% of the rows with a fixed seed. new_df is rebound to the remaining 90%,
# so re-running this cell splits the already reduced frame again.
# NOTE(review): new_df_test is not referenced later in the visible notebook.
new_df,new_df_test = train_test_split(new_df,test_size=0.1,random_state=0)

# Benign (0) images

In [None]:
# Show up to five benign (target == 0) training images with their own metadata.
# The preceding split reorders the rows, so the label is filtered explicitly
# instead of relying on the first five rows being benign.
plt.figure(figsize = (20,10))
one = new_df[new_df.target == 0].head(5)
for i in range(len(one)):
    plt.subplot(1,5,i+1)
    im = plt.imread(one.fullpath.values[i])
    plt.imshow(im)
    plt.title(str(im.shape))
    # Every field is read from row i (the age and site used to come from row 1 for
    # all panels); str() keeps a missing value from breaking the concatenation.
    plt.xlabel(str(one.sex.values[i])+" "+str(one.age_approx.values[i])+"\n"+str(one.anatom_site_general_challenge.values[i]))

# Malignant (1) images

In [None]:
# Show up to five malignant (target == 1) training images with their own metadata.
# The preceding split reorders the rows, so the label is filtered explicitly
# instead of relying on the last five rows being malignant.
plt.figure(figsize = (20,10))
one = new_df[new_df.target == 1].tail(5)
for i in range(len(one)):
    plt.subplot(1,5,i+1)
    im = plt.imread(one.fullpath.values[i])
    plt.imshow(im)
    plt.title(str(im.shape))
    # Every field is read from row i (the age and site used to come from row 1 for
    # all panels); str() keeps a missing value from breaking the concatenation.
    plt.xlabel(str(one.sex.values[i])+" "+str(one.age_approx.values[i])+"\n"+str(one.anatom_site_general_challenge.values[i]))

In [None]:
def preprocessing_images(imglist,channel=1):
    """Load, resize and scale one chunk of images.

    Args:
        imglist: Sequence whose first element is the iterable of image file
            paths to load (callers wrap the chunk in a list).
        channel: Accepted for interface compatibility but not used: every
            image is read with cv2.imread's default flags regardless of it.

    Returns:
        numpy array with one entry per path; each image is resized to
        (reshape_size, reshape_size) and divided by 255.0.

    Raises:
        FileNotFoundError: If cv2.imread cannot read one of the paths.
    """
    image_arr = []
    for img in imglist[0]:
        pixels = cv2.imread(img)
        if pixels is None:
            # cv2.imread reports an unreadable or missing file by returning None;
            # name the offending path instead of failing later inside cv2.resize.
            raise FileNotFoundError("could not read image: {}".format(img))
        pixels = cv2.resize(pixels,(reshape_size,reshape_size))
        image_arr.append(pixels/255.0)
    return np.array(image_arr)

In [None]:
def batchprocessing(imglist,channel=1):
    """Load all images in ``imglist`` chunk by chunk into one array.

    The paths are processed ``batch_size`` at a time through
    preprocessing_images() and the chunks are concatenated once at the end,
    so row k of the result corresponds to ``imglist[k]``.

    Args:
        imglist: Sequence of image file paths.
        channel: Channel count; forwarded to preprocessing_images() and used
            for the shape of the empty result.

    Returns:
        Array with exactly ``len(imglist)`` images along axis 0. For an empty
        ``imglist`` the shape is (0, reshape_size, reshape_size, channel).

    Raises:
        Whatever preprocessing_images() raises. A failed chunk is not skipped,
        because dropping rows would shift every later image against its label.
    """
    total_len = len(imglist)
    chunks = []
    # Stepping by batch_size never produces an empty trailing chunk.
    for start in tqdm(range(0, total_len, batch_size)):
        end = start+batch_size
        chunks.append(preprocessing_images([imglist[start:end]],channel=channel))
    if not chunks:
        return np.empty((0,reshape_size,reshape_size,channel), dtype=float)
    # The result holds loaded images only: no uninitialised placeholder rows
    # are prepended, and one concatenate replaces repeated np.append copies.
    return np.concatenate(chunks,axis=0)

In [None]:
# Load the images listed in new_df.fullpath into one array.
images = batchprocessing(new_df.fullpath.values,channel=channel)

In [None]:
# Dimensions of the loaded image array; axis 0 should equal the number of rows in new_df.
images.shape

In [None]:
# Row positions 0 .. len(new_df)-1, shuffled in place with NumPy's global RNG (no seed is set here).
shuffle_index = list(range(new_df.shape[0]))
np.random.shuffle(shuffle_index)

In [None]:
# Apply the same permutation to the images and to the labels.
# NOTE(review): the pairs only stay matched if images[k] belongs to new_df row k — verify that
# batchprocessing() returns exactly one image per row, in row order.
shuffle_images = images[shuffle_index]
shuffle_labels = new_df.target.values[shuffle_index]

In [None]:
# Single-channel input gets an extra axis at position 3; multi-channel input is used as is.
if channel == 1:
    fit_images = np.expand_dims(shuffle_images,axis=3)
else:
    fit_images = shuffle_images
# One-hot encode the 0/1 labels by indexing the 2x2 identity matrix with the whole
# label vector at once; this also yields shape (0, 2) when there are no labels.
onehot_labels = np.eye(2)[shuffle_labels]

In [None]:
# Sanity check: both arrays must have the same length along axis 0.
print(fit_images.shape)
print(onehot_labels.shape)

In [None]:
# Preview one shuffled image.
# NOTE(review): matplotlib ignores cmap for RGB(A) input, so 'hot' presumably has no effect on
# 3-channel images — confirm.
plt.imshow(shuffle_images[8],cmap = 'hot')

In [None]:
def simple_model():
    """Build and compile a VGG16-based two-class classifier.

    An ImageNet-pretrained VGG16 base (without its top) is followed by
    Flatten, a 1000-unit ReLU Dense layer and a 2-unit softmax output. Every
    base layer is frozen except the second-to-last one, which stays trainable.

    Returns:
        The compiled model (Adam optimizer, categorical cross-entropy loss,
        accuracy metric).
    """
    keras.backend.clear_session()
    vgg = keras.applications.VGG16(input_shape=(reshape_size,reshape_size,channel),include_top=False,weights = 'imagenet')
    # Freeze layer by layer and leave the base model itself trainable: a
    # container marked non-trainable exposes no trainable weights, which would
    # cancel the unfreezing of the single layer below.
    for layer in vgg.layers:
        layer.trainable = False
    vgg.layers[-2].trainable=True
    mainmodel = Sequential([vgg,
                            Flatten(),
                           Dense(1000,activation='relu'),
#                            Dense(1000,activation='relu'),
#                            Dense(500,activation='relu'),
                           Dense(2,activation='softmax')])
    mainmodel.compile('adam','categorical_crossentropy',metrics = ['accuracy'])
    # summary() prints the table itself and returns None, so it is not wrapped in print().
    mainmodel.summary()
    print("input shape ",mainmodel.input_shape)
    print("output shape ",mainmodel.output_shape)
    return mainmodel

In [None]:
# Build and compile the classifier (also prints its summary and input/output shapes).
model = simple_model()

In [None]:
# Train for 40 epochs with mini-batches of 32 (independent of the batch_size constant),
# holding out 20% of the samples for validation; hist keeps the per-epoch metrics.
hist = model.fit(fit_images,onehot_labels,epochs=40,batch_size=32,validation_split=0.2)

In [None]:
# Training curves on the left, validation curves on the right; accuracy and loss share each axis.
fig, (train_ax, val_ax) = plt.subplots(1, 2, figsize=(10,7))
panels = (
    (train_ax, "training set", ("accuracy", "loss")),
    (val_ax, "validation set", ("val_accuracy", "val_loss")),
)
for ax, heading, keys in panels:
    for key in keys:
        ax.plot(hist.history[key], label=key)
    ax.legend()
    ax.set_title(heading)
    ax.grid()
# Only the validation panel is clipped to the 0..4 range.
val_ax.set_ylim((0,4))

In [None]:
# Load the competition's test metadata table.
test_data = pd.read_csv('../input/siim-isic-melanoma-classification/test.csv')

In [None]:
# Preview the first test rows.
test_data.head()

In [None]:
# (rows, columns) of the test metadata.
test_data.shape

In [None]:
# Add the test-folder image path for every test row.
test_data=labelfullpath(test_data,train=False)

In [None]:
# test_data.anatom_site_general_challenge.fillna('torso',inplace=True)
# test_data['gender'] = [float(test_data['sex'].values[i]=='female') for i in range(test_data.shape[0])]
# dict_anatom = {'oral/genital':0,'palms/soles':0.20,'head/neck':0.40,'upper extremity':0.60,'lower extremity':0.80,'torso':1.0}
# test_data['anatom_site'] = [dict_anatom[test_data['anatom_site_general_challenge'].values[i]] for i in range(test_data.shape[0])]
# test_data['age'] = [(test_data['age_approx'].values[i])/100.0 for i in range(test_data.shape[0])]
# xt = test_data[['gender','anatom_site','age']].values

In [None]:
# Half-open row range [start, end) of test images that is loaded and predicted below;
# rows outside it keep the default target value.
start=0
end = 100

In [None]:
# Default every test row to class 0; the scalar is broadcast to the whole column.
test_data['target'] = 0

In [None]:
# Load the test images in rows [start, end) and get the model's two-class outputs for them.
imagest = batchprocessing(test_data.fullpath.values[start:end],channel=channel)
target = model.predict(imagest)

In [None]:
# Predicted class index per image: the position of the larger of the two outputs.
test_target = np.argmax(target,axis=1)

In [None]:
# Write the five sample labels with one positional .iloc assignment; the chained form
# test_data['target'][0:5] = ... may assign to a temporary copy and leave test_data unchanged.
# NOTE(review): these look like hand-entered debug values — confirm they should stay.
test_data.iloc[0:5, test_data.columns.get_loc('target')] = [0,1,1,1,1]

In [None]:
# Show the first five target values to check the assignment above.
test_data['target'][0:5]

In [None]:
# Display the predicted class indices.
test_target

In [None]:
# Store the predicted classes for rows [start, end) with one positional .iloc assignment;
# the chained form test_data['target'][start:end] = ... may write to a temporary copy.
# test_target must contain exactly end - start values.
test_data.iloc[start:end, test_data.columns.get_loc('target')] = test_target
# test_data = test_data[['image_name','target']]

In [None]:
# Shape of the raw model outputs: one row per predicted image, one column per class.
target.shape

In [None]:
# Write the test table without the index. All columns are written, including 'fullpath'.
# NOTE(review): confirm the expected submission columns, since the image_name/target
# selection above is commented out.
test_data.to_csv('submission.csv',index=False)