In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib.image as im
import cv2 as cv2
import tensorflow as tf
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers,models,Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_addons as tfa
import os

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition metadata tables and preview the training rows.
train_csv = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/train.csv')
test_csv = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/test.csv')
print(train_csv.head())

# Raise pandas' display limits and show floats with three decimals.
# These settings apply to everything displayed after this point.
for limit_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(limit_option, 500)
pd.set_option('display.float_format', '{:.3f}'.format)

In [None]:
# Log the device every op is placed on (verbose; useful to confirm GPU usage).
tf.debugging.set_log_device_placement(True)

# Query the physical GPUs once and reuse the result below.
gpu = tf.config.experimental.list_physical_devices('GPU')
print("Num of Physical GPUs Available: ", len(gpu))

if gpu:
    try:
        # Split the first physical GPU into two logical devices of 1024 MB each.
        tf.config.experimental.set_virtual_device_configuration(
            gpu[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024),
             tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)]
        )
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

# Always bind `logical_gpus`, so the prints below also work on a CPU-only
# machine or after the RuntimeError branch (it is an empty list in that case).
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print('Number of Logical GPUs Available: ', len(logical_gpus))

print(gpu)
print(logical_gpus)

In [None]:
# Distribution strategy later used as `strategy.scope()` when building the model.
# NOTE(review): no cluster configuration is set in this notebook, so this
# presumably runs as a single worker over the local devices -- confirm that
# MultiWorkerMirroredStrategy (rather than MirroredStrategy) is intended.
strategy=tf.distribute.experimental.MultiWorkerMirroredStrategy()
print(strategy)

In [None]:
# Quick structural overview of the training metadata:
# first rows, column names, non-null counts per column, and (rows, cols).
overview_items = (
    train_csv.head(),
    train_csv.columns,
    train_csv.count(),
    train_csv.shape,
)
for overview_item in overview_items:
    print(overview_item)


In [None]:
# Class balance of the binary `target` column.
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed by the notebook.
print(train_csv['target'].value_counts())

fig, ax = plt.subplots()
ax.hist(train_csv['target'])
ax.set(title='Target distribution', xlabel='target', ylabel='count')
plt.show()

In [None]:
# Per-column flag: does the column contain any missing value?
print(train_csv.isna().any())
print('\n')

# Missing-value counts for the three metadata columns inspected here.
null_report_columns = (
    ('Sex', 'sex'),
    ('Age', 'age_approx'),
    ('Anatom', 'anatom_site_general_challenge'),
)
for label, column in null_report_columns:
    print(f'Null values in {label} Column : {train_csv[column].isna().sum()}')

In [None]:
# Summary statistics and histogram of the approximate patient age.
# The summary is printed explicitly: a bare expression that is not the last
# line of a cell is never displayed by the notebook.
print(train_csv['age_approx'].describe())

fig, ax = plt.subplots()
ax.hist(train_csv['age_approx'])
ax.set(title='Approximate age distribution', xlabel='age_approx', ylabel='count')
plt.show()

In [None]:
# Replace missing ages with the mean of the observed ages, then confirm
# that no missing value is left (displayed as the cell output).
train_csv['age_approx'] = train_csv['age_approx'].fillna(
    train_csv['age_approx'].mean()
)
train_csv['age_approx'].isna().any()

In [None]:
site_col = 'anatom_site_general_challenge'

# Site frequencies per sex before imputation.
print(train_csv.groupby('sex')[site_col].value_counts())

# Fill missing sites with the constant 'torso'.
train_csv[site_col] = train_csv[site_col].fillna('torso')

print('\n')
# Overall site frequencies after imputation.
print(train_csv[site_col].value_counts())

In [None]:
print(train_csv['sex'].value_counts())

# Encode sex as 1 for 'male' and 0 for anything else.
# Note that missing values therefore also become 0.
fun=lambda x:1 if (x=='male') else 0

# Only encode while the column still holds the raw labels. Without this guard,
# re-running the cell would compare the already-encoded integers with 'male'
# and silently turn the whole column into zeros.
if not pd.api.types.is_numeric_dtype(train_csv['sex']):
    train_csv['sex']=train_csv['sex'].apply(fun)

In [None]:
# Frequency of each anatomical site within each `sex` group.
train_sex_anatom = (
    train_csv
    .groupby('sex')['anatom_site_general_challenge']
    .value_counts()
)
print(train_sex_anatom)

In [None]:
# Bar chart of anatomical-site frequencies within each `sex` group.
(
    train_csv
    .groupby('sex')['anatom_site_general_challenge']
    .value_counts()
    .plot.bar()
)
plt.show()

In [None]:
# Frequency of benign vs. malignant lesions within each `sex` group.
# (The result reuses the `train_sex_anatom` name from the earlier site summary.)
train_sex_anatom = (
    train_csv
    .groupby('sex')['benign_malignant']
    .value_counts()
)
print(train_sex_anatom)

In [None]:
# Frequency of each diagnosis within the benign and malignant groups.
train_grp = (
    train_csv
    .groupby('benign_malignant')['diagnosis']
    .value_counts()
)
train_grp

In [None]:
# Image geometry used for the generators, the model input and inference.
img_width = img_height = 224
channels = 3

# Directories holding the competition JPEG images.
train_jpg_dir = '/kaggle/input/siim-isic-melanoma-classification/jpeg/train/'
test_jpg_dir = '/kaggle/input/siim-isic-melanoma-classification/jpeg/test/'

In [None]:
# Separate the metadata rows by class: target 0 (benign) and target 1 (malignant).
train_benign = train_csv.loc[train_csv['target'].eq(0)]
train_malig = train_csv.loc[train_csv['target'].eq(1)]

for class_frame in (train_benign, train_malig):
    print(class_frame.shape)
print(train_benign.tail())

In [None]:
# Build several training subsets: each one pairs a consecutive slice of the
# benign rows with ALL malignant rows, so the malignant rows are repeated in
# every subset.
train_benign_batch=10
var='train_benign_'

# Benign rows per slice. One extra slice (train_benign_batch + 1 in total)
# picks up whatever rows are left over after the integer division; that final
# slice can therefore hold very few benign rows, or none at all.
size = train_benign.shape[0] // train_benign_batch
subset_count = train_benign_batch + 1

malig_start = 0
malig_stop = train_malig.shape[0]
malig_paths = (train_jpg_dir + train_malig['image_name'] + '.jpg').tolist()
malig_targets = train_malig['target'].tolist()

train_data = []
train_labels = []
benign_start = 0
benign_stop = size

for _ in range(subset_count):
    # Positional slicing clamps at the end of the frame, so the last slice
    # simply contains the remaining benign rows.
    benign_slice = train_benign.iloc[benign_start:benign_stop]
    benign_paths = (train_jpg_dir + benign_slice['image_name'] + '.jpg').tolist()

    train_data.append(benign_paths + malig_paths)
    train_labels.append(benign_slice['target'].tolist() + malig_targets)

    benign_start, benign_stop = benign_stop, benign_stop + size

# One DataFrame (image path + target) per subset, collected in `source`.
source = []
for subset_paths, subset_targets in zip(train_data, train_labels):
    subset_frame = pd.DataFrame({'images': subset_paths, 'target': subset_targets})
    source.append(subset_frame)
    print(subset_frame['target'].value_counts())

In [None]:
# Full file path of every test image, in the row order of test_csv.
test_labels = []
test_data = (test_jpg_dir + test_csv['image_name'] + '.jpg').tolist()

# Single-column frame of test image paths, consumed by the inference loop.
df_test = pd.DataFrame({'images': test_data})
df_test.head()
    

In [None]:
# For every subset in `source`: make a stratified 80/20 train/validation split
# and wrap each part in a Keras image iterator. train_gen[k] / val_gen[k]
# correspond to source[k].
train_gen=[]
val_gen=[]

# Arguments shared by the training and validation iterators.
shared_flow_args = dict(
    x_col='images',
    y_col='target',
    target_size=(img_width, img_height),
    shuffle=True,
    class_mode='raw',
)

for subset in source:

    X_train, X_val, y_train, y_val = train_test_split(
        subset['images'],
        subset['target'],
        test_size=0.2,
        random_state=20,
        stratify=subset['target'],
    )

    # Paths and labels keep the same index after the split, so they line up.
    train = pd.DataFrame({'images': X_train, 'target': y_train})
    validation = pd.DataFrame({'images': X_val, 'target': y_val})

    # Training images are rescaled to [0, 1] and randomly augmented.
    # NOTE(review): brightness_range=[0.2, 0.5] and width_shift_range=2 are
    # applied to training images only -- confirm these values are intended.
    train_aug_name = ImageDataGenerator(
        rescale=1./255,
        brightness_range=[0.2,0.5],
        shear_range=0.4,
        rotation_range=30,
        horizontal_flip=True,
        width_shift_range=2,
    )

    # Validation images are only rescaled.
    val_aug_name = ImageDataGenerator(rescale=1./255)

    train_generator_name = train_aug_name.flow_from_dataframe(
        train,
        batch_size=32,
        **shared_flow_args,
    )

    val_generator_name = val_aug_name.flow_from_dataframe(
        validation,
        batch_size=16,
        **shared_flow_args,
    )

    train_gen.append(train_generator_name)
    val_gen.append(val_generator_name)



In [None]:
# Train one transfer-learning classifier per selected subset and save each as
# an HDF5 file. After the loop, `model` refers to the last trained model.
epochs=3
num_dataset=1
saved_model_path='/kaggle/working/'

for i in range(num_dataset):

    print('Starting with Dataset : '+str(i))
    model_name='model_'+str(i)

    with strategy.scope():

        # ImageNet-pretrained InceptionV3 without its classification head,
        # frozen so that only the new dense layer is trained. (`classes` is
        # not passed: it has no effect when include_top=False.)
        base_model=tf.keras.applications.InceptionV3(
            include_top=False,
            weights='imagenet',
            input_shape=(img_width,img_height,channels),
        )
        base_model.trainable=False

        # Single sigmoid unit on top of the flattened features -> probability
        # of target == 1.
        model=tf.keras.Sequential([
            base_model,
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(1,activation='sigmoid')
        ])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
            loss='binary_crossentropy',
            metrics=['AUC'],  # metrics are documented as a list
        )

        # `fit` accepts generators directly; `fit_generator` is deprecated
        # and no longer available in recent TensorFlow/Keras releases.
        model.fit(
            train_gen[i],
            validation_data=val_gen[i],
            epochs=epochs,
        )

        print('Saving '+model_name)
        model.save(os.path.join(saved_model_path, model_name+'.h5'))

      



In [None]:
# Score every test image with the most recently trained `model` and store the
# predicted probabilities in the submission frame.
# NOTE(review): assumes sample_submission.csv lists images in the same order
# as test.csv (which df_test was built from) -- confirm.
submission=pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/sample_submission.csv')
target=[]

# Images are scored in small batches instead of one predict() call per image,
# which avoids the per-call overhead without holding the whole test set in memory.
predict_batch_size=32
test_paths=[str(path) for path in df_test['images']]

for batch_start in tqdm(range(0, len(test_paths), predict_batch_size)):
    batch_images=[]

    for path in test_paths[batch_start:batch_start+predict_batch_size]:
        img=cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None;
            # fail with the offending path instead of a cryptic resize error.
            raise FileNotFoundError('Could not read test image: '+path)

        # cv2 takes dsize as (columns, rows); this yields an array of shape
        # (img_width, img_height, channels), matching the model's input_shape.
        img=cv2.resize(img, (img_height, img_width))
        img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Same 1/255 rescaling as the training/validation generators.
        batch_images.append(img.astype(np.float32)/255.)

    prediction=model.predict(np.stack(batch_images), verbose=0)
    # One sigmoid output per image.
    target.extend(prediction.flatten())

submission['target']=target
print(submission.head())

In [None]:
# Write the submission file to the Kaggle working directory.
submission.to_csv('/kaggle/working/incept_try_submission.csv', index=False)

# Print the shape explicitly: a bare expression that is not the last line of
# a cell is never displayed by the notebook.
print(submission.shape)
submission.head()
