In [1]:
import pandas as pd 
import numpy as np 
import tensorflow_io as tfio 
import tensorflow as tf
import seaborn as sns 
import fastai2 
import os 
import category_encoders as ce
from   sklearn.cluster import KMeans

### Exploring Given Data  

In [2]:
dirctory='/media/tkrsh/ebbc93a5-618a-433c-b301-2406c8ffeca7/siim-isic-melanoma-classification/jpeg/train/'

In [3]:
os.chdir("/media/tkrsh/ebbc93a5-618a-433c-b301-2406c8ffeca7/siim-isic-melanoma-classification")

In [4]:
df= pd.read_csv("train.csv")
test=pd.read_csv("test.csv")
sub = pd.read_csv("sample_submission.csv")

In [5]:
df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0


In [6]:
features=['sex','age_approx','anatom_site_general_challenge']

In [7]:
targets=['diagnosis','benign_malignant','target']

## Data Preproccesing  

#### 1) Filling NaN Values If Any 

In [8]:
df[features].isna().sum()

sex                               65
age_approx                        68
anatom_site_general_challenge    527
dtype: int64

In [9]:
def fill_na(df):
    df["anatom_site_general_challenge"].fillna(value="Unkown",inplace=True)
    df["age_approx"].fillna(value=int(df.age_approx.mean()),inplace=True)
    df["sex"].fillna(value="female",inplace=True)
    return df
df=fill_na(df)
test=fill_na(test)

Making sure we have filled all NaN values successfully

In [10]:
assert (df[features].isna().sum()).all()==0
assert (test[features].isna().sum()).all()==0

#### 2) Encoding Features in Train DataFrame With CatBoostEncoding 

Converting Age into 3 Different Categories 

In [11]:
Means=KMeans(n_clusters=3).fit((df["age_approx"].values).reshape(-1,1))
def categorize_age(df,Means):
    df["Age_Cat"]=(Means.predict((df["age_approx"].values).reshape(-1,1)))
    df=pd.concat([df,pd.get_dummies(df['Age_Cat'],prefix="_age")],axis=1).drop(['Age_Cat'],axis=1)
    df=df.drop("age_approx",axis=1)
    return df

In [12]:
def cat_boost_encode(features,df):
    Cat_Boost= ce.CatBoostEncoder()
    Cat_Boost.fit(df[features],df["target"])
    df= df.join(Cat_Boost.transform(df[features]).add_suffix("_enc"))
    return df

In [13]:
def one_hot_encode(df):
    df=pd.concat([df,pd.get_dummies(df['sex'])],axis=1).drop(['sex'],axis=1)
    df=pd.concat([df,pd.get_dummies(df['anatom_site_general_challenge'])],axis=1).drop(['anatom_site_general_challenge'],axis=1)
    return df

In [14]:
df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0


#### Creating Train DataFrame

In [15]:
train=categorize_age(df,Means)

In [16]:
train=one_hot_encode(train)

In [17]:
train.head()

Unnamed: 0,image_name,patient_id,diagnosis,benign_malignant,target,_age_0,_age_1,_age_2,female,male,Unkown,head/neck,lower extremity,oral/genital,palms/soles,torso,upper extremity
0,ISIC_2637011,IP_7279968,unknown,benign,0,0,0,1,0,1,0,1,0,0,0,0,0
1,ISIC_0015719,IP_3075186,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1
2,ISIC_0052212,IP_2842074,nevus,benign,0,0,0,1,1,0,0,0,1,0,0,0,0
3,ISIC_0068279,IP_6890425,unknown,benign,0,0,0,1,1,0,0,1,0,0,0,0,0
4,ISIC_0074268,IP_8723313,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1


### Encoding Features In Test DataFrame

Converting age into 3 categories 

In [18]:
test=categorize_age(test,Means)

In [19]:
test.head()

Unnamed: 0,image_name,patient_id,sex,anatom_site_general_challenge,_age_0,_age_1,_age_2
0,ISIC_0052060,IP_3579794,male,Unkown,1,0,0
1,ISIC_0052349,IP_7782715,male,lower extremity,0,1,0
2,ISIC_0058510,IP_7960270,female,torso,0,0,1
3,ISIC_0073313,IP_6375035,female,torso,0,0,1
4,ISIC_0073502,IP_0589375,female,lower extremity,0,0,1


In [20]:
test=one_hot_encode(test)

In [21]:
train.head()

Unnamed: 0,image_name,patient_id,diagnosis,benign_malignant,target,_age_0,_age_1,_age_2,female,male,Unkown,head/neck,lower extremity,oral/genital,palms/soles,torso,upper extremity
0,ISIC_2637011,IP_7279968,unknown,benign,0,0,0,1,0,1,0,1,0,0,0,0,0
1,ISIC_0015719,IP_3075186,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1
2,ISIC_0052212,IP_2842074,nevus,benign,0,0,0,1,1,0,0,0,1,0,0,0,0
3,ISIC_0068279,IP_6890425,unknown,benign,0,0,0,1,1,0,0,1,0,0,0,0,0
4,ISIC_0074268,IP_8723313,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1


In [22]:
train.head()

Unnamed: 0,image_name,patient_id,diagnosis,benign_malignant,target,_age_0,_age_1,_age_2,female,male,Unkown,head/neck,lower extremity,oral/genital,palms/soles,torso,upper extremity
0,ISIC_2637011,IP_7279968,unknown,benign,0,0,0,1,0,1,0,1,0,0,0,0,0
1,ISIC_0015719,IP_3075186,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1
2,ISIC_0052212,IP_2842074,nevus,benign,0,0,0,1,1,0,0,0,1,0,0,0,0
3,ISIC_0068279,IP_6890425,unknown,benign,0,0,0,1,1,0,0,1,0,0,0,0,0
4,ISIC_0074268,IP_8723313,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1


In [23]:
train['image_name']=train['image_name'] +'.jpg'
test['image_name']=test['image_name'] +'.jpg'

In [24]:
train.head()

Unnamed: 0,image_name,patient_id,diagnosis,benign_malignant,target,_age_0,_age_1,_age_2,female,male,Unkown,head/neck,lower extremity,oral/genital,palms/soles,torso,upper extremity
0,ISIC_2637011.jpg,IP_7279968,unknown,benign,0,0,0,1,0,1,0,1,0,0,0,0,0
1,ISIC_0015719.jpg,IP_3075186,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1
2,ISIC_0052212.jpg,IP_2842074,nevus,benign,0,0,0,1,1,0,0,0,1,0,0,0,0
3,ISIC_0068279.jpg,IP_6890425,unknown,benign,0,0,0,1,1,0,0,1,0,0,0,0,0
4,ISIC_0074268.jpg,IP_8723313,unknown,benign,0,0,0,1,1,0,0,0,0,0,0,0,1


In [25]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [26]:
train_image_generator=ImageDataGenerator(rescale=1./255,validation_split=0.2)

In [27]:
train_data_gen=train_image_generator.flow_from_dataframe(train,directory=dirctory,x_col='image_name',y_col='benign_malignant',class_mode='binary',target_size=(32,32),batch_size=1)

Found 33126 validated image filenames belonging to 2 classes.


In [28]:
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D


In [29]:
from tensorflow.keras import Sequential

In [30]:
33126/32

1035.1875

In [31]:
IMG_HEIGHT=32
IMG_WIDTH=32

In [32]:
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3)),
    MaxPooling2D(),
    Conv2D(16, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(12, activation='relu'),
    Dense(1)
])

In [33]:
model.compile(optimizer='adam',
              loss=tf.losses.binary_crossentropy,
              metrics=['accuracy'])


In [35]:
model.predict_classes()

TypeError: img_to_array() missing 1 required positional argument: 'img'

In [34]:
history = model.fit(
    train_data_gen,steps_per_epoch=2,
    epochs=10
)

Epoch 1/10


UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node sequential/conv2d/Conv2D (defined at <ipython-input-34-d9b49c868155>:1) ]] [Op:__inference_train_function_865]

Function call stack:
train_function
