# SIIM-ISIC Melanoma Classification

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import pydicom as dicom
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score

# Analysing Patient Records

In [None]:
df = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/train.csv')
df.head()

In [None]:
df = df.rename(columns = {'anatom_site_general_challenge':'site'})

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.isnull().sum()

In [None]:
df = df.dropna(axis=0, how = 'any')

In [None]:
df.isnull().sum()

In [None]:
df.shape

In [None]:
sns.countplot(x = 'sex', data = df, hue = 'target')

In [None]:
sns.distplot(df['age_approx'])

In [None]:
# Collect the approximate age and sex of every patient whose target is 1.
#
# A boolean mask replaces the old `for i in range(df.shape[0])` loop that read
# `df[col][i]`: that lookup is label-based, and after `dropna` the index labels
# have gaps, so missing labels raised KeyError (hidden by a bare `except`) and
# rows whose label was >= len(df) were never visited at all.
malignant_rows = df[df['target'] == 1]
age = malignant_rows['age_approx'].tolist()
sex = malignant_rows['sex'].tolist()

In [None]:
# Side-by-side view of the ages collected for target == 1 patients.
plt.figure(figsize=(15, 5))

# Left panel: distribution of those ages.
plt.subplot(121)
sns.distplot(age)
plt.title('Distribution of age of people having malignant cancer')

# Right panel: how many patients fall on each distinct age value.
plt.subplot(122)
sns.countplot(y=age)
plt.ylabel('Age')
plt.title('Count plot of age of people having malignant cancer')

In [None]:
# Patients per anatomical site, split by target label.
sns.countplot(x='site', data=df, hue='target')
# Rotate the long site labels so they do not overlap. The angle must be a
# number: recent matplotlib rejects the string '90' (only 'horizontal' and
# 'vertical' are accepted as strings).
plt.xticks(rotation=90)

In [None]:
# Anatomical site of every patient whose target is 1.
#
# Selected with a boolean mask rather than `df['site'][i]` over
# `range(df.shape[0])`: that lookup is label-based, the index has gaps after
# `dropna`, and the bare `except` silently dropped the rows it could not reach.
site = df.loc[df['target'] == 1, 'site'].tolist()
    

In [None]:
# Horizontal count plot of the sites collected for target == 1 patients.
sns.countplot(y = site,palette="rocket")
plt.title('Graph showing count of patients having cancer and the site it is located in')
plt.ylabel('Site')

In [None]:
# Diagnosis label of every patient whose target is 1.
#
# Selected with a boolean mask rather than `df['diagnosis'][i]` over
# `range(df.shape[0])`: that lookup is label-based, the index has gaps after
# `dropna`, and the bare `except` silently dropped the rows it could not reach.
diagnosis = df.loc[df['target'] == 1, 'diagnosis'].tolist()
    

In [None]:
sns.countplot(y = diagnosis)

In [None]:
df.sex.unique()

In [None]:
df.site.unique()

In [None]:
df.diagnosis.unique()

In [None]:
df = pd.get_dummies(df, columns = ['sex'],drop_first=True)
df.head()

In [None]:
df = pd.get_dummies(df, columns = ['site'],drop_first=True)
df.head()

In [None]:
df = pd.get_dummies(df, columns = ['diagnosis'],drop_first=True)


In [None]:
df = df.drop('diagnosis_unknown', axis = 1)
df.head()

In [None]:
df = df.drop(['benign_malignant', 'patient_id'], axis = 1)
df.head()

In [None]:
# Heatmap of each feature's correlation with 'target', sorted ascending
# (at most the 16 strongest positive correlations are shown).
plt.figure(figsize=(10, 10))
# `df` still contains the string column 'image_name'. pandas >= 2.0 no longer
# drops non-numeric columns inside `corr()` and raises instead, so restrict the
# frame to numeric/boolean columns explicitly (works on older pandas as well).
numeric_df = df.select_dtypes(include=['number', 'bool'])
target_corr = numeric_df.corr()[['target']].sort_values('target')
sns.heatmap(target_corr.tail(16), annot=True)

In [None]:
# One line plot of 'target' against each candidate feature, on a 4x2 grid.
plt.figure(figsize=(15, 20))

# Order fixes the panel position (1..7); the eighth grid slot stays empty.
plotted_features = [
    'diagnosis_melanoma',
    'age_approx',
    'site_torso',
    'diagnosis_nevus',
    'sex_male',
    'site_upper extremity',
    'site_lower extremity',
]
for panel, feature in enumerate(plotted_features, start=1):
    plt.subplot(4, 2, panel)
    # x/y are passed by keyword: seaborn >= 0.12 made them keyword-only, so the
    # former positional call `sns.lineplot(df[a], df[b])` raises TypeError there.
    sns.lineplot(x=df[feature], y=df['target'])

# Training and Testing the Model

In [None]:
X = df[['diagnosis_melanoma','site_torso','diagnosis_nevus','site_lower extremity','site_upper extremity', 'sex_male', 'age_approx']]
y = df['target']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a default logistic regression on the tabular training split.
classifier_lr = LogisticRegression().fit(X_train, y_train)

# Score it on the held-out split.
y_pred_lr = classifier_lr.predict(X_test)
acc_lr = accuracy_score(y_test, y_pred_lr)
cm_lr = confusion_matrix(y_test, y_pred_lr)
print('Accuracy Score: ', acc_lr)
print('Confusion Matrix: \n', cm_lr)

## Support Vector Machine

In [None]:
from sklearn import svm

# Fit a default support-vector classifier on the tabular training split.
classifier_svm = svm.SVC().fit(X_train, y_train)

# Score it on the held-out split.
y_pred_svm = classifier_svm.predict(X_test)
acc_svm = accuracy_score(y_test, y_pred_svm)
cm_svm = confusion_matrix(y_test, y_pred_svm)
print('Accuracy Score: ', acc_svm)
print('Confusion Matrix: \n', cm_svm)

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a default decision tree on the tabular training split.
classifier_dt = DecisionTreeClassifier().fit(X_train, y_train)

# Score it on the held-out split.
y_pred_dt = classifier_dt.predict(X_test)
acc_dt = accuracy_score(y_test, y_pred_dt)
cm_dt = confusion_matrix(y_test, y_pred_dt)
print('Accuracy Score: ', acc_dt)
print('Confusion Matrix: \n', cm_dt)

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a default random forest on the tabular training split.
classifier_rf = RandomForestClassifier().fit(X_train, y_train)

# Score it on the held-out split.
y_pred_rf = classifier_rf.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred_rf)
cm_rf = confusion_matrix(y_test, y_pred_rf)
print('Accuracy Score: ', acc_rf)
print('Confusion Matrix: \n', cm_rf)

## Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a default gradient-boosting classifier on the tabular training split.
classifier_gb = GradientBoostingClassifier().fit(X_train, y_train)

# Score it on the held-out split.
y_pred_gb = classifier_gb.predict(X_test)
acc_gb = accuracy_score(y_test, y_pred_gb)
cm_gb = confusion_matrix(y_test, y_pred_gb)
print('Accuracy Score: ', acc_gb)
print('Confusion Matrix: \n', cm_gb)

## Adaptive Boosting

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Fit a default AdaBoost classifier on the tabular training split.
classifier_ab = AdaBoostClassifier().fit(X_train, y_train)

# Score it on the held-out split.
y_pred_ab = classifier_ab.predict(X_test)
acc_ab = accuracy_score(y_test, y_pred_ab)
cm_ab = confusion_matrix(y_test, y_pred_ab)
print('Accuracy Score: ', acc_ab)
print('Confusion Matrix: \n', cm_ab)

In [None]:
# Horizontal bar chart comparing the accuracy of the tabular classifiers.
x_l = ['Logistic Regression', 'Support Vector Machine', 'Decison Tree', 'Random Forest', 'Adaptive Boosting', 'Gradient Boosting']
# Accuracies are computed from the predictions made above (same order as the
# labels) instead of being typed in by hand, so the chart cannot drift from
# the actual results.
y_l = [
    accuracy_score(y_test, predictions)
    for predictions in (
        y_pred_lr, y_pred_svm, y_pred_dt, y_pred_rf, y_pred_ab, y_pred_gb
    )
]

# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.barplot(x=y_l, y=x_l, palette="Reds")
# Zoom the accuracy axis so small differences near 1.0 are visible.
plt.xlim([0.9, 1.003])

In [None]:
# Fit a fresh default SVC on the tabular training split and keep it as
# `train1`; the final-testing cell uses it to classify hand-entered details.
train1 = svm.SVC()
train1.fit(X_train,y_train)

# Analysing Images

In [None]:
# Display one sample DICOM image from the training set.
# `.iloc[91]` selects the row at position 91. The former `df['image_name'][91]`
# was a label lookup, which raises KeyError whenever the row labelled 91 was
# removed by the earlier `dropna`.
sample_name = df['image_name'].iloc[91]
image = '/kaggle/input/siim-isic-melanoma-classification/train/' + sample_name + '.dcm'
ds = dicom.dcmread(image)

plt.imshow(ds.pixel_array)

In [None]:
s0 = df.target[df.target.eq(0)].sample(50, random_state=1).index
s1 = df.target[df.target.eq(1)].sample(60,random_state=1).index 

df = df.loc[s0.union(s1)]
df['target'].value_counts()

In [None]:
df.shape

In [None]:
# Read the DICOM file of every sampled row and store its pixels as one flat
# 1-D vector per image.
images = []
for x in df['image_name']:
    image = f'/kaggle/input/siim-isic-melanoma-classification/train/{x}.dcm'
    ds = dicom.dcmread(image)
    images.append(ds.pixel_array.flatten())


In [None]:
import tensorflow as tf
# Force every flattened image to exactly 720 int32 values so they stack into
# one 2-D array. Per the Keras docs, padding="pre" left-pads shorter vectors
# with `value`, and truncating="pre" drops values from the START of longer
# vectors, i.e. only the last 720 flattened values of each image are kept.
# NOTE(review): that is a tiny tail of each image -- confirm this is intended
# rather than resizing the images to a common shape.
images = tf.keras.preprocessing.sequence.pad_sequences(
  images,
  maxlen = 720,
  dtype = "int32",
  padding = "pre",
  truncating = "pre",
  value = 0
)

# Training the Model

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Features are the padded pixel vectors; labels are the sampled rows' targets.
X, y = images, df['target'].to_numpy()
classifier_lr = LogisticRegression()
classifier_lr.fit(X, y)

## Support Vector Machine

In [None]:
from sklearn import svm

# Features are the padded pixel vectors; labels are the sampled rows' targets.
X, y = images, df['target'].to_numpy()
classifier_svm = svm.SVC()
classifier_svm.fit(X, y)

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Features are the padded pixel vectors; labels are the sampled rows' targets.
X, y = images, df['target'].to_numpy()
classifier_dt = DecisionTreeClassifier()
classifier_dt.fit(X, y)

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Features are the padded pixel vectors; labels are the sampled rows' targets.
X, y = images, df['target'].to_numpy()
classifier_rf = RandomForestClassifier()
classifier_rf.fit(X, y)

## Adaptive Boosting 

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Features are the padded pixel vectors; labels are the sampled rows' targets.
X, y = images, df['target'].to_numpy()
classifier_ab = AdaBoostClassifier()
classifier_ab.fit(X, y)

## Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Features are the padded pixel vectors; labels are the sampled rows' targets.
X, y = images, df['target'].to_numpy()
classifier_gb = GradientBoostingClassifier()
classifier_gb.fit(X, y)

# Testing the Model

In [None]:
# Use the last 50 rows of the sampled frame as the "test" set.
# NOTE(review): these rows are part of the same `df` whose images every
# classifier above was fitted on, so the scores below are measured on training
# data and overstate real performance. A proper evaluation needs rows that
# were excluded from fitting.
test = df.tail(50)
test.head()

In [None]:
# Read the DICOM file of every test row and store its pixels as one flat
# 1-D vector per image.
test_images = []
for x in test['image_name']:
    image = f'/kaggle/input/siim-isic-melanoma-classification/train/{x}.dcm'
    ds = dicom.dcmread(image)
    test_images.append(ds.pixel_array.flatten())


In [None]:
# Bring the test vectors to the same fixed length (720 int32 values) used for
# the training vectors, with identical settings: left-pad with 0 and, for
# longer vectors, keep only the trailing 720 values.
test_images = tf.keras.preprocessing.sequence.pad_sequences(
  test_images,
  maxlen = 720,
  dtype = "int32",
  padding = "pre",
  truncating = "pre",
  value = 0
)

## Logistic Regression

In [None]:
X_test = test_images
y_test = np.array(test['target'])
y_pred_lr = classifier_lr.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred_lr))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_lr))

## Support Vector Machine

In [None]:
X_test = test_images
y_test = np.array(test['target'])
y_pred_svm = classifier_svm.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred_svm))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_svm))

## Decision Tree

In [None]:
X_test = test_images
y_test = np.array(test['target'])
y_pred_dt = classifier_dt.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred_dt))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_dt))

## Random Forest

In [None]:
X_test = test_images
y_test = np.array(test['target'])
y_pred_rf = classifier_rf.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred_rf))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_rf))

## Adaptive Boosting

In [None]:
X_test = test_images
y_test = np.array(test['target'])
y_pred_ab = classifier_ab.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred_ab))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_ab))

## Gradient Boosting

In [None]:
X_test = test_images
y_test = np.array(test['target'])
y_pred_gb = classifier_gb.predict(X_test)

In [None]:
print('Accuracy Score: ',accuracy_score(y_test,y_pred_gb))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_gb))

In [None]:
# Horizontal bar chart comparing the accuracy of the image-based classifiers.
x_l = ['Logistic Regression', 'Support Vector Machine', 'Decison Tree', 'Random Forest', 'Adaptive Boosting', 'Gradient Boosting']
# Accuracies are computed from the predictions made above (same order as the
# labels) instead of being typed in by hand, so the chart cannot drift from
# the actual results.
y_l = [
    accuracy_score(y_test, predictions)
    for predictions in (
        y_pred_lr, y_pred_svm, y_pred_dt, y_pred_rf, y_pred_ab, y_pred_gb
    )
]

# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.barplot(x=y_l, y=x_l, palette="mako")

In [None]:
# Fit a fresh default logistic regression on the current X / y (the padded
# image vectors and their targets) and keep it as `train2`; the final-testing
# cell uses it to classify a single image.
train2 = LogisticRegression()
train2.fit(X,y)

In [None]:
df.head()

# Final Testing

In [None]:
# Classify one patient two ways: from hand-entered tabular details (train1)
# and from the patient's DICOM image (train2).
image_path = '/kaggle/input/siim-isic-melanoma-classification/train/ISIC_0149568.dcm'
# NOTE(review): train1 was fitted on columns ordered as
#   diagnosis_melanoma, site_torso, diagnosis_nevus, site_lower extremity,
#   site_upper extremity, sex_male, age_approx
# so in this row 55 lands in the diagnosis_melanoma slot and age_approx is 0.
# If 55 is meant to be the age, the values are in the wrong order -- confirm
# the intended values and reorder them to match the training columns.
details = [[55,0,0,0,1,1,0]]
image_to_test = []
ds = dicom.dcmread(image_path)
pixels = ds.pixel_array
plt.imshow(pixels)
image_to_test.append(pixels.flatten())

# Same fixed-length conversion as used for the training images (720 int32
# values, left-padded / left-truncated).
image_to_test = tf.keras.preprocessing.sequence.pad_sequences(
  image_to_test,
  maxlen = 720,
  dtype = "int32",
  padding = "pre",
  truncating = "pre",
  value = 0
)

# Map the tabular model's single prediction to a readable label.
if train1.predict(details) == [1]:
    
    result1 = 'Malignant'
else:
    result1 = 'Benign'

# Map the image model's single prediction to a readable label.
if train2.predict(image_to_test) == [1]:
    result2 = 'Malignant'
else:
    result2 = 'Benign'


print('Result from patient details: ', result1)
print('Result from patient image: ', result2)

## END