In [35]:
import glob, os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [36]:
# load our dataset
IMAGE_SIZE = (224, 224)
INPUT_SHAPE = (224, 224, 3)

diseases_list = ['melanoma', 'nevus', 'seborrheic_keratosis']

def get_disease_idx(folder_name):
    print(folder_name)
    disease_name = os.path.basename(folder_name)
    print(disease_name)
    disease_idx = 0
    for diseases in diseases_list:
        if disease_name == diseases:
            return disease_idx
        disease_idx += 1
    return None

In [37]:
def get_image_array(path):
    #read all sub folders in all train and test path
    subfolders = [f.path for f in os.scandir(path) if f.is_dir()]  
    list_of_images = []
    list_of_lables = []
    for each_folder in subfolders:
        disease_idx = get_disease_idx(each_folder)
        if disease_idx is None:
            return False, "Failed to prepare data as disease is not found in allowed diseases list"
        for current_image in glob.glob(each_folder+'\*.jpg'): #to read every image in particular folder
            list_of_images.append(np.array(cv2.imread(current_image)))
            list_of_lables.append(np.array(disease_idx))
    return_image_array = np.array(list_of_images)
    return_lables_array = np.array(list_of_lables)
    print(return_image_array.shape)
    print(return_lables_array.shape)
    return return_image_array, return_lables_array

In [225]:
X_train, y_train = get_image_array(r'D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\train')
X_test, y_test = get_image_array(r'D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\test')

D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\train\melanoma
melanoma
D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\train\nevus
nevus
D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\train\seborrheic_keratosis
seborrheic_keratosis
(938, 224, 224, 3)
(938,)
D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\test\melanoma
melanoma
D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\test\nevus
nevus
D:\Data Science\Datasets for image classification\derma_disease_dataset\dataset\test\seborrheic_keratosis
seborrheic_keratosis
(368, 224, 224, 3)
(368,)


In [226]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2]*X_train.shape[3])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2]*X_test.shape[3])

In [227]:
X_train, y_train = shuffle(X_train, y_train, random_state=42)
X_test, y_test = shuffle(X_test, y_test, random_state=42)

In [525]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=50, min_weight_fraction_leaf=0.0,
           n_estimators=30, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [526]:
clf.fit(X_train, y_train)





RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=1, min_samples_split=50,
                       min_weight_fraction_leaf=0.0, n_estimators=30, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [527]:
preds = clf.predict(X_test)

In [528]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6603260869565217


In [388]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [12]:
clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [13]:
preds = clf.predict(X_test)

In [14]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6032608695652174


In [13]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [14]:
clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [15]:
preds = clf.predict(X_test)

In [16]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6059782608695652


In [19]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [20]:
clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [21]:
preds = clf.predict(X_test)

In [22]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6032608695652174


In [24]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=5, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [25]:
clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=1, min_samples_split=5,
                       min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [26]:
preds = clf.predict(X_test)

In [27]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6277173913043478


In [29]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=3,
           min_samples_split=5, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [30]:
clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=3, min_samples_split=5,
                       min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [31]:
preds = clf.predict(X_test)

In [32]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6141304347826086


In [121]:
clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=15, min_weight_fraction_leaf=0.0,
           n_estimators=50, n_jobs=1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

In [122]:
clf.fit(X_train, y_train)





RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=1e-07,
                       min_samples_leaf=1, min_samples_split=15,
                       min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=1,
                       oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [123]:
preds = clf.predict(X_test)

In [124]:
print("Accuracy:", accuracy_score(y_test,preds))

Accuracy: 0.6521739130434783
