<a href="https://www.kaggle.com/code/nurmelike/ysa-ile-hastal-k-belirleme?scriptVersionId=94932522" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import random
import cv2

# for organizing and manipulating the data
import numpy as np
import pandas as pd

# for building and managing deep learning models
from keras import backend as K
from keras.preprocessing import image
from sklearn.metrics import roc_auc_score, roc_curve


from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

from keras.models import load_model


from tensorflow.keras.applications import DenseNet121
import tensorflow as tf
import tensorflow.keras.layers as Layers

# for visualization and plotting
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# Seed the stdlib RNG. With a=None the seed comes from OS entropy or the
# current time, so this call does NOT make runs reproducible.
random.seed(a=None, version=2)


# Build the ROC curves
def get_roc_curve(labels, predicted_vals, generator, when = ''):
    """Plot one ROC curve per class on a shared figure and return the AUROCs.

    Args:
        labels: Sequence of class names; entry ``i`` names column ``i``.
        predicted_vals: Predicted scores, indexed as ``predicted_vals[:, i]``.
        generator: Object whose ``labels`` attribute holds the ground truth,
            indexed as ``generator.labels[:, i]``.
        when: Text appended to the plot title (e.g. ``'before training'``).

    Returns:
        list: AUROC of every class that could be scored, in class order.
        Classes whose curve could not be computed are skipped, so the list
        can be shorter than ``labels``.
    """
    auc_roc_vals = []
    for i, label in enumerate(labels):
        try:
            gt = generator.labels[:, i]
            pred = predicted_vals[:, i]
            auc_roc = roc_auc_score(gt, pred)
            auc_roc_vals.append(auc_roc)
            fpr_rf, tpr_rf, _ = roc_curve(gt, pred)
            # Figure number 1 is reused so all classes share one plot.
            plt.figure(1, figsize=(10, 10))
            plt.plot([0, 1], [0, 1], 'k--')
            plt.plot(fpr_rf, tpr_rf,
                     label=label + " (" + str(round(auc_roc, 3)) + ")")
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve ' + when)
            plt.legend(loc='best')
        except Exception as err:
            # Best-effort per class: keep going, but do not swallow
            # KeyboardInterrupt/SystemExit and do report the real cause.
            print(
                f"Error in generating ROC curve for {label}. "
                f"Dataset lacks enough examples. "
                f"({type(err).__name__}: {err})"
            )
    plt.show()
    return auc_roc_vals


In [None]:
# Detect a TPU (e.g. on Kaggle). A ValueError from the resolver is treated
# as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU: use TensorFlow's current distribution strategy (CPU / GPU).
    strategy = tf.distribute.get_strategy()
else:
    # TPU found: connect, initialise the TPU system and build its strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Size the images are resized to; passed to the generators as
# target_w / target_h and unpacked into the DenseNet input shape.
IMAGE_SIZE=[128, 128]
# Number of training epochs.
EPOCHS = 20
# Batch size handed to the data generators.
BATCH_SIZE = 64


In [None]:
# Load the label table from the Kaggle input directory.
train_df_main = pd.read_csv('../input/databinli/databinli.csv')

# Every column except the last one is used as a class label.
labels = train_df_main.columns[:-1]
labels

In [None]:
# Split the dataset
from sklearn.model_selection import train_test_split

# Keep 70% of the rows; the remaining 30% (`discard`) is only printed.
train_df, discard = train_test_split(
    train_df_main,
    test_size=0.3,
    random_state=1993,
)
print(train_df)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print(discard)

# From the kept rows: hold out 20% for testing, then 20% of the rest
# for validation.
train_and_valid_set, test_set = train_test_split(
    train_df,
    test_size=0.2,
    random_state=1993,
)
train_set, valid_set = train_test_split(
    train_and_valid_set,
    test_size=0.2,
    random_state=1993,
)

In [None]:

def get_train_generator(df, image_dir, x_col, y_cols, shuffle=True, batch_size=8, seed=1, target_w = 256, target_h = 256):
    """Return an augmenting, per-image-normalising generator for training.

    Args:
        df: DataFrame listing the training images and their labels.
        image_dir: Directory the file names in ``x_col`` are relative to;
            pass ``None`` when ``x_col`` already holds full paths.
        x_col: Name of the column holding the image file names / paths.
        y_cols: Column name(s) holding the target labels.
        shuffle: Whether the generator shuffles the rows.
        batch_size: Number of images per batch.
        seed: Random seed forwarded to the generator.
        target_w: First entry of the ``target_size`` tuple.
        target_h: Second entry of the ``target_size`` tuple.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``.
    """
    print("getting train generator...")
    # Normalise each image individually and apply light random augmentation.
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True,
        shear_range=0.1,
        zoom_range=0.15,
        rotation_range=5,
        width_shift_range=0.1,
        height_shift_range=0.05,
        horizontal_flip=True,
        vertical_flip=False,
        fill_mode='reflect')

    # Flow from the dataframe with the requested batch size and image size.
    # `image_dir` is now honoured (it used to be ignored in favour of a
    # hard-coded None).
    # NOTE(review): Keras documents target_size as (height, width); this
    # passes (target_w, target_h). Harmless for square sizes - confirm
    # before using non-square images.
    generator = image_generator.flow_from_dataframe(
            dataframe=df,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=shuffle,
            seed=seed,
            target_size=(target_w, target_h))

    return generator



In [None]:
def get_test_and_valid_generator(valid_df, test_df, train_df, image_dir, x_col, y_cols, sample_size=100, batch_size=8, seed=1, target_w = 256, target_h = 256):
    """Return validation and test generators normalised with training statistics.

    A single batch of ``sample_size`` un-normalised training images is drawn
    and used to fit the feature-wise mean / std applied to the validation
    and test images.

    Args:
        valid_df: DataFrame listing the validation images and labels.
        test_df: DataFrame listing the test images and labels.
        train_df: DataFrame listing the training images (sampled for stats).
        image_dir: Directory the file names in ``x_col`` are relative to;
            ``None`` when ``x_col`` already holds full paths.
        x_col: Name of the column holding the image file names / paths.
        y_cols: Column name(s) holding the target labels.
        sample_size: Number of training images used to fit the statistics.
        batch_size: Batch size of the returned generators.
        seed: Random seed forwarded to every generator, including the
            shuffled sampling generator so the drawn sample is repeatable.
        target_w: First entry of the ``target_size`` tuple.
        target_h: Second entry of the ``target_size`` tuple.

    Returns:
        tuple: ``(valid_generator, test_generator)``.
    """
    print("getting train and valid generators...")
    target_size = (target_w, target_h)

    # Un-normalised generator used only to draw a sample of training images.
    # It uses the caller's x_col / y_cols (previously hard-coded to
    # "img_ind" and the global `labels`).
    raw_train_generator = ImageDataGenerator().flow_from_dataframe(
        dataframe=train_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=sample_size,
        shuffle=True,
        seed=seed,
        target_size=target_size)

    # First element of the batch is the image array.
    data_sample = next(raw_train_generator)[0]

    # Fit the feature-wise mean and std on the training sample.
    image_generator = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True)
    image_generator.fit(data_sample)

    def _flow(frame):
        # Ordered (unshuffled) generator over `frame` using the fitted stats.
        return image_generator.flow_from_dataframe(
            dataframe=frame,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=False,
            seed=seed,
            target_size=target_size)

    valid_generator = _flow(valid_df)
    test_generator = _flow(test_df)
    return valid_generator, test_generator

In [None]:
# Training generator: augmented, per-image normalised batches.
train_generator = get_train_generator(
    df=train_set,
    image_dir=None,
    x_col="img_ind",
    y_cols=labels,
    batch_size=BATCH_SIZE,
    target_w=IMAGE_SIZE[0],
    target_h=IMAGE_SIZE[1],
)

# Validation / test generators, normalised with statistics fitted on a
# sample of the training set.
valid_generator, test_generator = get_test_and_valid_generator(
    valid_df=valid_set,
    test_df=test_set,
    train_df=train_set,
    image_dir=None,
    x_col="img_ind",
    y_cols=labels,
    batch_size=BATCH_SIZE,
    target_w=IMAGE_SIZE[0],
    target_h=IMAGE_SIZE[1],
)

In [None]:

def get_label(y, class_names=None):
    """Return the names of the active classes in ``y`` joined by ``'|'``.

    Args:
        y: Iterable of per-class flags; every truthy entry marks an active
            class at that position.
        class_names: Sequence of class names indexed by position. Defaults
            to the module-level ``labels``.

    Returns:
        str: The active class names joined with ``'|'``, or ``'No Label'``
        when no entry of ``y`` is truthy.
    """
    if class_names is None:
        class_names = labels
    active = [class_names[i] for i, flag in enumerate(y) if flag]
    return '|'.join(active) if active else 'No Label'

# Get the first batch of images and labels from the training generator.
x, y = train_generator[0]


# Show a few images with their class names as titles.
fig = plt.figure(figsize=(20, 10))
columns = 4; rows = 2
# Never index past the end of the batch if it holds fewer images than the grid.
for i in tqdm(range(1, min(columns * rows, len(x)) + 1)):
    fig.add_subplot(rows, columns, i)
    plt.imshow(x[i-1], cmap='gray')
    plt.title(get_label(y[i-1]))
    plt.axis(False)

In [None]:
# Show the class imbalance: percentage frequency of each class in the
# training data (the data is unbalanced).
plt.figure(figsize=(8,4))
plt.xticks(rotation = 90)
plt.bar(labels, train_generator.labels.sum(axis = 0)/train_generator.n * 100)
# Title typo fixed ("ofdifferent" -> "of different").
plt.title('Percentage of different conditions in train dataset')
plt.xlabel('Conditions')
plt.ylabel('Percentage')
plt.show()

In [None]:
def compute_class_freqs(labels):
    """Return the per-class fraction of positive and negative examples.

    Args:
        labels (np.array): label matrix of size (num_examples, num_classes)

    Returns:
        positive_frequencies (np.array): column sums of ``labels`` divided
            by the number of rows, size (num_classes)
        negative_frequencies (np.array): ``1.0 - positive_frequencies``,
            size (num_classes)
    """
    num_examples = labels.shape[0]
    positives_per_class = labels.sum(axis=0)

    positive_frequencies = positives_per_class / num_examples
    # Each label is either positive or negative, so the two sum to one.
    negative_frequencies = 1.0 - positive_frequencies
    return positive_frequencies, negative_frequencies


# In each row the present disease is marked 1, but because the other
# diseases are 0, an unweighted loss would be dominated by the
# negative class.
freq_pos, freq_neg = compute_class_freqs(train_generator.labels)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
data = pd.concat(
    [
        pd.DataFrame({"Class": labels, "Label": "Positive", "Value": freq_pos}),
        pd.DataFrame({"Class": labels, "Label": "Negative", "Value": freq_neg}),
    ],
    ignore_index=True,
)
plt.xticks(rotation=90)
f = sns.barplot(x="Class", y="Value", hue="Label" ,data=data)

In [None]:
# To counter the imbalance and equalise the contribution of positive and
# negative cases, every example is multiplied by a class-specific weight:
# positives are weighted by the negative frequency and vice versa.
pos_weights = freq_neg
neg_weights = freq_pos
pos_contribution = freq_pos * pos_weights
neg_contribution = freq_neg * neg_weights


# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
data = pd.concat(
    [
        pd.DataFrame({"Class": labels, "Label": "Positive", "Value": pos_contribution}),
        pd.DataFrame({"Class": labels, "Label": "Negative", "Value": neg_contribution}),
    ],
    ignore_index=True,
)
plt.xticks(rotation=90)
sns.barplot(x="Class", y="Value", hue="Label" ,data=data);

In [None]:
# Once the class weights are known, the weighted loss of each training
# batch is computed with the function built below.
def get_weighted_loss(pos_weights, neg_weights, epsilon=1e-7):
    """
    Return a weighted loss function for the given positive and negative weights.

    Args:
      pos_weights (np.array): positive weight of each class
      neg_weights (np.array): negative weight of each class
      epsilon (float): small constant added inside the logarithms

    Returns:
      weighted_loss (function): loss taking ``(y_true, y_pred)``
    """
    def weighted_loss(y_true, y_pred):
        """
        Return the weighted loss summed over all classes.

        Args:
            y_true (Tensor): ground-truth labels, indexed ``[:, class]``
            y_pred (Tensor): predicted values, indexed ``[:, class]``
        Returns:
            loss: sum over classes of the positive and negative terms
        """
        total = 0.0

        for i in range(len(pos_weights)):
            truth = y_true[:, i]
            prob = y_pred[:, i]

            # Mean weighted log-loss of the positive examples of class i ...
            positive_term = -1 * K.mean(
                pos_weights[i] * truth * K.log(prob + epsilon)
            )
            # ... and of its negative examples.
            negative_term = -1 * K.mean(
                neg_weights[i] * (1 - truth) * K.log(1 - prob + epsilon)
            )
            total += positive_term + negative_term
        return total

    return weighted_loss

In [None]:
# Build the model inside the distribution strategy's scope.
with strategy.scope():
    # DenseNet121 backbone with ImageNet weights and no classification head.
    dnet121 = DenseNet121(
        input_shape=(*IMAGE_SIZE, 3),
        weights='imagenet',
        include_top=False,
    )
    dnet121.trainable = True

    # Backbone -> global average pooling -> one sigmoid unit per label.
    model_dnet121 = tf.keras.Sequential(
        [
            dnet121,
            Layers.GlobalAveragePooling2D(),
            Layers.Dense(len(labels), activation ='sigmoid'),
        ]
    )
    model_dnet121.compile(
        optimizer='adam',
        loss=get_weighted_loss(pos_weights, neg_weights),
        metrics=['accuracy'],
    )
    model_dnet121.summary()


In [None]:
def build_lrfn(lr_start=0.002, lr_max=0.1, 
               lr_min=0, lr_rampup_epochs=8, 
               lr_sustain_epochs=0, lr_exp_decay=.8):
    """Build an epoch -> learning-rate function with three phases.

    Args:
        lr_start: Learning rate at epoch 0.
        lr_max: Learning rate reached at the end of the ramp-up.
        lr_min: Value the exponential decay approaches.
        lr_rampup_epochs: Number of epochs of linear ramp-up.
        lr_sustain_epochs: Number of epochs held at ``lr_max``.
        lr_exp_decay: Per-epoch decay factor after the sustain phase.

    Returns:
        function: ``lrfn(epoch)`` returning the learning rate for ``epoch``.
    """
    sustain_end = lr_rampup_epochs + lr_sustain_epochs

    def lrfn(epoch):
        # Phase 1: linear ramp from lr_start towards lr_max.
        if epoch < lr_rampup_epochs:
            return (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        # Phase 2: hold at lr_max.
        if epoch < sustain_end:
            return lr_max
        # Phase 3: exponential decay from lr_max towards lr_min.
        return (lr_max - lr_min) * lr_exp_decay ** (
            epoch - lr_rampup_epochs - lr_sustain_epochs
        ) + lr_min

    return lrfn

# Build the schedule with its default parameters and wrap it in a Keras
# callback that sets the learning rate per epoch.
lrfn = build_lrfn()
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

In [None]:
# Baseline predictions on the test set before any training.
# Model.predict accepts generators directly; predict_generator is deprecated.
predicted_vals_before = model_dnet121.predict(test_generator, steps = len(test_generator))

In [None]:
# Display the length of the test generator (the `steps` value used for prediction).
len(test_generator)

In [None]:
# Build the model
base_model = DenseNet121(weights='imagenet', include_top=False)
# base_model = DenseNet121(weights='../input/pretrained-model/pretrained_model.h5', include_top=False)

x = base_model.output
# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(x)

# and a logistic layer with one sigmoid unit per label
predictions = Dense(len(labels), activation="sigmoid")(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss=get_weighted_loss(pos_weights, neg_weights), metrics = ['accuracy'])
## model.load_weights('../input/pretrained-model/pretrained_model.h5') You are trying to load a weight file containing 242 layers into a model with 241 layers.

# Predictions of the untrained head, kept for the before/after comparison.
# Model.predict / Model.fit accept generators directly; the *_generator
# variants are deprecated.
predicted_vals_before = model.predict(test_generator, steps = len(test_generator))



# Train for the number of epochs configured in EPOCHS (was a hard-coded 20).
history = model.fit(train_generator,
                    validation_data=valid_generator,
                    steps_per_epoch=len(train_generator),
                    validation_steps=len(valid_generator),
                    epochs = EPOCHS)

In [None]:
def visualize_training(history, lw = 3):
    """Plot training vs validation accuracy, then training vs validation loss.

    Args:
        history: Object whose ``history`` dict holds the per-epoch series
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
        lw: Line width used for every curve.
    """
    # (training key, validation key, figure title, y-axis label)
    panels = (
        ('accuracy', 'val_accuracy',
         'Training Accuracy vs Validation Accuracy', 'Accuracy'),
        ('loss', 'val_loss',
         'Training Loss vs Validation Loss', 'Loss'),
    )
    for train_key, val_key, title, y_label in panels:
        plt.figure(figsize=(10,6))
        plt.plot(history.history[train_key], label='training',
                 marker='*', linewidth=lw)
        plt.plot(history.history[val_key], label='validation',
                 marker='o', linewidth=lw)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend(fontsize='x-large')
        plt.show()

In [None]:
# Plot the accuracy and loss curves recorded during training.
visualize_training(history)

In [None]:
# Predictions of the trained model on the test set.
# Model.predict accepts generators directly; predict_generator is deprecated.
predicted_vals_after = model.predict(test_generator, steps = len(test_generator))
# Per-class ROC curves and AUROC values before and after training.
auc_rocs_before = get_roc_curve(labels, predicted_vals_before, test_generator, when = 'before training')
auc_rocs_after = get_roc_curve(labels, predicted_vals_after, test_generator, when = 'after training')

In [None]:
# Grouped bar chart: per-class AUROC before vs after training.
ind = np.arange(len(labels))
plt.figure(figsize=(15, 7))
width = 0.2

# The "after" bars are shifted one bar-width to the right of the "before" bars.
plt.bar(ind, auc_rocs_before, width, label='Before')
plt.bar(ind + width, auc_rocs_after, width, label='After')

plt.ylabel('AUROC value', fontsize=16)
plt.title('AUROC of each diagnosis before and after training', fontsize=18)
# Centre each tick between its two bars.
plt.xticks(ind + width / 2, labels, rotation=90, fontsize=14)
plt.yticks(fontsize=14)
plt.legend(loc='best')
plt.grid(True)
plt.show()

In [None]:
# Save the trained model to an HDF5 file in the working directory.
model.save('denseNet_trained_weights.h5')