In [1]:
# Silence every Python warning for the whole session. The filter is installed
# before the remaining imports, so warnings raised while importing them are
# suppressed as well.
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import tensorflow as tf
# Set the tf.keras backend learning phase to 1 (training).
# NOTE(review): set_learning_phase is deprecated in TensorFlow 2.x — confirm it is still required.
tf.keras.backend.set_learning_phase(1)
from PIL import Image
from keras import backend as K 
from keras.preprocessing.image import ImageDataGenerator 
import matplotlib.pyplot as plt
import os, sys, random, gc

# Modules from the project's local `src` package, followed by scikit-learn's splitter.
from src import exploration
from src import pre_process
from src import denseNet
from src import efficientNet
from sklearn.model_selection import train_test_split




All .tiff images are stained with `Martius Scarlet Blue (MSB) stain`. The two major acute ischemic stroke (AIS) etiology subtypes and their main cellular differences are:

1. **Cardioembolism (CE):** RBCs = 47.67%, WBCs = 4.22%, fibrin (F) = 29.19%, platelets (P) = 18.21%

2. **Large Artery Atherosclerosis (LAA):** RBCs = 42.58%, WBCs = 3.12%, fibrin (F) = 31.31%, platelets (P) = 20.81%


<font size="1"> [Abbasi M, Fitzgerald S, Ayers-Ringler J, Espina V, Mueller C, Rucker S, Kadirvel R, Kallmes D, Brinjikji W. Proteomic Analysis of Cardioembolic and Large Artery Atherosclerotic Clots Using Reverse Phase Protein Array Technology Reveals Key Cellular Interactions Within Clot Microenvironments. Cureus. 2021 Feb 22;13(2):e13499. doi: 10.7759/cureus.13499. PMID: 33777584; PMCID: PMC7990677.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7990677/)</font>

Given this insight, we keep the images in color, since each cellular component is stained a different color. Rather than preserving every pixel, we downscale the images so that the neural network (NN) can process them more efficiently.

![CE](./images/CEexample1.png)

`Split the dataset into training and validation sets:`

In [2]:
# Read the image metadata table, then let the project helper split it into the
# four pieces used below (named as train/validation x and y).
train1 = pd.read_csv(filepath_or_buffer="../Final-IRONHACK-Project/data/train1.csv")
(train_x, valid_x, train_y, valid_y) = pre_process.train_valid_split(train1)

In [3]:
# Share of each label in the training split: class counts divided by the number of rows.
train_x["label"].value_counts().div(len(train_x["label"]))

label
CE     0.72471
LAA    0.27529
Name: count, dtype: float64

In [4]:
# Wrap the training split in the project's Labeling_images helper and call
# class_again_train(); judging by the preview below, the resulting frame carries
# a `new_file_path` column pointing at each image file.
train2 = exploration.Labeling_images(train_x)
train3 = train2.class_again_train()
# Preview a single row. A fixed random_state keeps the displayed row the same
# on every Restart & Run All instead of changing with each execution.
train3.sample(1, random_state=42)

Unnamed: 0,image_id,center_id,patient_id,image_num,label,new_file_path
270,56d177_2,7,56d177,2,CE,D:/bootcamp/original/train_folder/56d177_2.tif


In [5]:
# Share of each label in the validation split: class counts divided by the number of rows.
valid_x["label"].value_counts().div(len(valid_x["label"]))

label
CE     0.728477
LAA    0.271523
Name: count, dtype: float64

In [6]:
# Wrap the validation split in the project's Labeling_images helper and call
# class_again_val(); judging by the preview below, the resulting frame carries
# a `new_file_path` column pointing at each image file.
val = exploration.Labeling_images(valid_x)
val2 = val.class_again_val()
# Preview a single row. A fixed random_state keeps the displayed row the same
# on every Restart & Run All instead of changing with each execution.
val2.sample(1, random_state=42)

Unnamed: 0,image_id,center_id,patient_id,image_num,label,new_file_path
297,5f520a_0,4,5f520a,0,CE,D:/bootcamp/original/val_folder1/5f520a_0.tif


In [7]:
# Persist the labelled training table; index=False leaves the row index out of the CSV.
train3.to_csv(
    path_or_buf="../Final-IRONHACK-Project/data/train_xdef.csv",
    index=False,
)

In [8]:
# Persist the labelled validation table; index=False leaves the row index out of the CSV.
val2.to_csv(
    path_or_buf="../Final-IRONHACK-Project/data/valid_xdef.csv",
    index=False,
)

In [9]:
# data_path = "D:/bootcamp/original/"
# folder_names= ["train_folder", "val_folder"]
# pre_process.createfolders(data_path,folder_names)

In [10]:
# Run this only once: it moves the image files permanently.
    # Moves the train_x images to train_path and the valid_x images to valid_path.
# train_path = "D:/bootcamp/original/train_folder/"
# valid_path = "D:/bootcamp/original/val_folder1/"
# pre_process.move_images(train_x, train_path)
# pre_process.move_images(valid_x, valid_path)

In [11]:
# # For training folder
# data_path = "D:/bootcamp/original/try_train/"
# folder_names= ["CE", "LAA"]
# pre_process.createfolders(data_path,folder_names)

In [12]:
# # For validation folder
# data_path = "D:/bootcamp/original/try_val/"
# folder_names= ["CE", "LAA"]
# pre_process.createfolders(data_path,folder_names)

In [13]:
# Run this only once: it moves the image files permanently.
    #  For the train folder
# folder_path_CE = "D:/bootcamp/original/train_folder/CE/"
# folder_path_LAA = "D:/bootcamp/original/train_folder/LAA/"
# pre_process.images_class(train3, folder_path_CE, folder_path_LAA)

In [14]:
# Run this only once: it moves the image files permanently.
    # For the validation folder
# folder_path_CE = "D:/bootcamp/original/val_folder1/CE/"
# folder_path_LAA = "D:/bootcamp/original/val_folder1/LAA/"
# pre_process.images_class(val2, folder_path_CE, folder_path_LAA)

In [15]:
# Number of target classes (the label column holds CE and LAA).
n_classes = 2

# Raise Pillow's pixel-count ceiling to 3 billion so that very large slide
# images can be opened and plotted regardless of their size.
Image.MAX_IMAGE_PIXELS = 3_000_000_000

# Set the Keras backend learning phase to 1 (training).
# NOTE(review): set_learning_phase is deprecated in TF 2.x — confirm it is still needed.
K.set_learning_phase(1)

# nb_train_samples = 2
# nb_validation_samples = 4

In [16]:
# Create the two image generators via the project helper. It is called without
# arguments, so whatever paths and image sizes it uses are its own defaults.
(train_generator, test_generator) = pre_process.image_generator()


Found 14 images belonging to 2 classes.
Found 8 images belonging to 2 classes.


`DenseNet model:`

In [17]:
# n_classes = 2
# dense_model = denseNet.DenseNet_model()
# epochs = 1
# batch_size = 5
# architecture = dense_model.densenet_model(train_generator, test_generator, 150, 150, epochs, batch_size)
# architecture.summary()  

In [18]:
# comp = dense_model.compile_model()

In [19]:
# Instantiate the project's DenseNet wrapper.
dense_net_model = denseNet.DenseNet_model()

# Hand it the training and test generators created earlier.
dense_net_model.set_generators(train_generator, test_generator)

# Run a single training epoch with a batch size of 2 and keep the returned object.
# NOTE(review): `train_x` (presumably the training DataFrame) is passed as the
# first argument — confirm this matches what `train` expects.
history = dense_net_model.train(
    train_x,
    epochs=1,
    batch_size=2,
)

# Fetch the weights (the original note says this also saves them — TODO confirm).
dense_net_model.get_weights()








  1/302 [..............................] - ETA: 4:21:26 - loss: 0.7006 - accuracy: 0.4000

KeyboardInterrupt: 

In [None]:
# hist = dense_model.train(train_generator, test_generator, epochs = 1, batch_size =5)

In [None]:
# weigths = dense_model.get_weights()

In [None]:
# class_names = ["CE", "LAA"]
# batch_size = 10
# verbose = 1
# eval = dense_model.evaluation(test_generator, class_names, batch_size)

In [None]:
# DenseNet = dense_model.predict_densenet(test_generator,class_names)

In [None]:
# def plot_hist(hist): 
#     plt.plot(hist.history["accuracy"])
#     plt.plot(hist.history["val_accuracy"])
#     plt.title("model accuracy")
#     plt.ylabel("accuracy")
#     plt.xlabel("epoch")
#     plt.legend(["train", "validation"], loc="upper left")
#     plt.show()


# plot_hist(hist)

`EfficientNet model:`

In [None]:
# efficient_model = efficientNet.EfficientNet()

# architecture = efficient_model.efficient_model()
# architecture.summary()

In [None]:
# compB0 = efficient_model.compile_model()

In [None]:
# hist = efficient_model.train(train_generator, test_generator, epochs = 5)

In [None]:
# efficient_model.get_weights()

In [None]:
# class_names = ["CE", "LAA"]
# efficient_model.predict_efficientNet(test_generator,class_names)

In [None]:
# def plot_hist(hist):
#     plt.plot(hist.history["accuracy"])
#     plt.plot(hist.history["val_accuracy"])
#     plt.title("model accuracy")
#     plt.ylabel("accuracy")
#     plt.xlabel("epoch")
#     plt.legend(["train", "validation"], loc="upper left")
#     plt.show()


# plot_hist(hist)

In [None]:
#validation_metrics = efficient_model.evaluate(test_generator)

In [None]:
# efficient_model.unfreeze_model(hist)

#https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/