In [1]:
# Import dependencies
from tensorflow.keras.preprocessing import image_dataset_from_directory
from alzheimers_final_project.params import BATCH_SIZE, IMAGE_SIZE
import tensorflow as tf

#data.py
# Insert your local path to train data and test data here.
# The dataset directory is spelled 'AlzheimersDataset' so that it matches the
# spelling used by the other paths in this file (Trainer.set_data and
# BUCKET_TRAIN_DATA_PATH); file systems are usually case-sensitive.
path_to_train_data = '/raw_data/AlzheimersDataset/train'
path_to_test_data = '/raw_data/AlzheimersDataset/test'

def train_data_loading(path_to_train_data):
    """Create the training and validation datasets from one image directory.

    The directory is read twice with an identical 80/20 split and an identical
    seed; only the requested ``subset`` differs between the two calls.

    Args:
        path_to_train_data: Directory handed to ``image_dataset_from_directory``.

    Returns:
        A ``(train_generator, validation_generator)`` tuple.
    """
    # Options shared by both subsets. The split fraction and the seed must be
    # the same in both calls so that the two subsets come from the same split.
    shared_options = dict(
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        labels='inferred',
        label_mode='categorical',
        shuffle=True,
        validation_split=0.2,
        seed=123,
    )

    train_generator = image_dataset_from_directory(
        path_to_train_data, subset='training', **shared_options
    )
    validation_generator = image_dataset_from_directory(
        path_to_train_data, subset='validation', **shared_options
    )

    return train_generator, validation_generator

def test_data_loading(path_to_test_data):
    """Create the test dataset from an image directory.

    Args:
        path_to_test_data: Directory handed to ``image_dataset_from_directory``.

    Returns:
        The dataset built from that directory, using the project-wide
        ``IMAGE_SIZE`` and ``BATCH_SIZE`` and a fixed shuffle seed.
    """
    loader_options = {
        'image_size': IMAGE_SIZE,
        'batch_size': BATCH_SIZE,
        'labels': 'inferred',
        'label_mode': 'categorical',
        'shuffle': True,
        'seed': 123,
    }
    return image_dataset_from_directory(path_to_test_data, **loader_options)

#model.py
from tensorflow.keras import Sequential, layers
from tensorflow.keras.applications.densenet import DenseNet121
from alzheimers_final_project.params import INPUT_SHAPE, METRICS

def build_compile_model():
    """Build and compile the DenseNet121-based classifier.

    A frozen, ImageNet-initialised DenseNet121 (without its top) is followed by
    a flatten layer, a 50-unit ReLU layer and a 4-unit softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, metrics taken from ``METRICS``).
    """
    # Pretrained backbone; frozen so that only the layers added below train.
    backbone = DenseNet121(
        include_top=False,
        weights='imagenet',
        input_shape=INPUT_SHAPE,
    )
    backbone.trainable = False

    # Backbone followed by the trainable classification head.
    classifier = Sequential([
        backbone,
        layers.Flatten(),
        layers.Dense(50, activation='relu'),
        layers.Dense(4, activation='softmax'),
    ])
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=METRICS,
    )

    return classifier

#params.py
import tensorflow as tf

### MODEL PARAMETERS ###
# tf.data AUTOTUNE constant (Trainer.set_model passes the same constant as
# num_parallel_calls / buffer_size).
AUTOTUNE = tf.data.experimental.AUTOTUNE
# batch_size passed to image_dataset_from_directory by the data loaders.
BATCH_SIZE = 32
# image_size passed to image_dataset_from_directory by the data loaders.
IMAGE_SIZE = (224, 224)
# NOTE(review): not referenced in the visible code; Trainer.fit_model trains
# for 50 epochs. Confirm which value is intended.
EPOCHS = 100
# input_shape passed to DenseNet121: IMAGE_SIZE plus 3 channels.
INPUT_SHAPE = (224, 224, 3)
# Metrics passed to model.compile. Trainer.evaluate reads the scores by
# position (after the loss), so this order must stay in sync with it.
METRICS = ['AUC', 'accuracy', 'Recall', 'Precision']


#preprocess.py

### GCP DATA STORAGE ###
# NOTE(review): the trainer imports BUCKET_NAME and STORAGE_LOCATION from
# alzheimers_final_project.params, so these constants presumably belong in
# params.py rather than preprocess.py. Confirm.

# Bucket name passed to storage.Client().bucket(...) in Trainer.save_model.
BUCKET_NAME = 'alzheimers-project-699'
# Not referenced in the visible code; presumably the training-data location
# inside the bucket. Confirm.
BUCKET_TRAIN_DATA_PATH = 'data/AlzheimersDataset/train'
# Blob path the saved model file is uploaded to in Trainer.save_model.
STORAGE_LOCATION = 'models/model.joblib'

import numpy as np
import tensorflow as tf

def preprocessing(img, label):
    """Rescale pixel values by 1/255 and pass the label through unchanged.

    Args:
        img: Image data supporting true division (assumes values in
            [0, 255]; TODO confirm against the loader).
        label: Label associated with ``img``; returned as-is.

    Returns:
        A ``(img / 255, label)`` tuple.
    """
    # No trailing comma here: `img / 255,` would wrap the image in a 1-tuple
    # and change the structure of every dataset element.
    img = img / 255
    return img, label

def augment(img, label, max_delta=0.2, seed=None):
    """Apply a random brightness shift and a random contrast change.

    The original body referenced the undefined names ``max_delta`` and
    ``seed``, which raised ``NameError`` on first use. They are now keyword
    parameters with defaults, so ``dataset.map(augment)`` keeps working.

    Args:
        img: Image tensor to perturb.
        label: Label associated with ``img``; returned unchanged.
        max_delta: Upper bound of the brightness shift handed to
            ``tf.image.random_brightness``. NOTE(review): 0.2 is a
            placeholder default; the original never defined a value.
        seed: Optional Python integer seed for the random ops.

    Returns:
        A ``(augmented_img, label)`` tuple.
    """
    img = tf.image.random_brightness(img, max_delta, seed=seed)

    # The seeded (non-stateless) op is used on purpose: the stateless variant
    # needs a shape-[2] seed and, with a fixed seed, would apply the same
    # contrast factor on every call.
    img = tf.image.random_contrast(img, 0.2, 0.5, seed=seed)

    return img, label

##trainer.py
import os

import joblib
import tensorflow as tf
from google.cloud import storage
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from alzheimers_final_project.data import train_data_loading, test_data_loading
from alzheimers_final_project.model import build_compile_model
from alzheimers_final_project.params import STORAGE_LOCATION, BUCKET_NAME
from alzheimers_final_project.preprocess import augment, preprocessing


class Trainer:
    """Drive the training pipeline end to end.

    Typical call order: ``set_data`` -> ``load_data`` -> ``set_model`` ->
    ``fit_model`` -> ``evaluate`` -> (optionally) ``save_model``.
    """

    def __init__(self):
        # Everything starts unset; the pipeline methods below fill these in.
        self.model = None
        self.train_path = None
        self.test_path = None
        self.train_generator = None
        self.validation_generator = None
        self.test_generator = None

    ### MODEL PIPELINE ###
    def set_data(self):
        """Resolve the train/test directories relative to this file's parent folder."""
        parent_path = os.path.dirname(os.path.dirname(__file__))
        self.train_path = os.path.join(parent_path, 'raw_data/AlzheimersDataset/train')
        self.test_path = os.path.join(parent_path, 'raw_data/AlzheimersDataset/test')

    def load_data(self):
        """Load the train, validation and test datasets from the configured paths."""
        self.train_generator, self.validation_generator = train_data_loading(self.train_path)
        self.test_generator = test_data_loading(self.test_path)

    def set_model(self):
        """Attach the input pipeline to the datasets and build the model.

        Requires ``load_data`` to have been called. Calling this twice would
        apply ``preprocessing`` twice to the same datasets.

        Returns:
            The freshly built, compiled model (also stored on ``self.model``).
        """
        # Let tf.data choose parallelism and prefetch buffer sizes.
        AUTOTUNE = tf.data.experimental.AUTOTUNE

        # Cache the deterministic rescaling only. The random augmentation runs
        # after the cache so every epoch gets new perturbations; caching after
        # `augment` would replay the first epoch's augmented images forever.
        self.train_generator = (
            self.train_generator
            .map(preprocessing, num_parallel_calls=AUTOTUNE)
            .cache()
            .map(augment, num_parallel_calls=AUTOTUNE)
            .prefetch(buffer_size=AUTOTUNE)
        )
        self.validation_generator = (
            self.validation_generator
            .map(preprocessing, num_parallel_calls=AUTOTUNE)
            .cache()
            .prefetch(buffer_size=AUTOTUNE)
        )
        self.test_generator = (
            self.test_generator
            .map(preprocessing, num_parallel_calls=AUTOTUNE)
            .cache()
            .prefetch(buffer_size=AUTOTUNE)
        )

        self.model = build_compile_model()
        return self.model

    def fit_model(self, epochs=50):
        """Fit the model on the training set, validating on the validation set.

        Builds the model first if ``set_model`` has not been called yet.

        Args:
            epochs: Maximum number of epochs; early stopping may end sooner.
        """
        if self.model is None:
            self.set_model()
        es = EarlyStopping(patience=10, restore_best_weights=True)
        # NOTE(review): min_lr=0.005 is above Adam's default learning rate
        # (0.001) and the patience equals the early-stopping patience, so this
        # callback likely never lowers the rate. Confirm the intended values.
        rop = ReduceLROnPlateau(monitor='val_loss', factor=0.005, patience=10, min_lr=0.005)
        self.model.fit(
            self.train_generator,
            validation_data=self.validation_generator,
            epochs=epochs,
            callbacks=[es, rop],
            verbose=1,
        )

    def evaluate(self):
        """Evaluate the model on the test set.

        Returns:
            A dict with keys 'Loss', 'AUC', 'Accuracy', 'Recall' and
            'Precision'. Scores are read by position, so the order must match
            the metrics the model was compiled with.
        """
        test_scores = self.model.evaluate(self.test_generator)
        scores_dict = {'Loss': test_scores[0],
                       'AUC': test_scores[1],
                       'Accuracy': test_scores[2],
                       'Recall': test_scores[3],
                       'Precision': test_scores[4]}
        return scores_dict

    ### SAVE MODEL TO GCP ###
    def save_model(self):
        """Dump the model to ``model.joblib`` and upload it to the GCP bucket.

        Raises:
            RuntimeError: If no model has been built yet.
        """
        if self.model is None:
            raise RuntimeError("No model to save; call set_model()/fit_model() first.")
        # The original dumped `self.pipeline`, an attribute this class never
        # defines. NOTE(review): Keras recommends `model.save(...)`; confirm
        # joblib can serialize the model on the TensorFlow version in use.
        joblib.dump(self.model, 'model.joblib')
        client = storage.Client()
        bucket = client.bucket(BUCKET_NAME)
        blob = bucket.blob(STORAGE_LOCATION)
        blob.upload_from_filename('model.joblib')
        print(f"uploaded model.joblib to gcp cloud storage under \n => {STORAGE_LOCATION}")


if __name__ == "__main__":
    # Script entry point: run the pipeline steps in order on a new Trainer.
    t = Trainer()

    t.set_data()
    t.load_data()
    t.set_model()
    t.fit_model()
    # The returned scores dict is not stored or printed here.
    t.evaluate()

    # TODO(review): the original note here read "Train model and save to gcp",
    # but save_model() is never called. Confirm whether it should be.




2021-09-03 16:53:32.620654: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-03 16:53:32.621461: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


ModuleNotFoundError: No module named 'alzheimers_final_project'