In [1]:
#Import different libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from google.colab import auth
from google.cloud import storage
import pickle

In [2]:
# Authenticate the Colab session first, then create the GCS client.
auth.authenticate_user()
storage_client = storage.Client()

# Bucket holding the dataset and the object prefix of the training images.
BUCKET_NAME = 'pulmobucket_dataset'
DATA_DIR = 'data/train'

In [3]:
# Training parameters.
# Size every image is resized to before being fed to the network.
IMG_HEIGHT = 150
IMG_WIDTH = 150
# Number of images per batch and number of passes over the training set.
BATCH_SIZE = 32
EPOCHS = 2
# Class folder names; a label is the index of the folder name in this list,
# so the order must not change between training and inference.
CLASS_NAMES = ['covid', 'pneumonia', 'normal', 'lung_opacity']

In [4]:
# GCS client used by the listing helper below (a client is also created in an earlier cell).
storage_client = storage.Client()


def list_gcs_files(bucket_name, prefix):
    """Return the names of all objects in a bucket whose name starts with ``prefix``.

    Args:
        bucket_name: Name of the GCS bucket to list.
        prefix: Object-name prefix passed to the listing call.

    Returns:
        A list of object names; names ending in '/' are left out.
    """
    object_names = []
    for entry in storage_client.list_blobs(bucket_name, prefix=prefix):
        # Skip names that end with a slash.
        if entry.name.endswith('/'):
            continue
        object_names.append(entry.name)
    return object_names

def decode_img(img):
    """Decode encoded image bytes into a 3-channel tensor at the model input size.

    Args:
        img: Scalar string tensor with the encoded image contents.

    Returns:
        The decoded image resized to ``[IMG_HEIGHT, IMG_WIDTH]`` and divided by 255.0.
    """
    rgb = tf.image.decode_jpeg(img, channels=3)
    resized = tf.image.resize(rgb, [IMG_HEIGHT, IMG_WIDTH])
    scaled = resized / 255.0
    return scaled

def process_path(file_path):
    """Load one image and derive its integer class label from its path.

    Args:
        file_path: Scalar string tensor holding the image URI. The
            second-to-last path component must be the class folder name.

    Returns:
        A tuple ``(img, label)`` where ``img`` is the output of ``decode_img``
        and ``label`` is the index of the class folder in ``CLASS_NAMES``.
    """
    # GCS object names always use '/' as separator, whatever the host OS is,
    # so split on a literal '/' instead of os.path.sep.
    parts = tf.strings.split(file_path, '/')
    class_str = parts[-2]
    # Element-wise comparison against CLASS_NAMES gives a one-hot style vector;
    # argmax turns it into the class index.
    # NOTE: a folder name that is not in CLASS_NAMES matches nothing, and the
    # argmax of an all-zero vector is 0, so such files are silently labelled 0.
    label = tf.cast(tf.equal(class_str, CLASS_NAMES), tf.int32)
    label = tf.argmax(label)
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label

def get_gcs_dataset(bucket_name, data_dir, batch_size, img_height, img_width, seed=42):
    """Build training and validation datasets from class folders in a GCS bucket.

    Objects are listed under ``{data_dir}/{class_name}/`` for every entry of
    ``CLASS_NAMES``, converted to ``gs://`` URIs, shuffled once with a fixed
    seed and then split 80/20 into training and validation sets.

    Args:
        bucket_name: Name of the GCS bucket holding the images.
        data_dir: Object prefix that contains one sub-folder per class.
        batch_size: Number of examples per batch in both returned datasets.
        img_height: Kept for interface compatibility; not used here, the
            resize target comes from the module-level constants in ``decode_img``.
        img_width: Same as ``img_height``.
        seed: Seed of the one-off file shuffle, so the split is reproducible.

    Returns:
        A tuple ``(train_dataset, val_dataset)`` of batched, prefetched datasets
        yielding ``(image, label)`` pairs.

    Raises:
        ValueError: If no object is found under any class folder.
    """
    base_prefix = data_dir.rstrip('/')
    gcs_files = []
    for class_name in CLASS_NAMES:
        # The trailing slash keeps the prefix from also matching sibling
        # folders whose name merely starts with the class name.
        class_files = list_gcs_files(bucket_name, f'{base_prefix}/{class_name}/')
        gcs_files.extend(f'gs://{bucket_name}/{file}' for file in class_files)

    if not gcs_files:
        raise ValueError(
            f"No files found in bucket '{bucket_name}' under '{base_prefix}/' "
            f"for classes {CLASS_NAMES}"
        )

    # The list was gathered class by class. Without this shuffle, take/skip
    # would put only the last class(es) into the validation split and leave
    # them under-represented (or absent) in training.
    order = np.random.default_rng(seed).permutation(len(gcs_files))
    gcs_files = [gcs_files[i] for i in order]

    train_size = int(0.8 * len(gcs_files))

    dataset = tf.data.Dataset.from_tensor_slices(gcs_files)
    dataset = dataset.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)

    train_dataset = dataset.take(train_size)
    val_dataset = dataset.skip(train_size)

    # Cache decoded training images, then shuffle across the cached set.
    # The buffer size must be at least 1 even for a tiny dataset.
    train_dataset = (
        train_dataset
        .cache()
        .shuffle(buffer_size=max(train_size, 1))
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )
    val_dataset = val_dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

    return train_dataset, val_dataset

# Build the training and validation datasets
print("loading data")
train_dataset, val_dataset = get_gcs_dataset(
    bucket_name=BUCKET_NAME,
    data_dir=DATA_DIR,
    batch_size=BATCH_SIZE,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH,
)

loading data


In [5]:
# Define the model: two convolution/pooling stages followed by a dense classifier.
# The input shape and the output width are derived from the shared constants so
# they cannot drift from the preprocessing code or the class list.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # One softmax output per class.
    Dense(len(CLASS_NAMES), activation='softmax')
])

In [6]:
# Compile the model; the sparse loss matches the integer class labels
# produced by the input pipeline.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [7]:
# Callbacks: stop when val_loss stops improving, and write the best model to disk.
checkpoint_path = "best_model.h5"
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True,
    ),
    ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
    ),
]

In [8]:
# Train the model, evaluating on the validation set after every epoch
history = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
)

Epoch 1/2
Epoch 1: val_loss improved from inf to 2.07879, saving model to best_model.h5


  saving_api.save_model(


Epoch 2/2
Epoch 2: val_loss improved from 2.07879 to 1.30719, saving model to best_model.h5


In [12]:
# Authenticate the Colab user again before uploading (the same call is also made in an earlier cell).
auth.authenticate_user()

def save_and_upload_model_to_gcs(model, bucket_name, destination_blob_name, local_path='/tmp/model.pkl'):
    """Pickle ``model`` to a local file and upload that file to a GCS bucket.

    Args:
        model: Object to serialize with ``pickle``.
        bucket_name: Name of the destination GCS bucket.
        destination_blob_name: Object name to write inside the bucket.
        local_path: Local file the pickle is written to before the upload.
            The file is left in place afterwards.

    Returns:
        None. Progress and upload failures are reported with ``print``.

    Raises:
        OSError: If ``local_path`` cannot be opened for writing.
        Exception: Whatever ``pickle.dump`` raises if ``model`` cannot be pickled.
    """
    # NOTE(review): pickle files must only be loaded from trusted sources, and
    # they are tied to the library versions used here. Confirm that the
    # consumers of this artifact really expect a pickle.
    with open(local_path, 'wb') as f:
        pickle.dump(model, f)
    # open() either created the file or raised, so no existence check is needed.
    print(f"Model saved locally at {local_path}")

    try:
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        # Upload the pickle file to the GCS bucket.
        blob.upload_from_filename(local_path)
        print(f'Model uploaded to {destination_blob_name} in bucket {bucket_name}')
    except Exception as e:
        # Deliberate best-effort: report the failure without interrupting the notebook.
        print(f"Failed to upload model to GCS: {e}")

In [14]:
# Destination bucket and object name for the trained model artifact
bucket_name = 'pulmobucket_models'
destination_blob_name = 'GCPmodel.pkl'

save_and_upload_model_to_gcs(
    model=model,
    bucket_name=bucket_name,
    destination_blob_name=destination_blob_name,
)

Model saved locally at /tmp/model.pkl
Model uploaded to GCPmodel.pkl in bucket pulmobucket_models
