In [1]:
# SageMaker environment: one boto3 session is shared by the low-level
# SageMaker client and by the high-level SageMaker SDK session.
import boto3
import sagemaker

sess = boto3.Session()
sm = sess.client(service_name='sagemaker')

# IAM role the notebook runs under.
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session(boto_session=sess)

In [2]:
# Show which IAM execution role was resolved above.
# NOTE(review): the stored output of this cell contains the full role ARN,
# including the AWS account ID -- clear the output before sharing the notebook.
print(role)

arn:aws:iam::844357513200:role/AmazonSageMaker-FullAccessRole


In [22]:
# load packages
import time, os, sys
import numpy as np
import pandas as pd
from PIL import Image
from numpy import asarray

import s3fs
# S3 filesystem handle; the image helpers below call fs.open("<bucket>/<key>")
# to read objects as file-like streams.
fs = s3fs.S3FileSystem()

import tensorflow as tf

from sagemaker.tensorflow import TensorFlow

In [4]:
# Where the dataset lives on S3: the bucket name, the dataset's key prefix,
# and one key prefix per split.
bucket = "sagemaker-michaelwu-ma5852"
subfolder = 'src/Covid19-dataset'
input_data_train_path, input_data_test_path = (
    '/'.join((subfolder, split_name)) for split_name in ('train', 'test')
)

In [5]:
# S3 resource created from the shared boto3 session, and a handle on the
# dataset bucket; my_bucket.objects.filter(Prefix=...) is used below to list keys.
s3 = sess.resource('s3')
my_bucket = s3.Bucket(bucket)

In [18]:
# Sanity check of the label convention: take the last object listed under the
# training prefix and read its class label from the name of its parent folder.
test = [
    f"{bucket}/{s3_object.key}"
    for s3_object in my_bucket.objects.filter(Prefix=input_data_train_path)
][-1]
label = test.rsplit("/", 2)[-2]
label

'Normal'

In [71]:
def min_size(path):
    """Return the smallest image height and width found under an S3 prefix.

    The result tells us the largest size every image can be resized to
    without up-scaling any of them.

    Parameters
    ----------
    path : str
        Key prefix inside ``my_bucket`` whose objects are inspected.

    Returns
    -------
    tuple
        ``(min_height, min_width)`` in pixels. The two minima are taken
        independently, so they may come from different images.

    Raises
    ------
    ValueError
        If no image objects exist under ``path``.
    """
    widths = []
    heights = []

    for my_bucket_object in my_bucket.objects.filter(Prefix=path):
        key = my_bucket_object.key

        # A prefix listing can include "folder" placeholder keys that end in
        # '/'; they are not images and Image.open would fail on them.
        if key.endswith('/'):
            continue

        # Context managers close the S3 stream as well as the image, so no
        # handle is left open per file.
        with fs.open(bucket + '/' + key) as image_file, Image.open(image_file) as img:
            width, height = img.size

        widths.append(width)
        heights.append(height)

    if not widths:
        raise ValueError(f"no images found under '{bucket}/{path}'")

    return min(heights), min(widths)

In [73]:
# Smallest image dimensions under the test prefix, returned as
# (min_height, min_width). Only the test split is measured here.
min_size(input_data_test_path)

(341, 416)

In [31]:
def load_image_from_folder(path, basewidth, hsize):
    """Load every image under an S3 prefix into one array, with labels.

    Each image is converted to single-channel grayscale (mode ``'L'``),
    resized to ``basewidth`` x ``hsize`` pixels and stored in channels-last
    layout. An image's label is the name of the folder that directly
    contains it.

    Parameters
    ----------
    path : str
        Key prefix inside ``my_bucket`` to load images from.
    basewidth : int
        Target image width in pixels.
    hsize : int
        Target image height in pixels.

    Returns
    -------
    tuple
        ``(images, labels)``. ``images`` is a float64 array of shape
        ``(n_images, hsize, basewidth, 1)``; ``labels`` is a 1-D array of
        folder names in the same order as ``images``.
    """
    image_list = []
    labels = []

    for my_bucket_object in my_bucket.objects.filter(Prefix=path):
        key = my_bucket_object.key

        # A prefix listing can include "folder" placeholder keys that end in
        # '/'; they are not images and would also yield a wrong label.
        if key.endswith('/'):
            continue

        image_path = bucket + '/' + key

        # The parent folder name is the class label.
        labels.append(image_path.split("/")[-2])

        # Context managers close the S3 stream and the source image once the
        # pixels have been read.
        with fs.open(image_path) as image_file, Image.open(image_file) as img:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = img.convert('L').resize((basewidth, hsize), Image.LANCZOS)
            image_list.append(np.asarray(resized))

    # TensorFlow expects channels last, i.e. (height, width, channels) per
    # image. Allocate the 4-D array once and fill its single channel with the
    # stacked 2-D frames; np.zeros keeps the float64 dtype.
    images = np.zeros(shape=(len(image_list), hsize, basewidth, 1))
    if image_list:
        images[..., 0] = np.stack(image_list)

    return images, np.array(labels)

In [32]:
# Load the training split; the positional arguments are (path, basewidth, hsize),
# so every image is resized to 48 x 48 pixels.
X_train, y_train = load_image_from_folder(input_data_train_path, 48, 48)

In [30]:
# NOTE(review): the output stored under this cell is stale. It was produced at
# In [30], before the loader was redefined (In [31]) and re-run (In [32]), and it
# shows a list of 2-D uint8 arrays rather than the single 4-D array the loader
# now returns. Re-run this cell; displaying X_train.shape would avoid dumping
# the whole array.
X_train

[array([[ 5,  3,  2, ...,  4,  4,  6],
        [ 4,  3,  3, ...,  2,  3,  3],
        [ 5,  4,  3, ...,  2,  1,  2],
        ...,
        [ 8, 35, 96, ...,  4,  5,  5],
        [ 5, 30, 88, ...,  4,  4,  4],
        [ 3, 27, 83, ...,  3,  3,  5]], dtype=uint8),
 array([[ 54,  68,  73, ...,   1,   1,   2],
        [ 70,  85,  94, ...,   9,   6,   5],
        [ 96, 103, 123, ...,  30,  24,  21],
        ...,
        [118, 147, 178, ..., 121, 107,  90],
        [120, 146, 170, ..., 116, 103,  87],
        [121, 147, 165, ..., 110,  99,  84]], dtype=uint8),
 array([[  3,   0,  14, ..., 199, 201, 193],
        [ 10,  37,  36, ..., 212, 217, 216],
        [ 51,  18,   0, ..., 207, 214, 215],
        ...,
        [  0,   0,   5, ..., 163, 130,  45],
        [  0,   0,   4, ..., 168, 147,  57],
        [  0,   0,   4, ..., 165, 161,  98]], dtype=uint8),
 array([[  0,   0,   0, ...,   0,   0,   0],
        [  0,   9,   9, ...,  27,  19,   3],
        [  2,  61,  81, ...,  57,  46,   8],
       