In [3]:
import boto3           # For interacting with S3
import pandas as pd
import sys             # Python system library needed to load custom functions

# Imports to run Sagemaker training jobs
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorch
from sagemaker.session import Session

In [4]:
sys.path.append('../src')  # Add the source directory to the PYTHONPATH. This allows to import local functions and modules.

In [7]:
from config import DEFAULT_BUCKET, DEFAULT_REGION  # The name of the S3 bucket that contains the training data
from detection_util import create_predictions
from gdsc_util import download_and_extract_model, set_up_logging, extract_hyperparams, PROJECT_DIR
from tutorial_4_training import load_config as load_config_4
from tutorial_5_training_4k import load_config as load_config_5
from tutorial_5_training_10_epochs import load_config as load_config_5_with_10_epochs
from gdsc_util import load_sections_df
from PredictionEvaluator import PredictionEvaluator
from gdsc_score import get_leaderboard_score

set_up_logging()  # Sets up logging to console and .log

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [11]:
#from imutils import contours
from skimage import measure
import numpy as np
#import imutils
import cv2
import matplotlib.pyplot as plt
from scipy.stats import norm
from os import listdir
from os.path import isfile, join

In [13]:
# OpenCv window to display the image
cv2.namedWindow('image', cv2.tutorial_5_training_4k)
cv2.resizeWindow('image', 960, 645)

AttributeError: module 'cv2' has no attribute 'tutorial_5_training_4k'

In [43]:
import glob
import exif
import PIL
from PIL import Image
 
img_paths = glob.glob('../data/jpgs/*.jpg')
 
for img_path in img_paths:
    img = PIL.Image.open(img_path)
    if not img.getexif(): # No EXIF tag at all
        continue 
  
    # Load Image EXIF
    with open(img_path, 'rb') as f:
        img_exif = exif.Image(f)
 # Delete orientation tag and store the image 
 
    if 'orientation' in dir(img_exif):
        print(img_path)
        img_exif.delete('orientation')
        with open(img_path, 'wb') as f:
            f.write(img_exif.get_file())



In [45]:
#Uploading fixed images to YOUR S3 bucket
#Uploading fixed images to YOUR S3 bucket
import os
from gdsc_util import upload_to_s3, PROJECT_DIR
your_bucket_name = 'jpg-team03'
files = os.listdir(f'{PROJECT_DIR}/data/jpgs')
files = [i for i in files if i.endswith('jpg')]
for file in files:
    upload_to_s3(f'{PROJECT_DIR}/data/jpgs/{file}', f'jpgs/{file}', f'{your_bucket_name}')
    
    
upload_to_s3(f'{PROJECT_DIR}/data/gdsc_train.csv', 'gdsc_train.csv', f'{your_bucket_name}')
upload_to_s3(f'{PROJECT_DIR}/data/test_files.csv', 'test_files.csv', f'{your_bucket_name}')

's3://jpg-team03/test_files.csv'

In [46]:
epoch = 'epoch_10'  # Select one of the model checkpoints to load in    

In [47]:
entry_point = 'tutorial_5_training_4k.py'
exp_name = entry_point.split('.')[0].replace('_', '-')  # AWS does not allow . and _ as experiment names
account_id = boto3.client('sts').get_caller_identity().get('Account')
role = get_execution_role()
sm_client = boto3.client("sagemaker", region_name=DEFAULT_REGION)
sess = Session(sagemaker_client=sm_client)
s3_output_location = f"s3://{sess.default_bucket()}/{exp_name}"
input_channels = {"train": f"s3://{your_bucket_name}"}
hyperparameters = extract_hyperparams(entry_point) # custom function to parse the training script and extract config
hyperparameters['base_file'] = base_file

In [54]:
metrics = [
    {"Name": "train:loss_rpn_cls", "Regex": "loss_rpn_cls: ([0-9\.]+)"},
    {"Name": "train:loss_rpn_bbox", "Regex": "loss_rpn_bbox: ([0-9\.]+)"},
    {"Name": "train:loss_cls", "Regex": "loss_cls: ([0-9\.]+)"},
    {"Name": "train:loss_bbox", "Regex": "loss_bbox: ([0-9\.]+)"},
    {"Name": "train:loss", "Regex": "loss: ([0-9\.]+)"},
    {"Name": "train:accuracy", "Regex": "acc: ([0-9\.]+)"},
    {"Name": "train:epoch", "Regex": "Epoch (\[[0-9\.]+\])"},
    {"Name": "val:epoch", "Regex": "Epoch\(val\) (\[[0-9]+\])"},
    {"Name": "val:mAP", "Regex": "mAP: ([0-9\.]+)"},
]

estimator = PyTorch(
    entry_point=entry_point,             # This function will be called by the training job
    source_dir="../src",                 # All code in this folder will be copied over
    image_uri=f"954362353459.dkr.ecr.{DEFAULT_REGION}.amazonaws.com/sm-training-custom:torch-1.8.1-cu111-noGPL",
    role=role,
    output_path=s3_output_location,
    container_log_level=20,             # 10=debug, 20=info
    base_job_name=exp_name,
    instance_count=1,
    instance_type="ml.g4dn.xlarge",     # a GPU instance
    volume_size=65,
    metric_definitions=metrics,
    hyperparameters=hyperparameters,
)

In [49]:
estimator.fit(
    input_channels,
    wait=False,           # Whether or not the notebook should wait for the job to finish. By setting it to False we can continue working while the job runs on another machine.
)

# save the name of the experiment to the filesystem so that we can use it later
experiment_name = estimator._hyperparameters["sagemaker_job_name"]

with open(f'{PROJECT_DIR}/experiment_tut5_max_per_img.txt', 'w+') as f:
    f.write(experiment_name)

2022-07-28 05:42:54,436 - sagemaker.image_uris - INFO - Defaulting to the only supported framework/algorithm version: latest.
2022-07-28 05:42:54,453 - sagemaker.image_uris - INFO - Ignoring unnecessary instance type: None.
2022-07-28 05:42:54,733 - sagemaker - INFO - Creating training-job with name: tutorial-5-training-4k-2022-07-28-05-42-54-434


In [55]:
# read the experiment name from the filesystem
with open(f'{PROJECT_DIR}/experiment_tut5_max_per_img.txt', 'r') as f:
    experiment_name = f.read()
    
experiment_name = 'tutorial-5-training-4k-2022-07-28-05-42-54-434'
model_location = f'{s3_output_location}/{experiment_name}/output/model.tar.gz'
local_model_dir = download_and_extract_model(model_uri=model_location, local_dir='data')

2022-07-28 11:07:19,946 - gdsc_util - INFO - File tutorial-5-training-4k/tutorial-5-training-4k-2022-07-28-05-42-54-434/output/model.tar.gz already exists. Skipping download


In [56]:
experiment_name = 'tutorial-5-training-4k-2022-07-28-05-42-54-434'

In [58]:
prediction_df = pd.read_csv(f'{data_folder}/{experiment_name}/results_tutorial5_test_epoch_8.csv', sep=';')
restricted_prediction_df = prediction_df[prediction_df.detection_score>0.5]
restricted_prediction_df.to_csv(f'{data_folder}/results_tutorial5_epoch_10_min_detection_score.csv', sep=';')

In [None]:
from PIL import Image, ImageFilter
image = Image.open(f'{PROJECT_DIR}/data/jpgs/102_A.jpg')
  
# Converting the image to grayscale, as edge detection 
# requires input image to be of mode = Grayscale (L)
image = image.convert("L")
  
# Detecting Edges on the Image using the argument ImageFilter.FIND_EDGES
image = image.filter(ImageFilter.FIND_EDGES)
  
# Saving the Image Under the name Edge_Sample.png
#image.save(f'{PROJECT_DIR}/data/jpgs/Edge_Sample.jpg')