In [0]:
import csv
import os
import zipfile

import cv2
from google.colab import drive
from google.colab import files

**Download the dataset zip file into the Google Drive folder**

In [0]:
def download_zip_dataset_and_unmount_gdrive():
  # Install Kaggle library
  !pip install kaggle
  !pip install --upgrade --force-reinstall --no-deps kaggle

  !rm kaggle.json
  # Upload kaggle API key file
  uploaded = files.upload()     # upload kaggle.json

  !rm -rf ../root/.kaggle
  !mkdir ../root/.kaggle
  !cp kaggle.json ../root/.kaggle
  !chmod 600 /root/.kaggle/kaggle.json

  # Download zip file containing the dataset 
  !kaggle competitions download -c intel-mobileodt-cervical-cancer-screening -p /content/drive/My\ Drive/kaggle_dataset

  #drive.flush_and_unmount()

**Extract the zip file in local directory**

In [0]:
def extract_zip_dataset(
    zip_path='/content/drive/My Drive/kaggle_dataset/intel-mobileodt-cervical-cancer-screening.zip',
    dest_dir='/content/',
    prefixes=('train/', 'test/')):
  """Extract selected top-level folders of the dataset archive.

  Only archive members whose name starts with one of `prefixes` are
  extracted; everything else in the archive is left untouched.

  Args:
    zip_path: Path of the zip archive to read. Defaults to the competition
      archive stored in the mounted Google Drive folder.
    dest_dir: Directory the members are extracted into. A member named
      `train/train/Type_3/465.jpg` ends up at
      `<dest_dir>/train/train/Type_3/465.jpg`.
    prefixes: Iterable of member-name prefixes to extract. For now only the
      train and test image folders are used.
  """
  wanted = tuple(prefixes)

  # The context manager guarantees the archive handle is closed, even when
  # an extraction fails half-way through.
  with zipfile.ZipFile(zip_path) as archive:
    for member in archive.namelist():
      if member.startswith(wanted):
        archive.extract(member, dest_dir)

**Support function to return an image completely blurred except a specified bounding box**

In [0]:
def blur_image(img_path, startx, starty, width, height,
               blur_ksize=(171, 171), out_size=(224, 224)):
  """Return the image blurred everywhere except inside a bounding box.

  Args:
    img_path: Path of the image file to load.
    startx, starty: Column / row of the box's top-left corner. Anything
      accepted by `int()` (the callers pass strings read from a TSV file).
    width, height: Box size in pixels, also converted with `int()`.
    blur_ksize: Gaussian kernel size passed to `cv2.GaussianBlur`.
    out_size: Size passed to `cv2.resize` for the returned image.

  Returns:
    The blurred image with the original pixels restored inside the box,
    resized to `out_size`.

  Raises:
    OSError: If `cv2.imread` cannot load `img_path`.
  """
  startx, starty = int(startx), int(starty)
  width, height = int(width), int(height)

  original = cv2.imread(img_path)
  if original is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("could not read image: %s" % img_path)

  # Clamp the box to non-negative indices: a negative slice bound would be
  # interpreted relative to the end of the array and select the wrong area.
  rows = slice(max(starty, 0), max(starty + height, 0))
  cols = slice(max(startx, 0), max(startx + width, 0))

  blurred = cv2.GaussianBlur(original, blur_ksize, 0)
  # Paste the sharp pixels of the box back over the blurred copy.
  blurred[rows, cols] = original[rows, cols]

  return cv2.resize(blurred, out_size)

**Get the original images, read the tsv files to get the bounding box coordinates, and convert each image to a blurred image**

In [0]:
def _blur_images_listed_in(tsv_path, src_root, dst_root):
  """Blur every image listed in one bounding-box file and delete the original."""
  with open(tsv_path) as tsvfile:
    # NOTE(review): the files are named .tsv but are parsed as
    # space-separated, exactly as before - confirm against the data.
    for row in csv.reader(tsvfile, delimiter=' '):
      if not row:
        continue  # blank line (e.g. trailing newline at end of file)

      # Replace the 7th character of the listed name with "/"
      # (presumably a path separator after the "Type_N" folder - verify).
      rel_path = row[0][:6] + "/" + row[0][7:]
      src_path = src_root + rel_path
      dst_path = dst_root + rel_path

      # Fields 2..5 are the box: startx, starty, width, height.
      blurred_image = blur_image(src_path, row[2], row[3], row[4], row[5])

      # cv2.imwrite does not create missing folders and reports failure only
      # through its return value, so prepare the folder and check the result.
      os.makedirs(os.path.dirname(dst_path), exist_ok=True)
      if not cv2.imwrite(dst_path, blurred_image):
        raise OSError("could not write image: %s" % dst_path)

      # remove original image - only after the blurred copy is safely written
      os.remove(src_path)


def convert_images_to_blurred():
  """Blur all annotated training images and store them on Google Drive.

  For each of the three bounding-box files in `/content/drive/My Drive/`
  every listed image is read from `/content/train/train/`, blurred outside
  its bounding box with `blur_image`, written to
  `/content/drive/My Drive/blurred_dataset/` and then removed from the local
  disk. Finally the Drive is flushed and unmounted.

  Raises:
    OSError: If a blurred image cannot be written; the corresponding
      original is then left in place.
  """
  for tsv_name in ('Type_1_bbox.tsv', 'Type_2_bboxes.tsv', 'Type_3_bbox.tsv'):
    _blur_images_listed_in('/content/drive/My Drive/' + tsv_name,
                           "/content/train/train/",
                           "/content/drive/My Drive/blurred_dataset/")

  drive.flush_and_unmount()

**Support function to return an image cropped on a specified bounding box**

In [0]:
# makes padding up to desired_size[<==>(width,height)] with BLACK-pixel
def rsz_with_black_border(im, desired_size):
  """Fit an image into a `desired_size` x `desired_size` square.

  The image is scaled, keeping its aspect ratio, so that its longer side
  becomes `desired_size`; the remaining space is filled with a black border
  split as evenly as possible between the two opposite sides.

  Args:
    im: Image array whose first two dimensions are (height, width).
    desired_size: Side length of the square output, a single number
      (not a (width, height) pair).

  Returns:
    The resized and black-padded image.

  Raises:
    ValueError: If `im` has a zero-sized height or width.
  """
  old_h, old_w = im.shape[:2]  # shape is in (height, width) format
  if min(old_h, old_w) == 0:
    raise ValueError("cannot resize an empty image of shape %s" % (im.shape,))

  ratio = float(desired_size) / max(old_h, old_w)
  # Truncation could shrink the short side of a very thin image to 0 pixels,
  # which cv2.resize rejects, so keep at least one pixel.
  new_h = max(1, int(old_h * ratio))
  new_w = max(1, int(old_w * ratio))

  # cv2.resize expects the target size in (width, height) format
  im = cv2.resize(im, (new_w, new_h))

  delta_w = desired_size - new_w
  delta_h = desired_size - new_h
  top = delta_h // 2
  bottom = delta_h - top     # gets the extra pixel when delta_h is odd
  left = delta_w // 2
  right = delta_w - left     # gets the extra pixel when delta_w is odd

  return cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT,
      value=[0, 0, 0])


def crop_image_manual(img_path, startx, starty, width, height):
  """Load an image, crop it to a bounding box and fit it into 224x224.

  Args:
    img_path: Path of the image file to load.
    startx, starty: Column / row of the box's top-left corner in the
      original image. Anything accepted by `int()`.
    width, height: Box size in pixels, also converted with `int()`.

  Returns:
    The cropped region, resized and black-padded by
    `rsz_with_black_border(..., 224)`.

  Raises:
    OSError: If `cv2.imread` cannot load `img_path`.
    ValueError: If the box does not overlap the image at all.
  """
  startx, starty = int(startx), int(starty)
  width, height = int(width), int(height)

  original = cv2.imread(img_path)
  if original is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("could not read image: %s" % img_path)

  # Clamp to non-negative indices: a negative slice bound would be
  # interpreted relative to the end of the array and crop the wrong area.
  rows = slice(max(starty, 0), max(starty + height, 0))
  cols = slice(max(startx, 0), max(startx + width, 0))
  cropped = original[rows, cols]   # CROP
  if cropped.size == 0:
    raise ValueError("bounding box lies outside image: %s" % img_path)

  return rsz_with_black_border(cropped, 224)


def crop_image(img_path, startx, starty, width, height, rsz_x, rsz_y):
  """Load an image, resize it, crop a bounding box and fit it into 224x224.

  Unlike `crop_image_manual`, the image is first resized to
  (`rsz_x`, `rsz_y`); the box coordinates are then applied to that resized
  image, not to the file's original resolution.

  Args:
    img_path: Path of the image file to load.
    startx, starty: Column / row of the box's top-left corner in the resized
      image. Anything accepted by `int()`.
    width, height: Box size in pixels, also converted with `int()`.
    rsz_x, rsz_y: Width and height the image is resized to before cropping,
      converted with `int()`.

  Returns:
    The cropped region, resized and black-padded by
    `rsz_with_black_border(..., 224)`.

  Raises:
    OSError: If `cv2.imread` cannot load `img_path`.
    ValueError: If the box does not overlap the resized image at all.
  """
  startx, starty = int(startx), int(starty)
  width, height = int(width), int(height)

  original = cv2.imread(img_path)
  if original is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("could not read image: %s" % img_path)

  resized = cv2.resize(original, (int(rsz_x), int(rsz_y)))

  # Clamp to non-negative indices: a negative slice bound would be
  # interpreted relative to the end of the array and crop the wrong area.
  rows = slice(max(starty, 0), max(starty + height, 0))
  cols = slice(max(startx, 0), max(startx + width, 0))
  cropped = resized[rows, cols]   # CROP
  if cropped.size == 0:
    raise ValueError("bounding box lies outside image: %s" % img_path)

  return rsz_with_black_border(cropped, 224)


**Get the original images, read the tsv files to get the bounding box coordinates, and convert each image to a cropped image**

In [0]:
def _crop_images_listed_in(tsv_path, src_root, dst_root):
  """Crop every image listed in one bounding-box file and delete the original."""
  with open(tsv_path) as tsvfile:
    # NOTE(review): the files are named .tsv but are parsed as
    # space-separated, exactly as before - confirm against the data.
    for row in csv.reader(tsvfile, delimiter=' '):
      if not row:
        continue  # blank line (e.g. trailing newline at end of file)

      # Replace the 7th character of the listed name with "/"
      # (presumably a path separator after the "Type_N" folder - verify).
      rel_path = row[0][:6] + "/" + row[0][7:]
      src_path = src_root + rel_path
      dst_path = dst_root + rel_path

      # Fields 2..5 are the box: startx, starty, width, height.
      cropped_image = crop_image_manual(src_path, row[2], row[3], row[4], row[5])

      # cv2.imwrite does not create missing folders and reports failure only
      # through its return value, so prepare the folder and check the result.
      os.makedirs(os.path.dirname(dst_path), exist_ok=True)
      if not cv2.imwrite(dst_path, cropped_image):
        raise OSError("could not write image: %s" % dst_path)

      # remove original image - only after the cropped copy is safely written
      os.remove(src_path)


def convert_images_to_cropped_images(
    output_dir="/content/drive/My Drive/cropped_dataset/"):
  """Crop all annotated training images and store them on Google Drive.

  For each of the three bounding-box files in `/content/drive/My Drive/`
  every listed image is read from `/content/train/train/`, cropped to its
  bounding box with `crop_image_manual`, written below `output_dir` and then
  removed from the local disk. Finally the Drive is flushed and unmounted.

  This function used to be a verbatim copy of the blurring converter: it
  called `blur_image` and wrote into `blurred_dataset`, so no cropped image
  was ever produced.

  Args:
    output_dir: Folder (with trailing slash) that receives the cropped
      images. NOTE(review): the default `cropped_dataset` folder name is a
      reviewer's choice to keep cropped and blurred outputs apart - confirm.

  Raises:
    OSError: If a cropped image cannot be written; the corresponding
      original is then left in place.
  """
  for tsv_name in ('Type_1_bbox.tsv', 'Type_2_bboxes.tsv', 'Type_3_bbox.tsv'):
    _crop_images_listed_in('/content/drive/My Drive/' + tsv_name,
                           "/content/train/train/",
                           output_dir)

  drive.flush_and_unmount()