# This notebook is supposed to work on Google Colab
### First of all, the dataset must be unzipped in */content/drive/MyDrive* in order to make this notebook work
You can download it at this link: https://drive.google.com/file/d/1NFxqVt6z7sOaO45ifu-wQTodx-rPbuo5/view?usp=sharing

N.B.: the dataset at the provided link contains only the images related to the Bipbip team and the Haricot crop.

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os
import tensorflow as tf
import numpy as np
 
# Single seed shared by every random number generator used in the notebook.
SEED = 1234
tf.random.set_seed(SEED)
# The train/validation holdout draws from NumPy's global generator, so NumPy
# must be seeded as well for the split to be reproducible across runs.
np.random.seed(SEED)

# Google Drive folder, resolved relative to the kernel's working directory
# (expected to be /content/drive/MyDrive on Colab).
cwd = os.path.join(os.getcwd(), 'drive', 'MyDrive')

In [None]:
import requests

# NOTE: the archive is written inside the Drive folder, so Google Drive must
# already be mounted when this cell runs.
archive_path = os.path.join(cwd, 'Final_Dataset.zip')

# Stream the ~1.1 GB archive to disk chunk by chunk instead of buffering the
# whole response body in memory.
with requests.get('https://competitions.codalab.org/my/datasets/download/df18097f-54f4-4faa-b09f-64bd4d7ac0e5', stream=True) as r:
    # Fail loudly on HTTP errors so an error page is never saved as the zip file.
    r.raise_for_status()
    bytes_written = 0
    with open(archive_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            bytes_written += f.write(chunk)

# Report the size of the downloaded archive, in bytes.
print(bytes_written)

1166728522

In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive, remounting it if it is already mounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!unzip /content/drive/My\ Drive/Final_Dataset.zip -d /content/drive/My\ Drive

## Preprocess data
### Split into training and validation
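The dataset is split according to these steps:
1. We draw a random boolean array with one entry per file, where each entry is `True` with probability equal to the training percentage we want (e.g. 75% for training, 25% for validation), i.e. each entry follows a Bernoulli distribution with `p=[train_percentage, 1-train_percentage]`. Applying this array as a mask to the lists of image and mask filenames gives the training sets of images and masks. Since every file is drawn independently, the resulting proportions are only approximately the requested ones.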
2. We apply the logical not of the previous boolean array to the same lists of filenames, obtaining the validation sets of images and masks

In [None]:
def train_validation_holdout(images_path, masks_path, train_perc = 0.75, seed = None):
    """Randomly split paired image/mask filenames into training and validation sets.

    Each sample is assigned to the training set independently with probability
    `train_perc`, so the resulting proportions are only approximately
    `train_perc` / `1 - train_perc`.

    Parameters
    ----------
    images_path : str
        Directory containing the image files.
    masks_path : str
        Directory containing the mask files. It must hold exactly one mask per
        image, and the two directories must sort into the same sample order.
    train_perc : float, optional
        Probability of a sample being assigned to the training set.
    seed : int or None, optional
        When given, the split is drawn from a dedicated generator seeded with
        this value; otherwise NumPy's global random state is used.

    Returns
    -------
    tuple of numpy.ndarray
        `(training_images, training_masks, validation_images, validation_masks)`,
        each an array of filenames (not full paths).

    Raises
    ------
    ValueError
        If the two directories do not contain the same number of files.
    """
    # os.listdir returns entries in arbitrary order: sort both listings so that
    # the i-th image and the i-th mask refer to the same sample.
    images = np.array(sorted(os.listdir(images_path)))
    masks = np.array(sorted(os.listdir(masks_path)))

    if len(images) != len(masks):
        raise ValueError(
            "Found {} images in '{}' but {} masks in '{}'".format(
                len(images), images_path, len(masks), masks_path))

    rng = np.random if seed is None else np.random.RandomState(seed)
    train_selection_array = rng.choice([True, False], len(images), p=[train_perc, 1 - train_perc])
    # Everything that was not selected for training goes to validation
    valid_selection_array = np.logical_not(train_selection_array)

    training_images_set = images[train_selection_array]
    training_masks_set = masks[train_selection_array]
    validation_images_set = images[valid_selection_array]
    validation_masks_set = masks[valid_selection_array]

    return training_images_set, training_masks_set, validation_images_set, validation_masks_set

In [None]:
# Root folders of the three splits inside Development_Dataset
training_dir, validation_dir, test_dir = (
    os.path.join(cwd, 'Development_Dataset', split_name)
    for split_name in ('Training', 'Validation', 'Test_Dev')
)

# Team and plant names, used as the two folder levels below each split
teams = ['Bipbip', 'Weedelec']
plants = ['Haricot', 'Mais']

# Names of the sub-folders holding images and masks
subfolders = dict(img='Images', mask='Masks')

In [None]:
# For every (team, plant) pair, decide which files go to training and which to
# validation, and record them under the folder they will be moved to.
train_datasets, valid_datasets = {}, {}
for team in teams:
    for plant in plants:
        # Holdout on the files currently stored under the test directory
        dataset_path = os.path.join(test_dir, team, plant)
        images_path, masks_path = (
            os.path.join(dataset_path, subfolders[kind]) for kind in ('img', 'mask')
        )

        (training_images_set, training_masks_set,
         validation_images_set, validation_masks_set) = train_validation_holdout(
            images_path, masks_path, train_perc=0.75)

        # Destination folders for this (team, plant) pair
        training_images_path, training_masks_path = (
            os.path.join(training_dir, team, plant, subfolders[kind]) for kind in ('img', 'mask')
        )
        validation_images_path, validation_masks_path = (
            os.path.join(validation_dir, team, plant, subfolders[kind]) for kind in ('img', 'mask')
        )

        # Map each destination folder to the filenames that belong in it
        train_datasets.update({
            training_images_path: training_images_set,
            training_masks_path: training_masks_set,
        })
        valid_datasets.update({
            validation_images_path: validation_images_set,
            validation_masks_path: validation_masks_set,
        })

In [None]:
# Sanity check of the split: compare the sizes of the first training entry
# and the first validation entry.

# First key of each dictionary (empty string when the dictionary is empty)
key_train = next(iter(train_datasets), "")
key_valid = next(iter(valid_datasets), "")

print("Training images: {}, Validation images: {}".format(
    len(train_datasets[key_train]),
    len(valid_datasets[key_valid]),
))

Training images: 8, Validation images: 7


### Move files to appropriate folders
We want to have a directory structure like this:

  - Training/
    - Bipbip/
        - Haricot/
            - Images/
            - Masks/
        - Mais/
            - Images/
            - Masks/
  - Validation/
    - Bipbip/
        - Haricot/
            - Images/
            - Masks/
        - Mais/
            - Images/
            - Masks/
  - Test_Dev/
    - Bipbip/
        - Haricot/
            - Images/
        - Mais/
            - Images/

for each different dataset we have

In [None]:
# Move every file selected by the holdout from the test folder to its
# training or validation folder.
import shutil


def _move_split_files(datasets, split_dir, source_dir):
    """Move the files listed in `datasets` from `source_dir` into `split_dir`.

    Parameters
    ----------
    datasets : dict
        Maps a destination directory (located under `split_dir`) to the
        filenames that belong in it.
    split_dir : str
        Root directory of the destination split.
    source_dir : str
        Root directory currently holding the files, organised with the same
        relative sub-folders as `split_dir`.

    Raises
    ------
    shutil.Error
        If a file exists both at its source and at its destination.
    """
    for destination_dir, filenames in datasets.items():
        os.makedirs(destination_dir, exist_ok=True)
        # Mirror the destination's sub-path under source_dir. Only the split
        # root is swapped, so a split name occurring elsewhere in the path
        # cannot be rewritten by accident.
        origin_dir = os.path.join(source_dir, os.path.relpath(destination_dir, split_dir))
        for filename in filenames:
            old_path = os.path.join(origin_dir, filename)
            new_path = os.path.join(destination_dir, filename)
            if os.path.exists(new_path) and not os.path.exists(old_path):
                # Already moved by a previous run of this cell
                continue
            shutil.move(old_path, destination_dir)


_move_split_files(train_datasets, training_dir, test_dir)
_move_split_files(valid_datasets, validation_dir, test_dir)

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_00581.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_00721.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_02781.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_02841.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_02901.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_06581.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_06751.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Images/Bipbip_haricot_im_07421.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_00581.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_00721.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_02781.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_02841.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_02901.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_06581.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_06751.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Haricot/Masks/Bipbip_haricot_im_07421.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_04121.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_06381.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_06831.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_07611.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_07681.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_09091.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_10941.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Images/Bipbip_mais_im_11021.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_04121.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_06381.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_06831.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_07611.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_07681.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_09091.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_10941.png'

'/content/drive/MyDrive/Development_Dataset/Training/Bipbip/Mais/Masks/Bipbip_mais_im_11021.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114347-2.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114419-14.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114537-44.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114558-52.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114726-86.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114852-119.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T115031-157.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T115142-185.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T115628-295.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T115906-356.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T115942-370.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T120146-418.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T120225-433.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T120248-442.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114347-2.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114419-14.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114537-44.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114558-52.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114726-86.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114852-119.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T115031-157.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T115142-185.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T115628-295.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T115906-356.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T115942-370.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T120146-418.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T120225-433.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T120248-442.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T120608-14.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121047-122.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121103-128.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121125-137.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121133-140.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121212-155.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121237-165.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121350-193.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121539-235.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121800-290.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121851-310.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T122021-345.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T122227-395.jpg'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T120608-14.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121047-122.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121103-128.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121125-137.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121133-140.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121212-155.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121237-165.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121350-193.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121539-235.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121800-290.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121851-310.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T122021-345.png'

'/content/drive/MyDrive/Development_Dataset/Training/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T122227-395.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_00211.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_00951.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_01341.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_02421.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_03691.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_07181.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Images/Bipbip_haricot_im_07331.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_00211.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_00951.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_01341.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_02421.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_03691.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_07181.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Haricot/Masks/Bipbip_haricot_im_07331.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_01931.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_02211.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_03621.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_05521.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_09571.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_09781.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Images/Bipbip_mais_im_10441.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_01931.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_02211.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_03621.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_05521.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_09571.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_09781.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Bipbip/Mais/Masks/Bipbip_mais_im_10441.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Weedelec/Haricot/Images/Weedelec_haricot_2019-09-25T114608-56.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Weedelec/Haricot/Masks/Weedelec_haricot_2019-09-25T114608-56.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T121602-244.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Weedelec/Mais/Images/Weedelec_mais_2019-09-25T122122-369.jpg'

'/content/drive/MyDrive/Development_Dataset/Validation/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T121602-244.png'

'/content/drive/MyDrive/Development_Dataset/Validation/Weedelec/Mais/Masks/Weedelec_mais_2019-09-25T122122-369.png'