In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation
import PIL
from PIL import Image
import requests
from transformers import pipeline
import datasets
from datasets import load_dataset
import os
import evaluate
import torch
import cv2
import json
import codecs
import os
from os import sys

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
module_path = os.path.abspath(os.path.join('./src'))
if module_path not in sys.path:
    sys.path.append(module_path)
from constants import *

## Split WE3DS dataset per crop type

In [3]:
print(plant_classification)

Crop indicies:  [2, 5, 6, 11, 14, 15, 18]
Weed indicies:  [3, 4, 7, 8, 9, 10, 12, 13, 16, 17]


In [None]:
print("Crop indicies: ", crop_indices)
print("Weed indicies: ", weed_indices)

In [4]:
# Create a dictionary that lists for every image whic classes are present in it
output_file_path = './meta/WE3DS_class_presence.json'
we3ds_class_presence = np.array([])

if not os.path.isfile(output_file_path):
    for image_path, annotation_path in zip(image_paths, annotation_paths):
        im = cv2.imread(annotation_folder + annotation_path)
        unique_image_classes = np.unique(im)   
        image_class_pressence = {
            'image_path': image_path,
            'class_presence': np.zeros(len(plant_classification)).tolist()
        }

        for unique_image_class in unique_image_classes:
            image_class_pressence['class_presence'][unique_image_class] = 1

        we3ds_class_presence = np.append(we3ds_class_presence, image_class_pressence)
    
    with open(output_file_path, 'w') as file:
        we3ds_class_presence_as_list = we3ds_class_presence.tolist()
        json.dump(we3ds_class_presence_as_list, file)

else:
    we3ds_class_presence = json.load(codecs.open(output_file_path, 'r', 'utf-8-sig'))

In [5]:
we3ds_class_presence

[{'image_path': 'img_00000.png',
  'class_presence': [1,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0]},
 {'image_path': 'img_00001.png',
  'class_presence': [0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0]},
 {'image_path': 'img_00002.png',
  'class_presence': [1,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0]},
 {'image_path': 'img_00003.png',
  'class_presence': [0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0]},
 {'image_path': 'img_00004.png',
  'class_presence': [0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   1,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0]},

In [6]:
# Determine how many images contain each specific crop
# Store the list of images that contain a specific crop in a meta json file
all_crop_images = []
for crop_index in crop_indices:
    crop_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][crop_index] == 1]
    print(list(plant_classification.keys())[crop_index], len(crop_images))
    all_crop_images = np.append(all_crop_images, crop_images)

    output_file_path = './meta/' + list(plant_classification.keys())[crop_index].replace(" ", "_") + '_images.json'
    with open(output_file_path, 'w') as file:
        json.dump(crop_images, file)

print("Number of images that contain at least one crop: ", len(all_crop_images))
print("Number of images that contain only one crop: ", len(np.unique(all_crop_images)))

broad bean 210
common buckwheat 137
pea 207
corn 403
soybean 303
sunflower 135
sugar beet 410
Number of images that contain at least one crop:  1805
Number of images that contain only one crop:  1803


In [7]:
# Determine which images contain more than one crop
# Determine which crops are depicted in this images
unique_images, counts = np.unique(all_crop_images, return_counts=True)
duplicate_images = unique_images[counts > 1]
print("Images that contain more than one crop: ", duplicate_images)
for image in duplicate_images:
    im = cv2.imread(annotation_folder + image)
    print("Image ", image, " contains instances of classes: ", np.unique(im))

Images that contain more than one crop:  ['img_01096.png' 'img_01098.png']
Image  img_01096.png  contains instances of classes:  [ 0  1  6 15]
Image  img_01098.png  contains instances of classes:  [ 0  1  6 15]


In [8]:
# Determine the images that contain no crops by computing 
# the difference between the set of all images and the set of all crop images
all_images_set = set(image_paths)
crop_images_set = set(all_crop_images)

no_crop_images_set = all_images_set - crop_images_set

# Store list of images with no crops in meta json file
no_crop_images = list(no_crop_images_set)
output_file_path = './meta/no_crop_images.json'
with open(output_file_path, 'w') as file:
    json.dump(no_crop_images, file)

print("Number of all images is", len(all_images_set))
print("Number of images that contain crops is", len(crop_images_set))
print("Number of images that contain no crops is", len(no_crop_images))
if (len(crop_images_set) + len(no_crop_images) == len(all_images_set)):
    print("The above numbers were determined correctly!")

Number of all images is 2568
Number of images that contain crops is 1803
Number of images that contain no crops is 765
The above numbers were determined correctly!


In [9]:
# Determine the number of images that contain each specific weed type
for weed_index in weed_indices:
    weed_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][weed_index] == 1]
    weed_images_set = set(weed_images)
    weed_no_crop_images = weed_images_set.intersection(no_crop_images_set)

    output_file_path = './meta/' + list(plant_classification.keys())[weed_index].replace(" ", "_") + '_no_crop_images.json'
    with open(output_file_path, 'w') as file:
        json.dump(list(weed_no_crop_images), file)

    print(list(plant_classification.keys())[weed_index], len(weed_images), "(with no crops", len(weed_no_crop_images), ")")

corn spurry 20 (with no crops 20 )
red-root amaranth 34 (with no crops 3 )
red fingergrass 8 (with no crops 8 )
common wild oat 44 (with no crops 25 )
cornflower 286 (with no crops 162 )
corn cockle 277 (with no crops 200 )
milk thistle 226 (with no crops 154 )
rye brome 60 (with no crops 60 )
narrow-leaved plantain 22 (with no crops 22 )
small-flower geranium 156 (with no crops 72 )


In [10]:
# Compute per crop type how many weeds are present on the same images
for crop_index in crop_indices:
    crop_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][crop_index] == 1]
    print(list(plant_classification.keys())[crop_index], len(crop_images))

    for weed_index in weed_indices:
        weed_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][weed_index] == 1]
        weed_in_crop_images = np.intersect1d(crop_images, weed_images)
        print("   ", list(plant_classification.keys())[weed_index], len(weed_in_crop_images))

print("Images with no crops ", len(no_crop_images))
for weed_index in weed_indices:
    weed_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][weed_index] == 1]
    weed_in_crop_images = np.intersect1d(no_crop_images, weed_images)
    print("   ", list(plant_classification.keys())[weed_index], len(weed_in_crop_images))


broad bean 210
    corn spurry 0
    red-root amaranth 0
    red fingergrass 0
    common wild oat 0
    cornflower 0
    corn cockle 0
    milk thistle 0
    rye brome 0
    narrow-leaved plantain 0
    small-flower geranium 0
common buckwheat 137
    corn spurry 0
    red-root amaranth 0
    red fingergrass 0
    common wild oat 0
    cornflower 0
    corn cockle 0
    milk thistle 2
    rye brome 0
    narrow-leaved plantain 0
    small-flower geranium 0
pea 207
    corn spurry 0
    red-root amaranth 0
    red fingergrass 0
    common wild oat 0
    cornflower 0
    corn cockle 0
    milk thistle 1
    rye brome 0
    narrow-leaved plantain 0
    small-flower geranium 0
corn 403
    corn spurry 0
    red-root amaranth 18
    red fingergrass 0
    common wild oat 0
    cornflower 58
    corn cockle 0
    milk thistle 0
    rye brome 0
    narrow-leaved plantain 0
    small-flower geranium 49
soybean 303
    corn spurry 0
    red-root amaranth 5
    red fingergrass 0
    common wild 

In [11]:
# Examplary check rye brome images
rye_brome_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][13] == 1]
print(rye_brome_images)
print(len(rye_brome_images))

['img_00937.png', 'img_00960.png', 'img_00961.png', 'img_00962.png', 'img_00963.png', 'img_00964.png', 'img_00965.png', 'img_00966.png', 'img_00967.png', 'img_00968.png', 'img_00969.png', 'img_01050.png', 'img_01051.png', 'img_01052.png', 'img_01053.png', 'img_01054.png', 'img_01055.png', 'img_01056.png', 'img_01057.png', 'img_01058.png', 'img_01059.png', 'img_01149.png', 'img_01150.png', 'img_01151.png', 'img_01152.png', 'img_01153.png', 'img_01154.png', 'img_01155.png', 'img_01156.png', 'img_01157.png', 'img_01158.png', 'img_01199.png', 'img_01200.png', 'img_01201.png', 'img_01202.png', 'img_01203.png', 'img_01204.png', 'img_01205.png', 'img_01206.png', 'img_01207.png', 'img_01208.png', 'img_01269.png', 'img_01270.png', 'img_01271.png', 'img_01272.png', 'img_01273.png', 'img_01274.png', 'img_01275.png', 'img_01276.png', 'img_01277.png', 'img_01278.png', 'img_01769.png', 'img_01770.png', 'img_01771.png', 'img_01772.png', 'img_01773.png', 'img_01774.png', 'img_01775.png', 'img_01776.pn

In [12]:
# Examplary check void images
void_images = [entry['image_path'] for entry in we3ds_class_presence if entry['class_presence'][0] == 1]
print(void_images)
print(len(void_images))

['img_00000.png', 'img_00002.png', 'img_00006.png', 'img_00009.png', 'img_00040.png', 'img_00049.png', 'img_00052.png', 'img_00058.png', 'img_00061.png', 'img_00063.png', 'img_00068.png', 'img_00069.png', 'img_00070.png', 'img_00071.png', 'img_00077.png', 'img_00078.png', 'img_00082.png', 'img_00083.png', 'img_00084.png', 'img_00086.png', 'img_00089.png', 'img_00093.png', 'img_00094.png', 'img_00095.png', 'img_00096.png', 'img_00097.png', 'img_00099.png', 'img_00100.png', 'img_00101.png', 'img_00103.png', 'img_00104.png', 'img_00105.png', 'img_00107.png', 'img_00108.png', 'img_00109.png', 'img_00110.png', 'img_00111.png', 'img_00113.png', 'img_00114.png', 'img_00115.png', 'img_00120.png', 'img_00124.png', 'img_00130.png', 'img_00134.png', 'img_00138.png', 'img_00139.png', 'img_00140.png', 'img_00141.png', 'img_00148.png', 'img_00151.png', 'img_00161.png', 'img_00167.png', 'img_00173.png', 'img_00177.png', 'img_00178.png', 'img_00182.png', 'img_00183.png', 'img_00186.png', 'img_00187.pn