### This script generates crop slices from the main image rather than from the images that workers saw and marked.
### The output images therefore show the worker annotations for object locations.

In [1]:
from PIL import Image,ImageDraw
import json
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.image as mpimg
from math import floor

from urllib.parse import urlparse
import urllib.request, json
import requests
from io import BytesIO
import os
Image.MAX_IMAGE_PIXELS = None

import boto3
import datetime
import json
import pandas as pd
import os
from pathlib import Path

### Crop corner images, i.e. slices whose location annotations may lie towards the corners or edges of the main image.

In [2]:
def crop_corner_images(im:Image.Image, xy:tuple, size:int, marker_color:tuple, image_file_path):
    """Crop a square window centred on ``xy``, mark its centre and save it.

    The window reaches ``size`` pixels to every side of ``xy`` and is handed
    to ``im.crop`` without clipping, so it may extend past the image borders
    (the caller uses this function for exactly those windows).  A one-pixel
    wide vertical and horizontal line through the middle of the crop is drawn
    in ``marker_color`` before the crop is written out as RGB.

    Args:
        im: Opened PIL image to crop from.
        xy: ``(x, y)`` centre of the crop in ``im`` pixel coordinates.
        size: Half of the crop's side length, in pixels.
        marker_color: Colour tuple used for the two marker lines.
        image_file_path: Destination path of the saved crop.

    Raises:
        ValueError: If the resulting crop contains no pixels.
    """
    x, y = xy
    crop = im.crop((x - size, y - size, x + size, y + size))

    crop_w, crop_h = crop.size
    # ``crop.size`` is a 2-tuple and therefore always truthy; check the
    # actual dimensions instead.
    if crop_w <= 0 or crop_h <= 0:
        raise ValueError("Invalid crop size.")

    draw = ImageDraw.Draw(crop)
    mid_x, mid_y = crop_w // 2, crop_h // 2

    # Crosshair through the centre of the crop, i.e. the annotated point.
    draw.line((mid_x, 0, mid_x, crop_h), fill=marker_color, width=1)
    draw.line((0, mid_y, crop_w, mid_y), fill=marker_color, width=1)

    crop.convert('RGB').save(image_file_path)

### Mark vehicle location on the cropped slice

In [3]:
def mark_bounding_boxes(imarr:np.ndarray, marker_color:tuple, image_file_path:str):
    """Save ``imarr`` as an RGB image with a crosshair through its centre.

    Despite the name, no bounding box is drawn: the array is converted to a
    PIL image, a one-pixel wide vertical and horizontal line through the
    middle is drawn in ``marker_color``, and the result is written to
    ``image_file_path``.

    Args:
        imarr: Pixel array of the already-cropped slice.
        marker_color: Colour tuple used for the two marker lines.
        image_file_path: Destination path of the saved image.
    """
    img = Image.fromarray(imarr)
    draw = ImageDraw.Draw(img)

    width, height = img.size
    mid_x, mid_y = width // 2, height // 2

    # Crosshair through the centre of the slice, i.e. the annotated point.
    draw.line((mid_x, 0, mid_x, height), fill=marker_color, width=1)
    draw.line((0, mid_y, width, mid_y), fill=marker_color, width=1)

    img.convert('RGB').save(image_file_path)

#### Correct the path of short answers from the Selwyn dataset. The following line of code needs to be updated in the next cell.
`filedir = os.path.join(os.path.join(r'C:\Users\exx\Documents\lab'), "LINZ","Final","001_selwyn-0125m-urban-aerial-photos-2012-2013")`

In [4]:
path = Path(os.getcwd())
# Folder that stores the main images; adjust this to the local dataset copy.
filedir = os.path.join(
    r'C:\Users\exx\Documents\lab',
    "LINZ",
    "Final",
    "001_selwyn-0125m-urban-aerial-photos-2012-2013",
)
# Full path of every entry directly below the dataset folder.
main_folders = []
for entry in os.listdir(filedir):
    main_folders.append(os.path.join(filedir, entry))
main_folders

['C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0001',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0002',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0003',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0004',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0005',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0006',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0007',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0008',
 'C:\\Users\\exx\\Documents\\lab\\LINZ\\Final\\001_selwyn-0125m-urban-aerial-photos-2012-2013\\0009']

### Load the results path

In [5]:
# All batch inputs and outputs live below <current working dir>/batch100_HITs.
rootpath = Path(os.getcwd())
answers_path, output_path, results_path = (
    os.path.join(rootpath, "batch100_HITs", *tail)
    for tail in (
        ("answers", "selwyn_answers_identification.csv"),  # ground-truth answers
        ("results",),                                      # where crops are written
        ("batch_results",),                                # downloaded batch CSVs
    )
)

In [6]:
# List the entries of the batch results folder; the first one is loaded below.
files = os.listdir(results_path)
files

['Batch_xxxxxx_batch_results.csv']

### Load batch results to Dataframe

In [7]:
# Read the first file listed in the batch results folder into a DataFrame.
df = pd.read_csv(os.path.join(results_path,files[0]))

In [8]:
# Show the column names of the batch results.
df.columns

Index(['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
       'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
       'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
       'Expiration', 'NumberOfSimilarHITs', 'LifetimeInSeconds',
       'AssignmentId', 'WorkerId', 'AssignmentStatus', 'AcceptTime',
       'SubmitTime', 'AutoApprovalTime', 'ApprovalTime', 'RejectionTime',
       'RequesterFeedback', 'WorkTimeInSeconds', 'LifetimeApprovalRate',
       'Last30DaysApprovalRate', 'Last7DaysApprovalRate', 'Input.image1_url',
       'Input.image2_url', 'Input.image3_url', 'Input.image4_url',
       'Input.image5_url', 'Input.image6_url', 'Input.image7_url',
       'Input.image8_url', 'Input.image9_url', 'Input.image10_url',
       'Input.image11_url', 'Input.image12_url', 'Input.image13_url',
       'Input.image14_url', 'Input.image15_url', 'Input.image16_url',
       'Input.image17_url', 'Input.image18_url', 'Input.image19_url',
       'Input.image20_

In [9]:
# Keep only the per-assignment columns used below: the HIT and worker ids,
# the raw answer payload, timing, approval rate and approve/reject columns.
submitted_answers = df[['HITId','Answer.taskAnswers', 'WorkerId', 'WorkTimeInSeconds', 'LifetimeApprovalRate','Approve', 'Reject']]

In [10]:
# Largest value in the WorkTimeInSeconds column across all assignments.
submitted_answers['WorkTimeInSeconds'].max()

1132

In [11]:
# Preview the raw answer payload (a JSON string) submitted with each assignment.
submitted_answers[['Answer.taskAnswers']]

Unnamed: 0,Answer.taskAnswers
0,"[{""feedback"":""I didn't see vehicles in some of..."
1,"[{""feedback"":""Interesting"",""https://mturk-s3-c..."
2,"[{""feedback"":""Interesting"",""https://mturk-s3-c..."
3,"[{""feedback"":""There have no vehicle in some of..."
4,"[{""feedback"":""some of the pictures very diffic..."


In [12]:
# Distinct worker ids that submitted answers.
# Group by the bare column label rather than a one-element list so the group
# keys are plain ids; they are later compared against the WorkerId column
# and used as folder names.
# NOTE(review): pandas has been moving length-1 list groupers towards tuple
# keys - confirm against the installed pandas version.
workers = list(submitted_answers.groupby('WorkerId').groups.keys())
len(workers)

5

### Filter by worker id

In [13]:
# Answer payloads submitted by the first worker, selected in one .loc lookup
# (row mask plus column list).
submitted_answers.loc[
    submitted_answers['WorkerId'] == workers[0],
    ['Answer.taskAnswers'],
]

Unnamed: 0,Answer.taskAnswers
0,"[{""feedback"":""I didn't see vehicles in some of..."


### Get hit ids from results

In [14]:
# Distinct HIT ids present in the results.
# Group by the bare column label rather than a one-element list so the group
# keys are plain ids.
# NOTE(review): pandas has been moving length-1 list groupers towards tuple
# keys - confirm against the installed pandas version.
hitids = list(submitted_answers.groupby('HITId').groups.keys())

In [15]:
# Number of distinct HITs in the batch results.
len(hitids)

5

### Load the Ground truth with answers

In [16]:
# Load the ground-truth answers CSV into a DataFrame.
answers = pd.read_csv(answers_path)

In [17]:
# Summarise the columns, non-null counts and dtypes of the ground-truth table.
answers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   image_url      20 non-null     object
 1   url            20 non-null     object
 2   vehicle_types  20 non-null     object
 3   # of vehicles  20 non-null     int64 
 4   truck          2 non-null      object
 5   small          3 non-null      object
 6   specialized    1 non-null      object
 7   trailer_small  3 non-null      object
dtypes: int64(1), object(7)
memory usage: 1.4+ KB


In [18]:
#h,w,_ = imarray.shape
overlap = 0.2
tile_size = 300
stride = int(tile_size * (1-overlap))
window_width = tile_size
marker_color = (200,200,200,255)

cropsize = 306
size = int(cropsize/2)

### For each worker, extract the annotated locations, transform them to main-image coordinates and crop the slices from the main images (the main images are larger than the slices).
#### Each cropped slice is saved to the image file path that is passed as input to crop_corner_images or mark_bounding_boxes.
#### If you want to save these images to a specific path, please make sure to pass the correct path to both crop_corner_images and mark_bounding_boxes.

In [20]:
# For every worker, walk the submitted assignments, map each marked keypoint
# from tile coordinates to main-image coordinates and save a marked crop of
# the main image centred on it.
for worker in workers:
    worker_answers = submitted_answers.loc[submitted_answers['WorkerId'] == worker]

    # makedirs also creates "from_main_image" (and any other missing parent),
    # which a plain mkdir would fail on.
    outdirectory = os.path.join(output_path, "from_main_image", worker)
    os.makedirs(outdirectory, exist_ok=True)

    for raw_answer in worker_answers['Answer.taskAnswers']:
        ans = json.loads(raw_answer)

        # The first element maps each tile URL to its annotation, plus one
        # free-text "feedback" entry that carries no locations.
        for k, v in ans[0].items():
            if k == "feedback":
                continue

            # Everything after the sixth "/" must be exactly <folder>/<file>.
            foldername, filename = k.split("/")[6:]
            # The last two "-"-separated fields of the file stem are added to
            # the keypoints below, i.e. they are the tile's x/y offset inside
            # the main image.
            x, y = (int(part) for part in filename.split(".")[0].split("-")[2:])
            mainfolder, subfolder = foldername.split("_")
            main_image_name = foldername + "_image.png"

            # The per-tile folder is created even when nothing was marked.
            tile_outdir = os.path.join(outdirectory, foldername)
            os.makedirs(tile_outdir, exist_ok=True)

            if not v['keypoints']:
                continue

            main_image_path = os.path.join(filedir, mainfolder, subfolder, main_image_name)
            # Context manager so the image file is closed after each tile.
            with Image.open(main_image_path) as im:
                imarray = np.asarray(im)

                for pt in v["keypoints"]:
                    # Row/column of the marked point in the main image.
                    i, j = int(pt['y']) + y, int(pt['x']) + x
                    imname = os.path.join(tile_outdir, "{}-{}.png".format(i, j))

                    window = None
                    if i - size >= 0 and j - size >= 0:
                        # A negative start would make NumPy count from the
                        # far end, so only slice when both starts are valid.
                        # Windows running past the bottom/right edge come
                        # back truncated and are caught by the shape check.
                        window = imarray[i - size:i + size, j - size:j + size]

                    if window is None or window.shape[0] < cropsize or window.shape[1] < cropsize:
                        # The window crosses an image border: crop via PIL.
                        crop_corner_images(im, (j, i), size, marker_color, imname)
                    else:
                        mark_bounding_boxes(window, marker_color, imname)

                        