### Preprocessing of COCO Annotations File 
This script uses the Pillow library to read the actual size of each image (taking the EXIF orientation into account) and compares it with the size recorded in the COCO annotations file exported from the VGG Image Annotator (VIA). Because the image sizes written by VIA are sometimes wrong, the script corrects them in the COCO annotations file.  

Input: COCO annotations file exported from VIA   
Output: COCO annotations file with corrected image sizes (written as `updated_coco_file.json` to the image folder) 

Additional arguments: folder where images are stored 

In [None]:
# Import required libraries
import glob 
import os 

import json 

from PIL import Image
from PIL import ExifTags

In [None]:
# Folder and file locations.
# NOTE: folder_path must end with a path separator, because file names are
# appended to it by plain string concatenation.
folder_path = "C:\\Users\\steff\\Documents\\05 GeoAI\\03 AI\\GeoAI\\data\\collected_20200819\\"
coco_file = folder_path + "Doorway_21Aug2020_01_coco.json"

# Load the original COCO annotations file using the json library.
# The encoding is given explicitly: JSON files are normally UTF-8, while the
# default used by open() depends on the locale (e.g. cp1252 on Windows) and
# could fail on, or mis-decode, non-ASCII characters such as accented file names.
with open(coco_file, encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# Function for extracting size of image from image file
def actual_size(image):
    """Return the ``(width, height)`` of *image* after applying its EXIF orientation.

    The stored pixel size is read from ``image.size``. If the EXIF orientation
    tag says the picture is turned by a quarter (values 5-8: transpose,
    rotate 270, transverse, rotate 90), width and height trade places.
    Mirrored or 180-degree orientations (2-4) leave the size unchanged, as
    does a missing or unknown orientation value.

    Args:
        image: An opened Pillow image (any object offering ``getexif()`` and
            ``size`` works).

    Returns:
        A ``(width, height)`` tuple of ints.
    """
    # 274 (0x0112) is the EXIF "Orientation" tag.
    # The lookup is done on the *argument* (not on a global variable) and via
    # the public getexif() API, which yields an empty mapping when the file
    # carries no EXIF data. The private _getexif() helper is not offered by
    # every image format and can return None, which made `.get` raise.
    orientation = image.getexif().get(274)

    width, height = image.size

    # Only the size is needed, so the pixels are never transposed: swapping the
    # two numbers gives the same result as a quarter-turn transpose without
    # forcing the whole image to be decoded.
    if orientation in (5, 6, 7, 8):
        return (height, width)
    return (width, height)

In [None]:
# Comparison and correction of actual image size vs size in coco annotations file.
# Each entry of data["images"] is updated in place when the size measured from
# the image file differs from the size recorded in the annotations.
for image_entry in data["images"]:
    file_name = image_entry["file_name"]
    coco_width = image_entry["width"]
    coco_height = image_entry["height"]

    # Open inside a context manager so the underlying file handle is closed
    # after every image instead of staying open for the rest of the run.
    with Image.open(folder_path + file_name) as im:
        width, height = actual_size(im)

    if coco_width != width:
        image_entry["width"] = width
        print(f"Changed width for {file_name}")

    if coco_height != height:
        image_entry["height"] = height
        print(f"Changed height for {file_name}")

# Write the (possibly corrected) annotations next to the images.
with open(f'{folder_path}updated_coco_file.json', 'w') as outfile:
    json.dump(data, outfile)