## Croping & Scaling for the Dataset

In [1]:
import pandas as pd
import glob
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2

%matplotlib inline

### Parameters

You can change some of the parameters here, it should not change any functionality

> FOLDER_NAME: Name of the folder for new dataset to be created
>
> XLEN: Final width of images in the dataset
>
> YLEN: Final height of images in the dataset
>
> NUM_TILES: # of grids for both axes (eg. if set to 5, each image will first be divided to 5x5 and then will be scaled)
>
> Both XLEN and YLEN are currently set to 1366 as it is the default maximum value for the RetinaNet

In [2]:
FOLDER_NAME = 'cropscaled_images_7/'
XLEN = 1333
YLEN = 1333
NUM_TILES = 7

### Cropping and Scaling Function

In [3]:
def crop_tiles(img_name, num_tiles, df, x_len, y_len):
    """
    img_name -> file name of the image
    num_tiles -> # of grid pieces for both x and y (width and height)
    df -> annotations dataframe
    x_len -> desired width of the cropped-then-scaled images
    y_len -> desired height of the cropped-then-scaled images
    """
    updated_annotations = pd.DataFrame()
    anno = df[df[0] == filename]
    #print(anno)
    img_list = []
    img = Image.open(img_name)
    width, height = img.size
    
    new_width = int(width / num_tiles)
    new_height = int(height / num_tiles)
    
    # 0. Scale Ratio
    x_scale = x_len / new_width
    y_scale = y_len / new_height
    
    iterx = 0
    for i in range(0, width, new_width):
        itery = 0
        for j in range(0, height, new_height):
            if ((i + new_width) <= width) & ((j + new_height) <= height):
                #print(f"iter: {iterx}, x1: {i}, x2: {i + new_width}, y1: {j}, y2: {j + new_height}")
                
                # 1. Cropping 
                crop_box = (i, j, i + new_width, j + new_height)
                new_img = img.crop(crop_box)
                
                # 2. Resizing & Updating Annotations
                ##img_list.append(new_img)
                new_img = new_img.resize((x_len, y_len))
                
                matching_annos = anno[(anno[1] > i) & (anno[3] < i + new_width) & (anno[2] > j) & (anno[4] < j + new_height)]
                #print(matching_annos)
                # update wrt new coords
                matching_annos[1] = matching_annos[1] - (new_width * iterx)
                matching_annos[3] = matching_annos[3] - (new_width * iterx)
                matching_annos[2] = matching_annos[2] - (new_height * itery)
                matching_annos[4] = matching_annos[4] - (new_height * itery)
                matching_annos[[1, 3]] = matching_annos[[1, 3]] * x_scale
                matching_annos[[2, 4]] = matching_annos[[2, 4]] * y_scale
                matching_annos[[1, 2, 3, 4]] = matching_annos[[1, 2, 3, 4]].astype(int)
                new_filename = FOLDER_NAME + filename.split('\\')[1].split('.')[0] + '_' + str(iterx) + '_' + str(itery) + '.png'
                matching_annos[0] = new_filename
                #print(matching_annos)
                updated_annotations = updated_annotations.append(matching_annos)
                #print(updated_annotations)
                # 3. Saving & Updating Look-up Table
                new_img.save(new_filename)
                itery += 1
        iterx +=1
    return updated_annotations

### Usage

> 0. The initial annotations file should be ready (function used below expects dataframe)
> 1. Create an empty dataframe,
> 2. Call the function above for all of the images one by one
> 3. The function above, `crop_tiles`, divides large images to grids and return their annotations
> 4. Fill the empty dataframe with function returns

> ! Change the argument for the glob if images are located in another path/folder

In [4]:
df = pd.read_csv('xview-formatted-annotations-val-gpu-full8.csv', header = None)

In [5]:
### SOME BORING PREPROCESSING FOR FILENAMES ###
### YOU CAN SKIP IF NOT NEEDED ###
def get_imgname(row):
    return row.split('/')[1]
def pathfix(row):
    return 'sample\\' + row

In [6]:
tifs = glob.glob('sample\*.tif')
imglist = []
for tif in tifs:
    imglist.append(tif.split('\\')[1])

df['imgname'] = df[0].apply(get_imgname)
df = df[df['imgname'].isin(imglist)]

df[0] = df['imgname'].apply(pathfix)
df.drop('imgname', axis = 1, inplace = True)
### SOME BORING PREPROCESSING FOR FILENAMES -- END -- ###

In [7]:
df.head()

Unnamed: 0,0,1,2,3,4,5
0,sample\100.tif,22,869,33,881,small_car
1,sample\100.tif,5,2075,12,2085,small_car
2,sample\100.tif,3,2341,14,2353,small_car
3,sample\100.tif,2,2410,16,2421,small_car
4,sample\100.tif,90,1070,102,1084,small_car


In [8]:
## FUNCTION IS USED HERE
annotations = pd.DataFrame()
for filename in glob.glob('sample/*.tif'):
    #print(filename)
    #print(df[df[0] == filename])
    annotations = annotations.append(crop_tiles(filename, NUM_TILES, df, XLEN, YLEN))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [9]:
## DONT FORGET TO SAVE THE ANNOTATIONS
annotations.to_csv("cropscale_sample_7.csv", index = False, header = False)

In [10]:
annotations.head()

Unnamed: 0,0,1,2,3,4,5
0,cropscaled_images_7/100_0_2.png,60,327,91,368,small_car
4,cropscaled_images_7/100_0_2.png,248,1019,282,1067,small_car
5,cropscaled_images_7/100_0_2.png,271,1023,295,1060,small_car
6,cropscaled_images_7/100_0_4.png,135,585,176,616,small_car
7,cropscaled_images_7/100_0_4.png,121,1153,157,1202,small_car


In [11]:
xlist = annotations[3] - annotations[1]

In [12]:
xli = np.array(xlist)
xli[xli > 29].shape[0] / xli.shape[0]

0.8386212991604065

In [13]:
ylist = annotations[4] - annotations[2]
yli = np.array(ylist)
yli[yli > 29].shape[0] / yli.shape[0]

0.924259832081308

#### Creating Annotated Images

This is optional, but can be used for checking the correctness of the operations.

Will create a new folder with annotated images in it.

(Run it after establishing the cropped images)

In [14]:
for filename in glob.glob(FOLDER_NAME + '*.png'):
    img = Image.open(filename)
    img = np.array(img)
    matches = annotations[annotations[0] == filename.replace('\\', '/')]
    for index, row in matches.iterrows():
        cv2.rectangle(img,(int(row[1]),int(row[2])),(int(row[3]),int(row[4])),(0,0,255),4)
        
    anno_img = Image.fromarray(img)
    anno_img.save('annotated_cropscaled_7/' + filename.split('\\')[1])