# Smartcrop

Images of birds provided by eBird come in various formats. To extract good-quality features, we need a standardised square format with the bird contained in it. As the bird can appear anywhere in the image, an automated, centered crop is a rather poor solution. This script is used to crop a square subset of the image containing the bird. It is based on a function available in a GitHub repository (see the packages listed below). It achieves good results for most of the images, but can fail in rare cases, e.g. when the bird appears quite distant and on a complex background. We consider this performance sufficient for our needs.

# Don't run all cells !

## First package : https://github.com/smartcrop/smartcrop.py

This first package gives the best pixel coordinates to crop a picture while keeping only the interesting features. For each image, it proposes the 3 best crops. I tried to adapt it to directly crop and store the pictures. However, the results were not that good and I found another package that achieved better cropping.

In [None]:
#import smartcrop
#from PIL import Image
#import pandas as pd

In [None]:
#Image.open("nocrop/nocrop003.jpg")

In [None]:
#image = Image.open("nocrop/nocrop003.jpg")
#
#sc = smartcrop.SmartCrop()
#result = sc.crop(image, 500, 500)
#result

In [None]:
#x1 = result["crops"][0]["x"]
#y1 = result["crops"][0]["y"]
#x2 = x1+result["crops"][0]["width"]
#y2 = y1+result["crops"][0]["height"]

In [None]:
#cropped = image.crop((x1,y1,x2,y2))
#cropped

In [None]:
#cropped.save("data/other/cormoran.jpg")

In [None]:
#def square_crop(path_to_image, width, heigth):
#    image = Image.open(path_to_image)
#    sc = smartcrop.SmartCrop()
#    coord = sc.crop(image, width, heigth)
#    x1 = coord["top_crop"]["x"]
#    y1 = coord["top_crop"]["y"]
#    x2 = x1+coord["top_crop"]["width"]
#    y2 = y1+coord["top_crop"]["height"]
#    cropped = image.crop((x1,y1,x2,y2))
#    return cropped
#
##smartcrop doesn't resize the images to the indicated width and height
##add resize after having checked for the format of each image in metadata

In [None]:
#square_crop("nocrop/nocrop007.jpg",300,300)

In [None]:
#data = pd.read_csv("data/metadata/selection.csv", index_col=0)

In [None]:
##add "crop" variable to the dataframe, with the path to the cropped images
#data["crop_storage"] = data["storage"].str.replace("original", "cropped")

In [None]:
##crop images as a test
#
#fa = data[data["Scientific Name"]=="Chroicocephalus ridibundus"]
#
#for i in range(len(fa)): # for the number of  observations
#    file = fa.iloc[i,:]["storage"]
#    cropped = square_crop(file,300,300)
#    output_file = fa.iloc[i,:]["crop_storage"]
#    cropped.save(output_file)

## Second package (https://github.com/epixelic/python-smart-crop, requires OpenCV):

In [None]:
import smartcrop
import pandas as pd
from tqdm import tqdm

In [None]:
# Load the metadata of the selected observations. The first CSV column is read
# as the index, consistent with the other reads of this file in the notebook,
# so that it does not end up as a stray unnamed data column.
data = pd.read_csv("data/metadata/selection.csv", index_col=0)

In [None]:
# Add a column holding the output path of each cropped image, obtained by
# replacing the substring "original" with "cropped" in the input path.
# regex=False forces a literal substitution regardless of the pandas version
# (the default value of `regex` changed from True to False in pandas 2.0).
data["crop_storage"] = data["storage"].str.replace("original", "cropped", regex=False)

In [None]:
# Crop the images of "Chroicocephalus ridibundus" only, as a test run.
from pathlib import Path

fa = data[data["Scientific Name"] == "Chroicocephalus ridibundus"]

# Walk the input and output paths in lockstep instead of indexing row by row.
for file, crop_file in zip(fa["storage"], fa["crop_storage"]):
    # Make sure the destination folder exists before the crop is written.
    Path(crop_file).parent.mkdir(parents=True, exist_ok=True)
    # Arguments: source path, width, height, destination path, and a final
    # flag (presumably "resize to the requested size" -- confirm in the package).
    smartcrop.smart_crop(file, 300, 300, crop_file, True)

In [None]:
# Crop every image of every species to 224 x 224 and store the result.
from pathlib import Path

for s in data["Scientific Name"].unique():

    # Observations of the current species only; looping species by species
    # gives one progress bar per species.
    fa = data[data["Scientific Name"] == s]

    # Pair each input path with its output path instead of indexing by row.
    paths = zip(fa["storage"], fa["crop_storage"])
    for file, crop_file in tqdm(paths, total=len(fa)):
        # Make sure the destination folder exists before the crop is written.
        Path(crop_file).parent.mkdir(parents=True, exist_ok=True)
        # Crop the picture and write it to the output path.
        smartcrop.smart_crop(file, 224, 224, crop_file, True)

## Print cropped images

In [None]:
import math
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

In [None]:
# Load the metadata of the selected observations, using the first CSV column
# as the index.
data = pd.read_csv("data/metadata/selection.csv", index_col=0)

# Add a column holding the path of each cropped image, obtained by replacing
# the substring "original" with "cropped" in the input path. regex=False
# forces a literal substitution regardless of the pandas version (the default
# value of `regex` changed from True to False in pandas 2.0).
data["crop_storage"] = data["storage"].str.replace("original", "cropped", regex=False)

In [None]:
# Grid of the cropped images of 'Podiceps cristatus', for visual assessment.

# Keep only the observations of that species.
pc = data[data["Scientific Name"] == 'Podiceps cristatus']

# Number of images to draw.
n = len(pc)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 540)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = pc.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `pc`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/podiceps_cristatus_crop.jpg")

In [None]:
# Grid of the cropped images of 'Fulica atra', for visual assessment.

# Keep only the observations of that species.
fa = data[data["Scientific Name"] == 'Fulica atra']

# Number of images to draw.
n = len(fa)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 500)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = fa.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `fa`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/fulica_atra_crop.jpg")

In [None]:
# Grid of the cropped images of 'Chroicocephalus ridibundus', for visual
# assessment.

# Keep only the observations of that species.
cr = data[data["Scientific Name"] == 'Chroicocephalus ridibundus']

# Number of images to draw.
n = len(cr)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 540)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = cr.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `cr`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/chroicocephalus_ridibundus_crop.jpg")

In [None]:
# Grid of the cropped images of 'Cygnus olor', for visual assessment.

# Keep only the observations of that species.
co = data[data["Scientific Name"] == 'Cygnus olor']

# Number of images to draw.
n = len(co)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 420)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = co.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `co`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/cygnus_olor_crop.jpg")

In [None]:
# Grid of the cropped images of 'Pyrrhocorax graculus', for visual assessment.

# Keep only the observations of that species.
pg = data[data["Scientific Name"] == 'Pyrrhocorax graculus']

# Number of images to draw.
n = len(pg)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 380)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = pg.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `pg`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/pyrrhocorax_graculus_crop.jpg")

In [None]:
# Grid of the cropped images of 'Netta rufina', for visual assessment.

# Keep only the observations of that species.
nr = data[data["Scientific Name"] == 'Netta rufina']

# Number of images to draw.
n = len(nr)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 500)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = nr.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `nr`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/netta_rufina_crop.jpg")

In [None]:
# Grid of the cropped images of 'Turdus merula', for visual assessment.

# Keep only the observations of that species.
tm = data[data["Scientific Name"] == 'Turdus merula']

# Number of images to draw.
n = len(tm)

# Three images per row; figure size is given in inches (width, height).
fig, axes = plt.subplots(nrows=math.ceil(n / 3), ncols=3)
fig.set_size_inches(20, 150)
cells = axes.ravel()  # flattened grid, so observation i goes to cell i

for i in tqdm(range(n)):
    row = tm.iloc[i]  # i-th observation of the species
    ax = cells[i]
    # Open the cropped file with Pillow and display it as an array.
    ax.imshow(np.array(Image.open(row["crop_storage"])))
    # Title: ML catalog number, average rating and position within `tm`.
    ax.set_title(
        "ML : {} -- rating : {} -- index : {}".format(
            row["ML Catalog Number"], row["Average Community Rating"], i
        )
    )

fig.tight_layout()

# Save the whole grid as a single jpg file.
plt.savefig("data/visual_assessment/turdus_merula_crop.jpg")