In [72]:
import pandas as pd
from distutils.dir_util import copy_tree
import os
import numpy as np
import math
import random
import shutil

# contains all of the two-column csv files
csv_dir = "C:/users/clari/downloads/animals-2/"

#contains images in separated folders, e.g. a mammal folder is 'D:/hacks/mammals'
images = "D:/hacks/"
animal_types = ['amphibians', 'arthropods', 'birds', 'mammals', 'mollusks', 'reptiles']
csv_ext = ".csv"
sample_size = 100

Function reads the csv corresponding to that type and creates a data frame from it.

In [60]:
def read_animal_csv(animal_type):
    csv_file = csv_dir + animal_type + csv_ext
    df = pd.read_csv(csv_file)
    df['Scientific Name'] = df['Scientific Name'].apply(lambda name: name.replace(' ', '_'))
    df['Species Label'] = df['Species Label'].apply(lambda name: name if pd.isnull(name) else name.replace(' ', '_'))

    return df

In [61]:
read_animal_csv('mammals')

Unnamed: 0,Common Name,Scientific Name,Broad Category,Species Label,Type
0,Western Gray Squirrel,Sciurus_griseus,Squirrel,,Exact
1,Eastern Fox Squirrel,Sciurus_niger,Squirrel,,Exact
2,Eastern Gray Squirrel,Sciurus_carolinensis,Squirrel,,Exact
3,Eastern Cottontail,Sylvilagus_floridanus,Rabbit,,Exact
4,Desert Cottontail,Sylvilagus_audubonii,Rabbit,,Exact
5,Californian brush rabbit,Sylvilagus_bachmani,Rabbit,,Exact
6,Coyote,Canis_latrans,Coyote,Canis_latrans,Exact
7,Domestic Cat,Felis_catus,Domestic Cat,Felis_catus,Exact
8,Bobcat,Lynx_rufus,Bobcat,Lynx_rufus,Exact
9,Black Rat,Rattus_rattus,Rat,Rattus_rattus,Exact


In [62]:
def destination_path(scientific_name, df):
    row = df.loc[df['Scientific Name'] == scientific_name].iloc[0]
    print(type(row['Species Label']))
    if(pd.isnull(row['Species Label'])):
        return row['Broad Category']
    else:
        return os.path.join(row['Broad Category'], row['Species Label'])
destination_path('Canis_latrans', read_animal_csv('mammals'))

<class 'str'>


'Coyote\\Canis_latrans'

Functions to extract the images of the species from the csv file of the given type.
Results will be in a folder ex. "D:/hacks/mammals_extracted"

In [127]:
def extract_images(animal_df, animal_type):
    containing_dir = images + animal_type + "/"
    destination_dir = images + animal_type + "_extracted"
    if(not os.path.isdir(destination_dir)):
        os.mkdir(destination_dir)
    for scientific_name in animal_df['Scientific Name']:
        print("Processing " + scientific_name)
        extract_animal_images(animal_df, scientific_name, destination_dir, containing_dir)
        
def extract_images_from_type(animal_type):
    extract_images(read_animal_csv(animal_type), animal_type)
    
def extract_animal_images(df, scientific_name, destination_dir, containing_dir):
    row = df.loc[df['Scientific Name'] == scientific_name].iloc[0]
   
    destination_dir = os.path.join(destination_dir, destination_path(scientific_name, df))
    if(not os.path.isdir(destination_dir)):
        os.makedirs(destination_dir)
    if(row['Type'] == 'Sample'):
        # all of the directories that match
        image_dirs = [candidate_dir for candidate_dir in os.listdir(containing_dir) if scientific_name in candidate_dir]
        if(len(image_dirs) == 0):
            print("Not found")
            return;
        num_per_dir =  math.ceil(100.0 / len(image_dirs))
        for image_dir in image_dirs:
            image_dir = os.path.join(containing_dir, image_dir)
            sample_files = random.sample(os.listdir(image_dir), min(num_per_dir, len(os.listdir(image_dir))))
            if(not os.path.isdir(destination_dir)):
                os.mkdir(destination_dir)
            for file in sample_files:
                shutil.copy2(os.path.join(image_dir,file), destination_dir)
    else:
        # collect exact
        image_dir = [candidate_dir for candidate_dir in os.listdir(containing_dir) if scientific_name in candidate_dir]
        if(len(image_dir) > 1):
            print("Duplicate")
        elif(len(image_dir) == 0):
            print("Not found")
        else:
            image_dir = containing_dir + image_dir[0]
            print(destination_dir)
            #copy_tree(image_dir, os.path.join(destination_dir,image_dir.split('/')[-1]))
            if(not os.path.isdir(destination_dir)):
                os.mkdir(destination_dir)
            copy_tree(image_dir, destination_dir)

In [134]:
extract_images_from_type('birds')

Processing Zenaida_macroura
<class 'str'>
D:/hacks/birds_extracted\Bird\Zenaida_macroura
Processing Cardinalis_cardinalis
<class 'str'>
D:/hacks/birds_extracted\Bird\Cardinalis_cardinalis
Processing Turdus_migratorius
<class 'str'>
D:/hacks/birds_extracted\Bird\Turdus_migratorius
Processing Corvus_brachyrhynchos
<class 'str'>
D:/hacks/birds_extracted\Bird\Corvus_brachyrhynchos
Processing Cyanocitta_cristata
<class 'str'>
D:/hacks/birds_extracted\Bird\Cyanocitta_cristata
Processing Melospiza_melodia
<class 'str'>
D:/hacks/birds_extracted\Bird\Melospiza_melodia
Processing Agelaius_phoeniceus
<class 'str'>
D:/hacks/birds_extracted\Bird\Agelaius_phoeniceus
Processing Sturnus_vulgaris
<class 'str'>
D:/hacks/birds_extracted\Bird\Sturnus_vulgaris
Processing Spinus_tristis
<class 'str'>
D:/hacks/birds_extracted\Bird\Spinus_tristis
Processing Branta_canadensis
<class 'str'>
D:/hacks/birds_extracted\Bird\Branta_canadensis
Processing Haemorhous_mexicanus
<class 'str'>
D:/hacks/birds_extracted\Bir

In [75]:
for type in animal_types:
    extract_images_from_type(type)

Processing Sciurus_griseus
Processing Sciurus_niger
Processing Sciurus_carolinensis
Processing Sylvilagus_floridanus
Processing Sylvilagus_audubonii
Processing Sylvilagus_bachmani
Processing Canis_latrans
Processing Felis_catus
Processing Lynx_rufus
Processing Rattus_rattus
Processing Thomomys_bottae
Processing Canis_familiaris
Processing Canis_lupus
Processing Equus_caballus
Processing Equus_asinus
Processing Bos_taurus
Processing Sus_scrofa
Processing Capra_hircus
Processing Ovis_aries
Processing Ursus_americanus
Processing Ursus_arctos
Processing Procyon_lotor
Processing Odocoileus_virginianus
Processing Mephitis_mephitis
Processing Didelphis_virginiana
Processing Vulpes_vulpes
Processing Puma_concolor
Processing Elgaria_multicarinata
Processing Sceloporus_occidentalis
Processing Uta_stansburiana
Processing Anniella_pulchra
Not found
Processing Pituophis_catenifer
Processing Masticophis_flagellum
Processing Chrysemys_picta
Processing Trachemys_scripta
Processing Alligator_mississipp