In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'

# Enable the autoreload extension so imported modules are reloaded before code runs
# (mode 2), which picks up edits to local helper modules without a kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
import json
import os
from collections import Counter

from tqdm import tqdm
import exiftool

# ai4eutils is on path
from path_utils import recursive_file_list

## Peace Parks Foundation images of humans


Private dataset.

The folder after the Day/Night level is the number of people present in the image.

The location seems to be the number inside the brackets () in the file name, but these numbers are not the same as the ones recorded in the image footer. Image names without () are from location UMGR16 - these are given '1' as their location, which is otherwise unused. There is a small amount of inconsistency in the location labels in the (); for example, (15) seems to have changed viewshed in the 1-person daytime folder. However, the same viewshed does not seem to appear across different locations recorded in the ().

Timestamp is in EXIF. Height and width of image also from EXIF.

Sequence info not present. Could extract from timestamp, but is not done here.

The images were copied to blob storage with AzCopy:
```
Elapsed Time (Minutes): 9.2087
Total Number Of Transfers: 4292
```

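The `image_id` is the file path relative to the dataset root, with the file extension removed and `/` replaced by `~`. Note that, as generated below, it still contains the space characters from the file name (e.g. `Day~1~IMAG0154 (3)`). The `file_name` field is the path in blob, which also contains space chars.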

In [22]:
# Name written to the 'dataset' field of every image entry.
dataset_name = 'peaceparks_201908_humans'

# Local folder that holds the images; each entry's 'file_name' is the image path relative to this folder.
root_dir = '/Users/siyuyang/Source/temp_data/CameraTrap/PPF_humans/201908_humans/'

In [23]:
# Walk root_dir and keep only the files whose name ends with .jpg (case-insensitive).
image_files = [
    path
    for path in recursive_file_list(root_dir)
    if path.lower().endswith('.jpg')
]

In [24]:
len(image_files) # 4301 - 8 = 4293 files, two of which do not end with .jpg, leaving 4291

4291

Extract EXIF info and make the *embedded* database.

In [40]:
# Location labels that were read from a '(N)' suffix in the file name.
explicit_locations = set()
# One dict per successfully processed image; this becomes the embedded database.
image_items = []

with exiftool.ExifTool() as et:
    for image_path in tqdm(image_files):

        try:
            metadata = et.get_metadata(image_path)

            # Named datetime_original so the stdlib module name `datetime` is not shadowed.
            datetime_original = metadata['EXIF:DateTimeOriginal']
            height = metadata['EXIF:ExifImageHeight']
            width = metadata['EXIF:ExifImageWidth']

            # Path relative to root_dir. relpath gives the same result whether or not
            # root_dir ends with a path separator, unlike splitting on the root string.
            file_name = os.path.relpath(image_path, root_dir)

            # image_files was filtered case-insensitively, so strip the extension with
            # splitext rather than splitting on the literal '.JPG'; this also avoids
            # truncating the id if '.JPG' appears earlier in the path.
            image_id = os.path.splitext(file_name)[0].replace('/', '~')

            # The location label is the integer inside '(...)' in the file name;
            # images without parentheses are assigned location '1'.
            basename = os.path.basename(file_name)
            if '(' in basename:
                location = str(int(basename.split('(')[1].split(')')[0]))
                explicit_locations.add(location)
            else:
                location = '1'

            image_items.append({
                'image_id': image_id,
                'file_name': file_name,
                'dataset': dataset_name,

                'width': width,
                'height': height,
                'datetime': datetime_original,
                'location': location,

                'annotations': {
                    'species': ['human']
                }

            })

        except Exception as e:
            # Stop at the first failure so it can be investigated. image_items is then
            # incomplete: compare len(image_items) with len(image_files) before saving.
            print('Exception with image {}! {}'.format(image_path, e))
            break

100%|██████████| 4291/4291 [00:42<00:00, 100.72it/s]


In [41]:
# Location labels are strings, so the ordering is lexicographic ('10' sorts before '2').
sorted(explicit_locations)

['10', '11', '12', '13', '14', '15', '2', '3', '4', '5', '6', '7', '8', '9']

In [42]:
# Equals len(image_files) only if the extraction loop finished without hitting an exception.
len(image_items)

4291

In [43]:
# Spot-check two entries: one without and one with a '(N)' location suffix in the file name.
image_items[100:102]

[{'annotations': {'species': ['human']},
  'dataset': 'peaceparks_201908_humans',
  'datetime': '2019:06:27 14:19:06',
  'file_name': 'Day/1/IMAG0301.JPG',
  'height': 2448,
  'image_id': 'Day~1~IMAG0301',
  'location': '1',
  'width': 3264},
 {'annotations': {'species': ['human']},
  'dataset': 'peaceparks_201908_humans',
  'datetime': '2019:06:26 17:08:27',
  'file_name': 'Day/1/IMAG0154 (3).JPG',
  'height': 2448,
  'image_id': 'Day~1~IMAG0154 (3)',
  'location': '3',
  'width': 3264}]

In [44]:
# Destination of the embedded database (a JSON list with one object per image).
embedded_db_path = '/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/cosmos/peaceparks_201908_humans_20190812_embedded.json'

with open(embedded_db_path, 'w') as f:
    json.dump(image_items, f, indent=1)