In [1]:
import sys
sys.path.append('..')

In [34]:
import os
from os.path import dirname
from pathlib import Path
from PIL import Image
import pandas as pd
import nitfix.lib.db as db
import nitfix.lib.util as util

In [28]:
# Connection returned by the project's db helper; used by the SQL queries.
cxn = db.connect()

# Root data folders, relative to the current working directory.
DATA_DIR = Path('..', 'data')
SAMPLE_DIR = DATA_DIR.joinpath('raw', 'sampled_images')

# Existing 2400-image sample: manifest CSV and matching image folder.
THE_2400_MANIFEST = SAMPLE_DIR.joinpath('nitfix_sample_2020-04-07a.csv')
THE_2400_IMAGES = SAMPLE_DIR.joinpath('nitfix_sample_2020-04-07a')

# 100-image sample: manifest CSV and image folder share one base name.
THE_100_NAME = 'nitfix_sample_100_2020-04-09a'
THE_100_MANIFEST = SAMPLE_DIR.joinpath(f'{THE_100_NAME}.csv')
THE_100_IMAGES = SAMPLE_DIR.joinpath(THE_100_NAME)

# Source photos and scratch space for transformed copies.
PHOTOS = DATA_DIR.joinpath('raw', 'photos')
TEMP_DIR = DATA_DIR.joinpath('temp')

In [4]:
# Load every row of the images table whose image_file contains 'NY_DOE'.
sql = "SELECT * FROM images WHERE image_file LIKE '%NY_DOE%';"
df = pd.read_sql(sql=sql, con=cxn)
df.shape

(3539, 2)

In [6]:
# Preview the first rows returned by the NY_DOE query.
df.head()

Unnamed: 0,image_file,sample_id
0,NY_DOE-nitfix_visit3/R0004890.JPG,655cbcbb-19f8-4664-9eb5-e59291dbbd37
1,NY_DOE-nitfix_visit3/R0004891.JPG,65527f87-8e12-4c15-91e6-1aa2f282f1b7
2,NY_DOE-nitfix_visit3/R0004892.JPG,6552469c-0952-4185-8b93-7fde26747b0e
3,NY_DOE-nitfix_visit3/R0004893.JPG,653fc52a-c880-4507-96b4-eaf3f2db2218
4,NY_DOE-nitfix_visit3/R0004894.JPG,653bfbc9-a3a5-4464-b8cd-574b5db4e96e


In [8]:
# image_file values already listed in the 2400-image sample manifest.
used = pd.read_csv(THE_2400_MANIFEST).loc[:, 'image_file']
used.shape

(2400,)

In [10]:
# Keep only the queried images that do not appear in the 2400-image manifest.
# `~` negates the boolean mask directly instead of comparing it to False.
df2 = df.loc[~df['image_file'].isin(used), :]
df2.shape

(3017, 2)

In [11]:
# Draw 100 of the unused images. The fixed random_state makes the draw
# repeatable, so re-running the notebook regenerates the same manifest.
df3 = df2.sample(n=100, random_state=20200409)
df3.shape

(100, 2)

In [13]:
# Flattened file name: the same '/' -> '_' substitution zip_images applies when
# it writes each image. regex=False keeps this a literal replacement no matter
# which pandas version (and therefore which `regex` default) is installed.
df3['manifest_file'] = df3['image_file'].str.replace('/', '_', regex=False)

In [14]:
# Preview the sample with its new manifest_file column.
df3.head()

Unnamed: 0,image_file,sample_id,manifest_file
1370,NY_DOE-nitfix_visit4/R0006285.JPG,2d205bb7-3788-4326-b4bc-1f38c84fb003,NY_DOE-nitfix_visit4_R0006285.JPG
2665,NY_DOE-nitfix_visit5/R0007589.JPG,cae3f489-4c32-44cb-9bac-668f27cec5dd,NY_DOE-nitfix_visit5_R0007589.JPG
1724,NY_DOE-nitfix_visit4/R0006643.JPG,f32d9c33-69cc-45ac-ad29-b1f1c3ed2d23,NY_DOE-nitfix_visit4_R0006643.JPG
1377,NY_DOE-nitfix_visit4/R0006292.JPG,2cf5dfab-f821-4394-a798-b5515fd3da67,NY_DOE-nitfix_visit4_R0006292.JPG
1289,NY_DOE-nitfix_visit4/R0006200.JPG,2b678ccf-8316-44cd-8625-342f99c886b1,NY_DOE-nitfix_visit4_R0006200.JPG


In [35]:
def zip_images(images, image_dir):
    """Shrink and rotate images and save them into a staging directory.

    Each file named in the ``image_file`` column of ``images`` is read from
    PHOTOS, scaled to 75% of its width and height, rotated if it is wider than
    it is tall, and written to ``TEMP_DIR / image_dir``. The output file name
    is the relative path with every '/' replaced by '_'.

    Despite the name, this function does not create a zip archive itself; it
    only fills the directory.

    Args:
        images: DataFrame with an ``image_file`` column holding paths relative
            to PHOTOS.
        image_dir: Directory, joined onto TEMP_DIR, that receives the
            transformed images. It is created if it does not exist.
    """
    image_zip_dir = TEMP_DIR / image_dir
    os.makedirs(image_zip_dir, exist_ok=True)

    # Source folders whose landscape photos are turned with ROTATE_90; every
    # other folder (apart from the 'Tingshuang*' rule below) uses ROTATE_270.
    rotate_90_dirs = (
        'MO-DOE-nitfix_visit3', 'NY_DOE-nitfix_visit3',
        'NY_DOE-nitfix_visit4', 'NY_DOE-nitfix_visit5')

    # Iterate the column directly: Series.iteritems() was removed in pandas 2.0.
    for image_file in images['image_file']:
        src = PHOTOS / image_file
        dst = image_zip_dir / image_file.replace('/', '_')
        dir_name = dirname(image_file)

        # Close the source file as soon as the resized copy exists, so a long
        # run does not accumulate open file handles.
        with Image.open(src) as original:
            width, height = original.size
            transformed = original.resize(
                (int(width * 0.75), int(height * 0.75)))

        # Only landscape images (wider than tall) are rotated.
        if width > height:
            use_rotate_90 = (
                (dir_name.startswith('Tingshuang')
                 and dir_name != 'Tingshuang_US_nitfix_photos')
                or dir_name in rotate_90_dirs)
            if use_rotate_90:
                transformed = transformed.transpose(Image.ROTATE_90)
            else:
                transformed = transformed.transpose(Image.ROTATE_270)

        transformed.save(dst)


In [37]:
# Write the 100-image sample manifest as CSV, omitting the DataFrame index.
df3.to_csv(path_or_buf=THE_100_MANIFEST, index=False)

In [36]:
# Write resized/rotated copies of the sampled photos under TEMP_DIR.
# NOTE(review): THE_100_IMAGES is itself a '..'-relative path
# (SAMPLE_DIR / THE_100_NAME), and zip_images joins it onto TEMP_DIR, so the
# files land under TEMP_DIR / '..' / 'data' / 'raw' / 'sampled_images' / ...
# Confirm whether THE_100_NAME was the intended argument.
zip_images(df3, THE_100_IMAGES)

NY_DOE-nitfix_visit4/R0006285.JPG
NY_DOE-nitfix_visit5/R0007589.JPG
NY_DOE-nitfix_visit4/R0006643.JPG
NY_DOE-nitfix_visit4/R0006292.JPG
NY_DOE-nitfix_visit4/R0006200.JPG
NY_DOE-nitfix_visit4/R0007081.JPG
NY_DOE-nitfix_visit5/R0007851.JPG
NY_DOE-nitfix_visit4/R0006471.JPG
NY_DOE-nitfix_visit3/R0005169.JPG
NY_DOE-nitfix_visit5/R0007859.JPG
NY_DOE-nitfix_visit3/R0005069.JPG
NY_DOE-nitfix_visit3/R0005788.JPG
NY_DOE-nitfix_visit4/R0006483.JPG
NY_DOE-nitfix_visit4/R0006007.JPG
NY_DOE-nitfix_visit4/R0006804.JPG
NY_DOE-nitfix_visit5/R0008338.JPG
NY_DOE-nitfix_visit5/R0007667.JPG
NY_DOE-nitfix_visit5/R0008470.JPG
NY_DOE-nitfix_visit4/R0006734.JPG
NY_DOE-nitfix_visit4/R0006180.JPG
NY_DOE-nitfix_visit5/R0008135.JPG
NY_DOE-nitfix_visit4/R0006862.JPG
NY_DOE-nitfix_visit5/R0007564.JPG
NY_DOE-nitfix_visit3/R0005638.JPG
NY_DOE-nitfix_visit3/R0005608.JPG
NY_DOE-nitfix_visit4/R0006422.JPG
NY_DOE-nitfix_visit3/R0005089.JPG
NY_DOE-nitfix_visit3/R0005450.JPG
NY_DOE-nitfix_visit5/R0008331.JPG
NY_DOE-nitfix_