# Data Preprocessing

To clean the data before uploading it to cloud storage, I need to correct the datetimes, rename the files so that every image has a unique name, and crop out the information bars.

In [122]:
import datetime
import exif
from PIL import Image

Images from the Browning camera initially had incorrect datetimes. To calculate the offset, I compared the incorrect and corrected datetimes for a trigger image series.

In [40]:
# Offset added to each recorded timestamp by change_datetime: the difference
# between the two reference datetimes of the trigger image series.
time_delta = (
    datetime.datetime(year=2019, month=6, day=30, hour=10, minute=9, second=0)
    - datetime.datetime(year=2018, month=1, day=28, hour=13, minute=23, second=0)
)
time_delta

datetime.timedelta(517, 74760)

In [70]:
def change_datetime(old_datetime, time_delta):
    """Shift an EXIF-formatted timestamp by a fixed offset.

    Args:
        old_datetime: Timestamp formatted as ``YYYY:MM:DD HH:MM:SS``, given
            either as ``bytes`` or as ``str``.
        time_delta: ``datetime.timedelta`` to add to the timestamp.

    Returns:
        The shifted timestamp as a ``str`` in the same format.

    Raises:
        ValueError: If the timestamp cannot be decoded or does not match the
            expected format.
    """
    exif_format = "%Y:%m:%d %H:%M:%S"
    if isinstance(old_datetime, (bytes, bytearray)):
        # Decode the bytes rather than slicing the "b'...'" repr; the slice
        # chopped real characters off any value that was already a str.
        old_dt_text = bytes(old_datetime).decode("ascii")
    else:
        old_dt_text = str(old_datetime)
    # Tolerate NUL / space padding around the stored value.
    old_dt_text = old_dt_text.strip("\x00 ")
    old_dt = datetime.datetime.strptime(old_dt_text, exif_format)
    return (old_dt + time_delta).strftime(exif_format)

In [107]:
def change_exif_datetimes(image_path, time_delta):
    """Shift the EXIF timestamps stored in an image file by a fixed offset.

    Reads tag 306 from the "0th" IFD, shifts it with ``change_datetime``, and
    writes the shifted value to tag 306 ("0th") and tags 36867 and 36868
    ("Exif") -- the datetime, datetime_original and datetime_digitized
    fields. The updated EXIF block is then inserted back into the file at
    ``image_path``, modifying it in place.

    Args:
        image_path: Path of the image file to update.
        time_delta: ``datetime.timedelta`` to add to the stored timestamp.
    """
    # Imported here because the notebook's import cell never brings piexif
    # into scope, so the bare name raised NameError on a fresh kernel.
    import piexif

    # Load EXIF directly from the file. This avoids going through the bare
    # name `Image`, which the notebook binds to two different libraries
    # (PIL and exif), and avoids leaving an image file handle open.
    exif_dict = piexif.load(image_path)

    # Generate the updated datetime from the stored one.
    old_dt_bytes = exif_dict["0th"][306]
    print(old_dt_bytes)
    new_dt_bytes = change_datetime(old_dt_bytes, time_delta).encode("utf-8")

    # Replace the three EXIF datetimes with the updated value.
    exif_dict["0th"][306] = new_dt_bytes
    print(exif_dict['0th'][306])
    exif_dict["Exif"][36868] = new_dt_bytes
    exif_dict["Exif"][36867] = new_dt_bytes

    # Workaround for exif dump: overwrite tag 41988 before serialising.
    # NOTE(review): 41988 is DigitalZoomRatio, a RATIONAL tag in the EXIF
    # spec -- confirm piexif.dump accepts a float here rather than a
    # (numerator, denominator) pair.
    exif_dict['Exif'][41988] = 0.

    # Convert into bytes and put them in the image file.
    exif_bytes = piexif.dump(exif_dict)
    piexif.insert(exif_bytes, image_path)
    print("Updated exif datetimes!")
    

In [108]:
# Image used below to try out the EXIF datetime edits.
image_path = "images_to_adjust/IMG_0001.JPG"

In [110]:
from exif import Image

In [111]:
# Load the image through the exif package. The class is referenced as
# `exif.Image` so this cell does not depend on which library the bare name
# `Image` currently points to (both PIL and exif are imported under that name).
with open(image_path, 'rb') as image_file:
    image = exif.Image(image_file)

In [112]:
# Check that the loaded file carries EXIF metadata before editing it.
image.has_exif

True

In [113]:
# List the attributes available on the loaded image, to find the datetime fields.
dir(image)

['<unknown EXIF tag 33426>',
 '_exif_ifd_pointer',
 '_interoperability_ifd_Pointer',
 '_segments',
 'aperture_value',
 'color_space',
 'components_configuration',
 'compressed_bits_per_pixel',
 'compression',
 'datetime',
 'datetime_digitized',
 'datetime_original',
 'digital_zoom_ratio',
 'exif_version',
 'exposure_bias_value',
 'exposure_mode',
 'exposure_program',
 'exposure_time',
 'f_number',
 'file_source',
 'flash',
 'flashpix_version',
 'focal_length',
 'focal_length_in_35mm_film',
 'get',
 'get_file',
 'has_exif',
 'jpeg_interchange_format',
 'jpeg_interchange_format_length',
 'light_source',
 'make',
 'maker_note',
 'max_aperture_value',
 'metering_mode',
 'model',
 'orientation',
 'photographic_sensitivity',
 'pixel_x_dimension',
 'pixel_y_dimension',
 'resolution_unit',
 'scene_capture_type',
 'scene_type',
 'sharpness',
 'shutter_speed_value',
 'software',
 'user_comment',
 'white_balance',
 'x_resolution',
 'y_and_c_positioning',
 'y_resolution']

In [118]:
# Write the same timestamp to all three EXIF datetime fields. Chained
# assignment binds the targets left to right, so the order is unchanged.
image.datetime = image.datetime_digitized = image.datetime_original = "2019:01:20 00:21:45"

In [121]:
# Serialise before opening the destination: mode 'wb' truncates the file as
# soon as it is opened, so a failure inside get_file() would otherwise leave
# the source image empty. The image is written back over the file it was
# loaded from (image_path), replacing it in place.
updated_image_bytes = image.get_file()
with open(image_path, 'wb') as new_image_file:
    new_image_file.write(updated_image_bytes)