## Extracting Coordinate data from HEIC & JPG images

-- will be using these coordinates as input for viewshed testing

In [1]:
from PIL import Image as IM
from pillow_heif import register_heif_opener
from exif import Image
from pathlib import Path
import pandas as pd

In [2]:
def get_exif(filename):
    '''
    For HEIC images
    sourced from: https://stackoverflow.com/questions/72522522/how-to-extract-gps-location-from-heic-files
    '''
    image = IM.open(filename)
    image.verify()
    return image.getexif().get_ifd(0x8825)


def get_geotagging(exif):
    '''
    extracts GPS datat from HEIC imagery
    sourced from: https://stackoverflow.com/questions/72522522/how-to-extract-gps-location-from-heic-files
    '''
    geo_tagging_info = {}
    if not exif:
        raise ValueError("No EXIF metadata found")
    else:
        gps_keys = ['GPSVersionID', 'GPSLatitudeRef', 'GPSLatitude', 'GPSLongitudeRef', 'GPSLongitude',
                    'GPSAltitudeRef', 'GPSAltitude', 'GPSTimeStamp', 'GPSSatellites', 'GPSStatus', 'GPSMeasureMode',
                    'GPSDOP', 'GPSSpeedRef', 'GPSSpeed', 'GPSTrackRef', 'GPSTrack', 'GPSImgDirectionRef',
                    'GPSImgDirection', 'GPSMapDatum', 'GPSDestLatitudeRef', 'GPSDestLatitude', 'GPSDestLongitudeRef',
                    'GPSDestLongitude', 'GPSDestBearingRef', 'GPSDestBearing', 'GPSDestDistanceRef', 'GPSDestDistance',
                    'GPSProcessingMethod', 'GPSAreaInformation', 'GPSDateStamp', 'GPSDifferential']

        for k, v in exif.items():
            try:
                geo_tagging_info[gps_keys[k]] = str(v)
            except IndexError:
                pass
        return geo_tagging_info

def extract_JPG_met(image): 
    '''
    extracting GPS coordinates from JPG imagery
    sourced from: https://medium.com/spatial-data-science/how-to-extract-gps-coordinates-from-images-in-python-e66e542af354
    '''
    dict = {}
    with open(image, "rb") as src:
        img = Image(src)
        if img.has_exif:
            info = f" has the EXIF {img.exif_version}"
            dict['GPSLatitudeRef'] = img.gps_latitude_ref
            dict['GPSLatitude'] = img.gps_latitude
            dict['GPSLongitudeRef'] = img.gps_longitude_ref
            dict['GPSLongitude'] = img.gps_longitude
            dict['GPSAltitudeRef'] = img.gps_altitude_ref
            dict['GPSAltitude'] = img.gps_altitude
            dict['GPSSpeedRef'] = img.gps_speed_ref
            dict['GPSSpeed'] = img.gps_speed
            dict['GPSImgDirectionRef'] = img.gps_img_direction_ref
            dict['GPSImgDirection'] = img.gps_img_direction
            dict['GPSDestBearingRef'] = img.gps_dest_bearing_ref
            dict['GPSDestBearing'] = img.gps_dest_bearing
            dict['GPSDateStamp'] = img.datetime
        else:
            info = "does not contain any EXIF information"
            print(f"Image {src.name}: {info}")
        return dict
        
    
        # both JPG and HEIC metedata are stored in dictionaries with the following attributes: 
        # {'GPSLatitudeRef': 'N', 
        # 'GPSLatitude': '(3x.0, 5x.0, 1x.0x)', 
        # 'GPSLongitudeRef': 'W', 
        # 'GPSLongitude': '(8x.0, 2x.0, 5x.2x)', 
        # 'GPSAltitudeRef': "b'\\x00'", 
        # 'GPSAltitude': '279.63243243243244', 
        # 'GPSSpeedRef': 'K', 
        # 'GPSSpeed': '0.04649941997239198', 
        # 'GPSImgDirectionRef': 'T', 
        # 'GPSImgDirection': '274.37165833514456', 
        # 'GPSDestBearingRef': 'T', 
        # 'GPSDestBearing': '27x.37165833514456', 
        # 'GPSDateStamp': '2022:06:12'}

def to_decimal_degree(degree, minute, second, direction):
    '''
    Converts GPS coorinates to Decimal degrees
    sourced from: https://stackoverflow.com/questions/33997361/how-to-convert-degree-minute-second-to-degree-decimal
    '''
    dd = float(degree) + (float(minute)/60) + (float(second)/3600)
    if direction == 'W'  or direction == 'S':
        dd *= -1
    
    return dd


### Extracting GPS Metadata from Imagery

In [3]:
# assign directory where my Images are: 
directory = 'Images/'
GPS_Dict = {}

# iterate over specific files in 'Images' to extract the GPS data from HEIC files
files = Path(directory).glob('*.HEIC')
for file in files:
    names = str(file).split('\\')
    register_heif_opener()
    newname = directory + (names[1].split('.'))[0] + '.heic'
    image_info = get_exif(newname)
    GPS_Dict[names[1]] = get_geotagging(image_info)

# iterate over specific files in 'Images' to extract the GPS data from JPG files   
files = Path(directory).glob('*.jpg')
for file in files:
    names = str(file).split('\\')
    GPS_Dict[names[1]] = extract_JPG_met(file)


### Converting GPS data from DMS to Decimal Degree

(there is redundant code here...could eventually work towards reformating it)

Note the following code loops through the metedata (GPS info) extracted above. The issue is that Data for GPSLatitude and GPSLongtitude are different datatypes (tuple for JPGS and strings for HEIC files) depending image files type. Thus to convert DMS to DD, I must differentiate tuple data from string and then convert....

In [34]:
#converting GPS_dict to a pandas dataframe
df = pd.DataFrame(data=GPS_Dict)
gps_df = df.T

##converting the DMS values for each rows lat and long
number = -1
dictionary_Lat = {}
dictionary_Long = {}

for index in gps_df.index:
    DMS_lat_jpg = []
    DMS_long_jpg = []
    DMS_lat_heic = []
    DMS_long_heic = []
    number += 1
    
    #jpg store as tuples with floats
    if isinstance(gps_df['GPSLatitude'][number], tuple) == True:
        print('Jpg Lat as Tuple')
        for tup in gps_df['GPSLatitude'][number]:
            DMS_lat_jpg.append(tup)
        Lat_jpg = to_decimal_degree(DMS_lat_jpg[0], DMS_lat_jpg[1], DMS_lat_jpg[2], gps_df['GPSLatitudeRef'][number])
        dictionary_Lat[index] = Lat_jpg

    if isinstance(gps_df['GPSLongitude'][number], tuple) == True:
        print('Jpg Long as Tuple')
        for tup in gps_df['GPSLongitude'][number]:
            DMS_long_jpg.append(tup)
        Long_jpg = to_decimal_degree(DMS_long_jpg[0], DMS_long_jpg[1], DMS_long_jpg[2], gps_df['GPSLongitudeRef'][number])
        dictionary_Long[index] = Long_jpg

        
    if isinstance(gps_df['GPSLatitude'][number], tuple) == False or isinstance(gps_df['GPSLongitude'][number], tuple) == False:
        print('HEIC Lat/Long as String')
        #Heic file stored as strings
        values_lat = gps_df['GPSLatitude'][number].split(',')
        values_long = gps_df['GPSLongitude'][number].split(',')
        #cleaning values for LAt
        for x in range(len(values_lat)):
            check1 = values_lat[x].replace(',', '')
            check2 = check1.replace('(', '')
            DMS_lat_heic.append(check2.replace(')', ''))
        #cleaning values for long
        for y in range(len(values_long)):
            check1 = values_long[y].replace(',', '')
            check2 = check1.replace('(', '')
            DMS_long_heic.append(check2.replace(')', ''))
        
        Lat_heic = to_decimal_degree(DMS_lat_heic[0], DMS_lat_heic[1], DMS_lat_heic[2], gps_df['GPSLatitudeRef'][number])
        Long_heic = to_decimal_degree(DMS_long_heic[0], DMS_long_heic[1], DMS_long_heic[2], gps_df['GPSLongitudeRef'][number])
        
        dictionary_Lat[index] = Lat_heic
        dictionary_Long[index] = Long_heic

##adding new columns for decimal degrees    
gps_df['decimal_Lat'] = gps_df.index.map(dictionary_Lat)
gps_df['decimal_Long'] = gps_df.index.map(dictionary_Long)

Jpg Lat/Long as String
Jpg Lat/Long as String
Jpg Lat/Long as String
Jpg Lat as Tuple
Jpg Long as Tuple
Jpg Lat as Tuple
Jpg Long as Tuple
Jpg Lat as Tuple
Jpg Long as Tuple


## export dataframe as csv file

In [37]:
gps_df.to_csv("GPS_metadata.csv")

### Scratch Work!!
Used for troubleshooting the above code

In [35]:
dictionary_Long

{'image1.HEIC': -113.51356388888888,
 'image2.HEIC': -113.36959722222221,
 'image3.HEIC': -113.38379722222223,
 'image4.JPG': -113.71822222222222,
 'image5.JPG': -113.7901,
 'image8.JPG': -113.72827777777778}

In [31]:
for number in range(len(gps_df)):
    ##there are TUPLES so why aren't they being detected? -->investigate the 'isinstance()' 
    print(number)
    print('Long:', isinstance((gps_df['GPSLongitude'][number]), type(tuple)), 'Lat:', isinstance(gps_df['GPSLatitude'][number], type(tuple)))
    print('long:', gps_df['GPSLongitude'][number], 'Lat:', gps_df['GPSLatitude'][number])

0
Long: False Lat: False
long: (113.0, 30.0, 48.83) Lat: (48.0, 34.0, 27.46)
1
Long: False Lat: False
long: (113.0, 22.0, 10.55) Lat: (48.0, 29.0, 0.82)
2
Long: False Lat: False
long: (113.0, 23.0, 1.67) Lat: (48.0, 36.0, 8.43)
3
Long: False Lat: False
long: (113.0, 43.0, 5.6) Lat: (48.0, 41.0, 47.11)
4
Long: False Lat: False
long: (113.0, 47.0, 24.36) Lat: (48.0, 39.0, 35.09)
5
Long: False Lat: False
long: (113.0, 43.0, 41.8) Lat: (48.0, 43.0, 36.3)


In [33]:
for number in range(len(gps_df)):
    variable1 = gps_df['GPSLongitude'][number]
    variable2 = gps_df['GPSLatitude'][number]
    ##there are TUPLES so why aren't they being detected? -->investigate the 'isinstance()' 
    print(number)
    print('Long:', isinstance(variable1, tuple), 'Lat:', isinstance(variable2, type(tuple)))
    print('long:', gps_df['GPSLongitude'][number], 'Lat:', gps_df['GPSLatitude'][number])

0
Long: False Lat: False
long: (113.0, 30.0, 48.83) Lat: (48.0, 34.0, 27.46)
1
Long: False Lat: False
long: (113.0, 22.0, 10.55) Lat: (48.0, 29.0, 0.82)
2
Long: False Lat: False
long: (113.0, 23.0, 1.67) Lat: (48.0, 36.0, 8.43)
3
Long: True Lat: False
long: (113.0, 43.0, 5.6) Lat: (48.0, 41.0, 47.11)
4
Long: True Lat: False
long: (113.0, 47.0, 24.36) Lat: (48.0, 39.0, 35.09)
5
Long: True Lat: False
long: (113.0, 43.0, 41.8) Lat: (48.0, 43.0, 36.3)


In [36]:
#looking at the data
gps_df

Unnamed: 0,GPSLatitudeRef,GPSLatitude,GPSLongitudeRef,GPSLongitude,GPSAltitudeRef,GPSAltitude,GPSSpeedRef,GPSSpeed,GPSImgDirectionRef,GPSImgDirection,GPSDestBearingRef,GPSDestBearing,GPSDateStamp,decimal_Lat,decimal_Long
image1.HEIC,N,"(48.0, 34.0, 27.46)",W,"(113.0, 30.0, 48.83)",b'\x00',2229.472989195678,K,0.5699999928474,T,294.1721804511278,T,294.1721804511278,2021:07:25,48.574294,-113.513564
image2.HEIC,N,"(48.0, 29.0, 0.82)",W,"(113.0, 22.0, 10.55)",b'\x00',1588.427113702624,K,0.3926195221919654,T,255.8991088478676,T,255.8991088478676,,48.483561,-113.369597
image3.HEIC,N,"(48.0, 36.0, 8.43)",W,"(113.0, 23.0, 1.67)",b'\x00',1581.5276752767527,K,1.6599999666164245,T,211.52862539959315,T,211.52862539959315,,48.602342,-113.383797
image4.JPG,N,"(48.0, 41.0, 47.11)",W,"(113.0, 43.0, 5.6)",GpsAltitudeRef.ABOVE_SEA_LEVEL,2023.487544,K,0.0,T,291.178986,T,291.178986,2020:07:04 11:34:56,48.696419,-113.718222
image5.JPG,N,"(48.0, 39.0, 35.09)",W,"(113.0, 47.0, 24.36)",GpsAltitudeRef.ABOVE_SEA_LEVEL,1194.870112,K,0.17,T,145.548599,T,145.548599,2020:07:03 11:51:28,48.659747,-113.7901
image8.JPG,N,"(48.0, 43.0, 36.3)",W,"(113.0, 43.0, 41.8)",GpsAltitudeRef.ABOVE_SEA_LEVEL,1733.286913,K,0.33,T,215.378494,T,215.378494,2020:07:04 10:21:33,48.72675,-113.728278
