In [1]:
# Use python 3.7+

import numpy as np
import pandas as pd
from pathlib import Path
import PIL
from PIL import Image, ExifTags
import torch
from torchvision.transforms import ToTensor
import shutil
from typing import Optional, List
from dataclasses import dataclass
import folium

@dataclass
class Lat_Lon:
    lat: float
    lon: float

In [2]:
# A callable class that creates the destination bin directories
# based on start, stop and step temperature arguments and can then
# copy a file from a source directory to a destination bin directory.

class Bin_Agent:
    
    def _create_bin_names(self):
        """Make the bin directory names."""
        bin_cnt = 2 + (self.stop - self.start) // self.step
        self.bin_names = [""] * bin_cnt
        self.bin_names[0] = "le_{}".format(self.start)
        self.bin_names[1:] = ["gt_{}".format(i) 
                             for i in range(self.start, self.stop + 1, self.step)]
        
    def _create_destination_bin_directories(self):
        """Create the directories under base_path if they do not exist."""
        for dir_name in self.bin_names:
            new_dir = self.base_path / dir_name
            new_dir.mkdir(exist_ok=True)    
            print("Directory {} created or already exists.".format(new_dir))
            
    def _get_destination_bin(self, max_temp : float) -> str:
        """Given a maximum temperature, find the destination bin"""
        bin_name = None

        if max_temp > self.stop:
            bin_name = self.bin_names[-1]
        elif max_temp <= self.start:
            bin_name = self.bin_names[0]
        else:
            bin_idx = int(max_temp) // self.step + 1
            bin_name = self.bin_names[bin_idx]

        return bin_name
           
    def __init__(self, base_path : Path, start : int, stop : int, step : int):
        assert step > 0, "Step must be greater than 0"
        assert start < stop, "Stop must be greater than Start"
        assert (stop - start) % step == 0, "Difference between start and stop must be an exact multiple of step"
        
        self.base_path = base_path
        self.start = start
        self.stop = stop
        self.step = step
        self.bin_names = []
        
        self._create_bin_names()
        self._create_destination_bin_directories()
    
    def __call__(self, tif_file : Path, max_temp : float) -> Path:
        """Given a tif file and its maximum temperature, find the destination path"""
        tif_file_name = tif_file.name
        bin_name = self._get_destination_bin(max_temp)
        return self.base_path / bin_name / tif_file_name

In [3]:
temper_path = Path("rjpeg_temperature_16bit_0-80")
geo_path = Path("rjpeg_modified_0-80_gps_mod")
base_path = Path(".")
temper_path, geo_path, base_path

(PosixPath('rjpeg_temperature_16bit_0-80'),
 PosixPath('rjpeg_modified_0-80_gps_mod'),
 PosixPath('.'))

In [4]:
# Create the bin agent with our input parameters.
bin_agent = Bin_Agent(Path("."),0,80,20)

Directory le_0 created or already exists.
Directory gt_0 created or already exists.
Directory gt_20 created or already exists.
Directory gt_40 created or already exists.
Directory gt_60 created or already exists.
Directory gt_80 created or already exists.


In [5]:
# Test bin_agent()
#bin_agent(temper_path/"DJI_0035_0.tif", 25.9999)

In [6]:
def get_tif_file_paths(src_path : Path) -> pd.DataFrame:
    """Get the *.tif filenames"""
    tif_file_paths = sorted(src_path.glob("*.tif"))
    print("Number of files = {}".format(len(tif_file_paths)))
    return pd.DataFrame({"src" : tif_file_paths})

In [7]:
df = get_tif_file_paths(temper_path); df

Number of files = 406


Unnamed: 0,src
0,rjpeg_temperature_16bit_0-80/DJI_0034.tif
1,rjpeg_temperature_16bit_0-80/DJI_0035.tif
2,rjpeg_temperature_16bit_0-80/DJI_0035_0.tif
3,rjpeg_temperature_16bit_0-80/DJI_0036.tif
4,rjpeg_temperature_16bit_0-80/DJI_0036_0.tif
5,rjpeg_temperature_16bit_0-80/DJI_0037.tif
6,rjpeg_temperature_16bit_0-80/DJI_0037_0.tif
7,rjpeg_temperature_16bit_0-80/DJI_0038.tif
8,rjpeg_temperature_16bit_0-80/DJI_0038_0.tif
9,rjpeg_temperature_16bit_0-80/DJI_0039.tif


In [8]:
def get_max_temperature_from_tif_file(tif_file : Path) -> float:
    """Read a temperature tif file and get the maximum temperature"""   

    max_temp = np.NAN
    
    try:
        # Open the tif image file using PIL and then convert it to a torch.tensor().
        with PIL.Image.open(tif_file) as tif_img:
            tensor_tif = ToTensor()(tif_img)

        # Get the max() value of the tensor (returns a single item tensor like tensor(3.))
        # The item() call gets the scaler item from the tensor (e.g. 3. from tensor(3.)).
            max_temp = tensor_tif.max().item()
    except:
        max_temp = np.NAN
        
    return max_temp

def get_jpg_path(tif_file : Path) -> Path:
    """Create the corresponding jpg filename from the tif filename"""
    
    jpg_file_suffix = ".jpg"
    
    # Stem gives the filename without the parent path or the suffix 
    # e.g. Path("/hello/world.tif") => "world"
    file_name_stem = tif_file.stem
    jpg_file = geo_path / (file_name_stem + jpg_file_suffix)
    return jpg_file

In [9]:
# Find the max temperature (in °C)
df["max_temp"] = df["src"].map(lambda fp: get_max_temperature_from_tif_file(fp)); df

Unnamed: 0,src,max_temp
0,rjpeg_temperature_16bit_0-80/DJI_0034.tif,20.862015
1,rjpeg_temperature_16bit_0-80/DJI_0035.tif,22.079849
2,rjpeg_temperature_16bit_0-80/DJI_0035_0.tif,18.603956
3,rjpeg_temperature_16bit_0-80/DJI_0036.tif,22.776861
4,rjpeg_temperature_16bit_0-80/DJI_0036_0.tif,17.483416
5,rjpeg_temperature_16bit_0-80/DJI_0037.tif,23.150208
6,rjpeg_temperature_16bit_0-80/DJI_0037_0.tif,18.687483
7,rjpeg_temperature_16bit_0-80/DJI_0038.tif,23.681208
8,rjpeg_temperature_16bit_0-80/DJI_0038_0.tif,18.352951
9,rjpeg_temperature_16bit_0-80/DJI_0039.tif,24.367432


In [10]:
# Check if any tif file is missing a max_temp.
df_fail = df[df["max_temp"].isna()] ; df_fail

Unnamed: 0,src,max_temp


In [11]:
# Find the appropriate destination directory, based on max temperature
df["dest_path"] = df.apply(lambda r: bin_agent(r["src"], r["max_temp"]), axis="columns"); df

Unnamed: 0,src,max_temp,dest_path
0,rjpeg_temperature_16bit_0-80/DJI_0034.tif,20.862015,gt_20/DJI_0034.tif
1,rjpeg_temperature_16bit_0-80/DJI_0035.tif,22.079849,gt_20/DJI_0035.tif
2,rjpeg_temperature_16bit_0-80/DJI_0035_0.tif,18.603956,gt_0/DJI_0035_0.tif
3,rjpeg_temperature_16bit_0-80/DJI_0036.tif,22.776861,gt_20/DJI_0036.tif
4,rjpeg_temperature_16bit_0-80/DJI_0036_0.tif,17.483416,gt_0/DJI_0036_0.tif
5,rjpeg_temperature_16bit_0-80/DJI_0037.tif,23.150208,gt_20/DJI_0037.tif
6,rjpeg_temperature_16bit_0-80/DJI_0037_0.tif,18.687483,gt_0/DJI_0037_0.tif
7,rjpeg_temperature_16bit_0-80/DJI_0038.tif,23.681208,gt_20/DJI_0038.tif
8,rjpeg_temperature_16bit_0-80/DJI_0038_0.tif,18.352951,gt_0/DJI_0038_0.tif
9,rjpeg_temperature_16bit_0-80/DJI_0039.tif,24.367432,gt_20/DJI_0039.tif


In [12]:
# Check the number of tif files that should be copied to each bin.
dfg = df["dest_path"].copy()
dfg["dest_bin"] = dfg.map(lambda p: p.parts[-2])
dfg.groupby(["dest_bin"]).count()

dest_bin
gt_0      89
gt_20    250
gt_40     34
gt_60     13
gt_80     20
Name: dest_path, dtype: int64

In [13]:
# Copy the tif files from source to destination.
df.apply(lambda r: shutil.copy2(src=r["src"], dst=r["dest_path"]), axis="columns")

0        gt_20/DJI_0034.tif
1        gt_20/DJI_0035.tif
2       gt_0/DJI_0035_0.tif
3        gt_20/DJI_0036.tif
4       gt_0/DJI_0036_0.tif
5        gt_20/DJI_0037.tif
6       gt_0/DJI_0037_0.tif
7        gt_20/DJI_0038.tif
8       gt_0/DJI_0038_0.tif
9        gt_20/DJI_0039.tif
10      gt_0/DJI_0039_0.tif
11       gt_20/DJI_0040.tif
12      gt_0/DJI_0040_0.tif
13       gt_20/DJI_0041.tif
14      gt_0/DJI_0041_0.tif
15       gt_20/DJI_0042.tif
16      gt_0/DJI_0042_0.tif
17       gt_20/DJI_0043.tif
18      gt_0/DJI_0043_0.tif
19       gt_20/DJI_0044.tif
20     gt_60/DJI_0044_0.tif
21       gt_20/DJI_0045.tif
22     gt_40/DJI_0045_0.tif
23       gt_20/DJI_0046.tif
24     gt_80/DJI_0046_0.tif
25       gt_20/DJI_0047.tif
26     gt_80/DJI_0047_0.tif
27       gt_20/DJI_0048.tif
28     gt_80/DJI_0048_0.tif
29       gt_20/DJI_0049.tif
               ...         
376      gt_20/DJI_0371.tif
377      gt_20/DJI_0372.tif
378      gt_20/DJI_0373.tif
379      gt_20/DJI_0374.tif
380      gt_20/DJI_0

In [14]:
# Filter rows which have a max_temp greater than 60°C. 
# Make a copy of filtered rows (otherwise the slice refers to the original dataframe)
df60 = df[df.max_temp > 60].copy(); df60

Unnamed: 0,src,max_temp,dest_path
20,rjpeg_temperature_16bit_0-80/DJI_0044_0.tif,76.497498,gt_60/DJI_0044_0.tif
24,rjpeg_temperature_16bit_0-80/DJI_0046_0.tif,84.199295,gt_80/DJI_0046_0.tif
26,rjpeg_temperature_16bit_0-80/DJI_0047_0.tif,87.315895,gt_80/DJI_0047_0.tif
28,rjpeg_temperature_16bit_0-80/DJI_0048_0.tif,89.829903,gt_80/DJI_0048_0.tif
30,rjpeg_temperature_16bit_0-80/DJI_0049_0.tif,85.095657,gt_80/DJI_0049_0.tif
32,rjpeg_temperature_16bit_0-80/DJI_0050_0.tif,78.216194,gt_60/DJI_0050_0.tif
34,rjpeg_temperature_16bit_0-80/DJI_0051_0.tif,74.424072,gt_60/DJI_0051_0.tif
51,rjpeg_temperature_16bit_0-80/DJI_0060.tif,62.09716,gt_60/DJI_0060.tif
53,rjpeg_temperature_16bit_0-80/DJI_0061.tif,98.318275,gt_80/DJI_0061.tif
55,rjpeg_temperature_16bit_0-80/DJI_0062.tif,104.347237,gt_80/DJI_0062.tif


In [15]:
# reset the index to 0
df60.index = pd.RangeIndex(len(df60.index)); df60

Unnamed: 0,src,max_temp,dest_path
0,rjpeg_temperature_16bit_0-80/DJI_0044_0.tif,76.497498,gt_60/DJI_0044_0.tif
1,rjpeg_temperature_16bit_0-80/DJI_0046_0.tif,84.199295,gt_80/DJI_0046_0.tif
2,rjpeg_temperature_16bit_0-80/DJI_0047_0.tif,87.315895,gt_80/DJI_0047_0.tif
3,rjpeg_temperature_16bit_0-80/DJI_0048_0.tif,89.829903,gt_80/DJI_0048_0.tif
4,rjpeg_temperature_16bit_0-80/DJI_0049_0.tif,85.095657,gt_80/DJI_0049_0.tif
5,rjpeg_temperature_16bit_0-80/DJI_0050_0.tif,78.216194,gt_60/DJI_0050_0.tif
6,rjpeg_temperature_16bit_0-80/DJI_0051_0.tif,74.424072,gt_60/DJI_0051_0.tif
7,rjpeg_temperature_16bit_0-80/DJI_0060.tif,62.09716,gt_60/DJI_0060.tif
8,rjpeg_temperature_16bit_0-80/DJI_0061.tif,98.318275,gt_80/DJI_0061.tif
9,rjpeg_temperature_16bit_0-80/DJI_0062.tif,104.347237,gt_80/DJI_0062.tif


In [16]:
# Compute the jpg filepath from the source tif filepath.
df60["jpg"] = df60["src"].map(lambda fp: get_jpg_path(fp)); df60

Unnamed: 0,src,max_temp,dest_path,jpg
0,rjpeg_temperature_16bit_0-80/DJI_0044_0.tif,76.497498,gt_60/DJI_0044_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0044_0.jpg
1,rjpeg_temperature_16bit_0-80/DJI_0046_0.tif,84.199295,gt_80/DJI_0046_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0046_0.jpg
2,rjpeg_temperature_16bit_0-80/DJI_0047_0.tif,87.315895,gt_80/DJI_0047_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0047_0.jpg
3,rjpeg_temperature_16bit_0-80/DJI_0048_0.tif,89.829903,gt_80/DJI_0048_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0048_0.jpg
4,rjpeg_temperature_16bit_0-80/DJI_0049_0.tif,85.095657,gt_80/DJI_0049_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0049_0.jpg
5,rjpeg_temperature_16bit_0-80/DJI_0050_0.tif,78.216194,gt_60/DJI_0050_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0050_0.jpg
6,rjpeg_temperature_16bit_0-80/DJI_0051_0.tif,74.424072,gt_60/DJI_0051_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0051_0.jpg
7,rjpeg_temperature_16bit_0-80/DJI_0060.tif,62.09716,gt_60/DJI_0060.tif,rjpeg_modified_0-80_gps_mod/DJI_0060.jpg
8,rjpeg_temperature_16bit_0-80/DJI_0061.tif,98.318275,gt_80/DJI_0061.tif,rjpeg_modified_0-80_gps_mod/DJI_0061.jpg
9,rjpeg_temperature_16bit_0-80/DJI_0062.tif,104.347237,gt_80/DJI_0062.tif,rjpeg_modified_0-80_gps_mod/DJI_0062.jpg


In [17]:
def get_latitude_longitude(file_path : Path) -> Optional[Lat_Lon]:
    """Given a jpg file, return the latitude and longitude from the exif data"""
    
    def get_decimal_degrees_from_dms(dms, ref : str) -> float:
        """ Convert the rational64u format of degree, minute, second to decimal_degrees.
            Sample input dms format: ((51, 1), (45, 1), (1358029999, 50000000))
        """
        (dn, dd), (mn, md), (sn, sd) = dms
        d, m, s = float(dn)/dd, float(mn)/md, float(sn)/sd
        decimal_dms = d + m/60.0 + s/(3600.0)
        # If Latitude is 'S' or Longitude is 'W' , the degree sign should be negative
        return -decimal_dms if ref in ["S", "W"] else decimal_dms
        
    # Get the GPSInfo exif data
    try:
        image = PIL.Image.open(file_path)
        gps_exif = {PIL.ExifTags.TAGS[k]: v for k, v in image._getexif().items()
                        if k in PIL.ExifTags.TAGS and PIL.ExifTags.TAGS[k] == "GPSInfo"}

        if len(gps_exif) == 0:
            return None
    except:
        return None
    
    # Add the GPS tags.
    gps_exif_with_tags = {PIL.ExifTags.GPSTAGS[k]: v for k, v in gps_exif["GPSInfo"].items()
                            if k in PIL.ExifTags.GPSTAGS}
    
    # Verify presence of the required GPS tags.
    if not frozenset(["GPSLatitudeRef", "GPSLatitude", "GPSLongitudeRef", 
                     "GPSLongitude"]).issubset(gps_exif_with_tags.keys()):
        return None
    
    result = None
    # Convert the rational64u format of degree, minute, second to decimal_degrees 
    try:
        assert gps_exif_with_tags["GPSLatitudeRef"] in ["N", "S"], "LatitudeRef must be 'N' or 'S'"
        lat = get_decimal_degrees_from_dms(gps_exif_with_tags["GPSLatitude"], 
                                           gps_exif_with_tags["GPSLatitudeRef"])
        
        assert gps_exif_with_tags["GPSLongitudeRef"] in ["E", "W"], "LongitudeRef must be 'E' or 'W'"                                                          
        lon = get_decimal_degrees_from_dms(gps_exif_with_tags["GPSLongitude"], 
                                           gps_exif_with_tags["GPSLongitudeRef"])
    except:
        return None                                                        

    return Lat_Lon(round(lat, 6),  round(lon, 6))

In [18]:
# Get the latitude and longitude from the exif data of the jpg files
df60["coord"] = df60["jpg"].map(lambda fp: get_latitude_longitude(fp)); df60

Unnamed: 0,src,max_temp,dest_path,jpg,coord
0,rjpeg_temperature_16bit_0-80/DJI_0044_0.tif,76.497498,gt_60/DJI_0044_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0044_0.jpg,"Lat_Lon(lat=51.755092, lon=-100.896346)"
1,rjpeg_temperature_16bit_0-80/DJI_0046_0.tif,84.199295,gt_80/DJI_0046_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0046_0.jpg,"Lat_Lon(lat=51.755092, lon=-100.896667)"
2,rjpeg_temperature_16bit_0-80/DJI_0047_0.tif,87.315895,gt_80/DJI_0047_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0047_0.jpg,"Lat_Lon(lat=51.755092, lon=-100.896829)"
3,rjpeg_temperature_16bit_0-80/DJI_0048_0.tif,89.829903,gt_80/DJI_0048_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0048_0.jpg,"Lat_Lon(lat=51.755092, lon=-100.89699)"
4,rjpeg_temperature_16bit_0-80/DJI_0049_0.tif,85.095657,gt_80/DJI_0049_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0049_0.jpg,"Lat_Lon(lat=51.755093, lon=-100.897152)"
5,rjpeg_temperature_16bit_0-80/DJI_0050_0.tif,78.216194,gt_60/DJI_0050_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0050_0.jpg,"Lat_Lon(lat=51.755093, lon=-100.897314)"
6,rjpeg_temperature_16bit_0-80/DJI_0051_0.tif,74.424072,gt_60/DJI_0051_0.tif,rjpeg_modified_0-80_gps_mod/DJI_0051_0.jpg,"Lat_Lon(lat=51.755093, lon=-100.897475)"
7,rjpeg_temperature_16bit_0-80/DJI_0060.tif,62.09716,gt_60/DJI_0060.tif,rjpeg_modified_0-80_gps_mod/DJI_0060.jpg,"Lat_Lon(lat=51.757544, lon=-100.896372)"
8,rjpeg_temperature_16bit_0-80/DJI_0061.tif,98.318275,gt_80/DJI_0061.tif,rjpeg_modified_0-80_gps_mod/DJI_0061.jpg,"Lat_Lon(lat=51.757544, lon=-100.896185)"
9,rjpeg_temperature_16bit_0-80/DJI_0062.tif,104.347237,gt_80/DJI_0062.tif,rjpeg_modified_0-80_gps_mod/DJI_0062.jpg,"Lat_Lon(lat=51.757544, lon=-100.896092)"


In [19]:
# Check if any coordinate is missing.
df_fail = df60[df60["coord"].isna()]; df_fail

Unnamed: 0,src,max_temp,dest_path,jpg,coord


In [20]:
# Create the map
tmap = folium.Map(location=[51.755, -100.896], zoom_start=12)
for ir in df60.iterrows():
    row = ir[1]
    temp = round(row["max_temp"], 2)
    lat = row["coord"].lat
    lon = row["coord"].lon
    folium.Marker(location=[lat, lon], 
        popup="Lat:{},Long:{},Temp={}°C".format(lat, lon, temp),
        icon=folium.Icon(color="red", icon="info-sign")).add_to(tmap)
    
tmap.save("hotspots60C_map.html")

display(tmap)