In [1]:
# Phenocam Data Retrieval
# This script can take many hours to complete, but it is designed so that it can be interrupted at any time and later resumed from where it left off.

In [1]:
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
from pathlib import Path
import json
from time import sleep
import pickle as pkl

from shapely.geometry import Polygon, Point
from pyproj import CRS, Transformer
from pyproj.aoi import AreaOfInterest
from pyproj.database import query_utm_crs_info
from pathlib import Path
import requests

In [2]:
# Filesystem layout. Every entry found in data/merged is treated as a site name.
DATA_DIR = Path('data')
MERGED_DIR = DATA_DIR / 'merged'
SITES = os.listdir(MERGED_DIR)

# Run-time settings are read from a JSON file in the working directory.
CONFIG_FILE = Path('config.json')
with CONFIG_FILE.open('r') as f:
    config = json.load(f)

# PhenoCam service settings.
BASE_URL = 'https://phenocam.nau.edu'
# Read from the 'max_distance_from_tower_m' config key (metres, going by the key name).
MAX_DIST = config['phenocam']['max_distance_from_tower_m']


def camera_url(camera):
    """Return the midday-images API URL for the given camera name."""
    return f'{BASE_URL}/api/middayimages/{camera}'

In [3]:
def get_polygon_wkt(lat, lon, offset_m) -> Polygon:
    """Build a square box around a site and return it in WGS84 coordinates.

    The site is projected into the first UTM CRS that pyproj reports for the
    point, the four corners are placed ``offset_m`` away from it along each
    projected axis, and the corners are projected back to EPSG:4326.

    Args:
        lat (float): Latitude of the site in WGS84.
        lon (float): Longitude of the site in WGS84.
        offset_m (int): Number of meters to offset in each direction from the site.

    Returns:
        Polygon: A Shapely polygon (not a WKT string, despite the function
        name) whose vertices are (lon, lat) pairs.
    """
    # A zero-area "area of interest" located exactly at the site.
    site_aoi = AreaOfInterest(
        west_lon_degree=lon,
        south_lat_degree=lat,
        east_lon_degree=lon,
        north_lat_degree=lat,
    )
    utm_matches = query_utm_crs_info(area_of_interest=site_aoi, datum_name="WGS 84")
    utm_crs = CRS.from_epsg(utm_matches[0].code)

    # always_xy=True keeps the (lon, lat) / (easting, northing) axis order.
    to_utm = Transformer.from_crs("EPSG:4326", utm_crs, always_xy=True)
    to_wgs84 = Transformer.from_crs(utm_crs, "EPSG:4326", always_xy=True)

    # Box edges in projected coordinates.
    center_x, center_y = to_utm.transform(lon, lat)
    west, east = center_x - offset_m, center_x + offset_m
    south, north = center_y - offset_m, center_y + offset_m

    # Closed ring starting and ending at the bottom-left corner.
    utm_ring = [
        (west, south),
        (east, south),
        (east, north),
        (west, north),
        (west, south),
    ]

    # Project every corner back to lon/lat and assemble the final polygon.
    lon_lat_ring = [to_wgs84.transform(px, py) for px, py in utm_ring]
    return Polygon(lon_lat_ring)

In [4]:
def format_timestamp(ts):
    """Render a YYYYMMDDHHMM timestamp as a 'YYYY-MM-DD' date string.

    Args:
        ts: Timestamp laid out as YYYYMMDDHHMM, e.g. 202108120630. It is
            converted with ``str`` before being sliced.

    Returns:
        str: The first eight digits formatted as 'YYYY-MM-DD'; the HHMM part
        is discarded.
    """
    digits = str(ts)
    year, month, day = digits[:4], digits[4:6], digits[6:8]
    return '-'.join((year, month, day))


def unformat_timestamp(ts):
    """Convert a hyphen-separated date string into an integer timestamp.

    Args:
        ts (str): Date written as 'YYYY-MM-DD' (hyphens are what the code
            strips).

    Returns:
        int: The date digits followed by 1200, i.e. YYYYMMDD1200. The time of
        day is always set to 12:00, on the assumption that the timestamp
        refers to solar noon.
    """
    date_digits = ts.replace('-', '')
    return int(date_digits + '1200')


def get_all_timestamps(min_date, max_date):
    """List every half-hourly timestamp between two YYYYMMDDHHMM values.

    Args:
        min_date: First timestamp, as an int or string laid out as
            YYYYMMDDHHMM (e.g. 202108120630).
        max_date: Last timestamp, same layout. It is included in the result
            only if it falls on the 30-minute grid that starts at ``min_date``.

    Returns:
        list[int]: Timestamps in YYYYMMDDHHMM form, 30 minutes apart, as plain
        Python ints. Empty when ``max_date`` is earlier than ``min_date``.
    """
    layout = '%Y%m%d%H%M'
    # str() lets both integer and string inputs be parsed with the same format.
    first = pd.to_datetime(str(min_date), format=layout)
    last = pd.to_datetime(str(max_date), format=layout)
    # '30min' rather than '30T': the single-letter 'T' alias is deprecated in
    # recent pandas, while 'min' is accepted by old and new versions alike.
    half_hours = pd.date_range(start=first, end=last, freq='30min')
    # Build Python ints directly instead of astype(int), which can map to a
    # 32-bit integer on some platforms and overflow 12-digit values.
    return [int(stamp) for stamp in half_hours.strftime(layout)]

In [5]:
def get_image_list(camera, timeout=30):
    """Fetch the midday-image listing for one camera from the PhenoCam API.

    Args:
        camera: Camera name; it is interpolated into the URL built by
            ``camera_url``.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout a stalled connection would block the whole
            multi-hour run indefinitely.

    Returns:
        The decoded JSON body when the server answers with HTTP 200, otherwise
        ``None``. Every failure (network error, non-200 status, body that is
        not valid JSON) is printed and reported as ``None`` rather than raised.
    """
    try:
        response = requests.get(camera_url(camera), timeout=timeout)
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            return None
        return response.json()

    # ValueError covers JSON decoding failures on requests versions where the
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"An error occurred: {e}")
        return None

def download_images(image_paths, downloaded_images, timeout=60):
    """Download images into ``DATA_DIR/'phenocam'`` and record the successes.

    Args:
        image_paths: Iterable of server-relative image paths; each is appended
            to ``BASE_URL`` and saved under its last path component.
        downloaded_images (dict): Cache of already-downloaded images, keyed by
            file name. It is updated in place: every image saved here gets
            ``downloaded_images[file_name] = True``.
        timeout (float): Seconds to wait for each request before giving up.

    Returns:
        None. Request errors are printed and the image is skipped; responses
        with a status other than 200 are skipped silently.
    """
    target_dir = DATA_DIR / 'phenocam'
    for image_path in image_paths:
        file_name = image_path.split('/')[-1]
        img_file = target_dir / file_name
        # Write under a temporary name first so that an interrupted run never
        # leaves a truncated file that looks like a finished download.
        partial_file = target_dir / (file_name + '.part')
        try:
            response = requests.get(BASE_URL + image_path, timeout=timeout)
            if response.status_code == 200:
                with open(partial_file, "wb") as file:
                    file.write(response.content)
                partial_file.replace(img_file)
                # Key by bare file name: the cache is built from os.listdir()
                # and queried with file names, so a Path key would never match.
                downloaded_images[file_name] = True

        except requests.exceptions.RequestException as e:
            print(f"\tAn error occurred: {e}")

        # Short random pause between requests (average 75 ms).
        delay = np.random.uniform(low=0.05, high=0.1)
        sleep(delay)
    

In [13]:
# Main retrieval loop. For every site: find the cameras close to the tower,
# record which midday images belong to which timestamp, download the images
# that are not on disk yet, and save the lookup after each camera so the run
# can be resumed.
phenocam_sites = pd.read_csv('phenocam_sites.csv')

# Create the image directory on a first run instead of failing in listdir().
phenocam_dir = DATA_DIR / 'phenocam'
phenocam_dir.mkdir(parents=True, exist_ok=True)

# File names already on disk; consulted to avoid downloading an image twice.
downloaded_images = {name: True for name in os.listdir(phenocam_dir)}

for site in tqdm(SITES):
    site_dir = MERGED_DIR / site
    with open(site_dir / 'meta.json', 'r') as f:
        site_meta = json.load(f)
    min_date = site_meta['MIN_DATE']
    max_date = site_meta['MAX_DATE']

    # Keep the cameras that fall inside the box of half-width MAX_DIST
    # centred on the site location.
    allowable_poly = get_polygon_wkt(site_meta['LOCATION_LAT'], site_meta['LOCATION_LON'], MAX_DIST)
    cameras = [
        row['Camera']
        for _, row in phenocam_sites.iterrows()
        if allowable_poly.contains(Point(row['LOCATION_LON'], row['LOCATION_LAT']))
    ]

    # Resume from the cached lookup when one exists, otherwise start with an
    # empty list for every half-hourly timestamp of the site record.
    lookup_file = site_dir / 'phenocam.pkl'
    if lookup_file.exists():
        # NOTE: unpickling can execute arbitrary code. This is acceptable only
        # because the file is a cache written by this notebook itself.
        with open(lookup_file, 'rb') as f:
            phenocam_lookup = pkl.load(f)
    else:
        phenocam_lookup = {d: [] for d in get_all_timestamps(min_date, max_date)}

    # Currently we are not using IR images.
    for camera in cameras:
        image_list = get_image_list(camera)
        if image_list is None:
            # The listing request failed (already reported by get_image_list);
            # skip this camera rather than crash the whole run.
            continue

        filtered_download_list = []
        for image in image_list:
            ts = unformat_timestamp(image['imgdate'])
            if ts < min_date or max_date < ts:
                continue

            img_file = image['imgpath'].split('/')[-1]
            # setdefault: the noon slot may be missing if the timestamp grid
            # does not include it or the cached lookup predates this range.
            images_at_ts = phenocam_lookup.setdefault(ts, [])
            if img_file not in images_at_ts:
                images_at_ts.append(img_file)

            if not downloaded_images.get(img_file, False):
                filtered_download_list.append(image['imgpath'])

        download_images(filtered_download_list, downloaded_images)

        # Cache progress after every camera. Write to a temporary file and
        # rename it so an interruption cannot leave a truncated pickle behind.
        tmp_lookup_file = site_dir / 'phenocam.pkl.tmp'
        with open(tmp_lookup_file, 'wb') as f:
            pkl.dump(phenocam_lookup, f)
        tmp_lookup_file.replace(lookup_file)

100%|██████████| 417/417 [38:12:25<00:00, 329.84s/it]     
