In [1]:
import json
import os
import time
from datetime import date

import numpy as np
import pandas as pd

"""Create CSV Data for ArcGIS to Interface With"""

'Create CSV Data for ArcGIS to Interface With'

In [2]:
def get_min_max_years(base_dir, sites_data_paths, default_min_year=2009, default_max_year=2018):
    """Return the year range covered by the imagery of all sites.

    Args:
        base_dir: Directory the file names in ``sites_data_paths`` are relative to.
        sites_data_paths: Iterable of per-site lists of file names. Each list
            must contain that site's metadata JSON file.
        default_min_year: Starting value for the minimum; the result is never
            later than this year.
        default_max_year: Starting value for the maximum; the result is never
            earlier than this year.

    Returns:
        Tuple ``(min_year, max_year)`` of ints.

    Raises:
        IndexError: If a site's list contains no metadata file.
    """
    min_year = default_min_year
    max_year = default_max_year
    for site_data_paths in sites_data_paths:
        # Match on the exact suffix first: a plain substring test would pick
        # "<site>_transition_dates.json" for any site whose name contains "meta".
        # The substring test is kept only as a fallback for other naming schemes.
        meta_candidates = (
            [path for path in site_data_paths if path.endswith('_meta.json')]
            or [path for path in site_data_paths if 'meta' in path]
        )
        meta_filepath = meta_candidates[0]
        with open(file=os.path.join(base_dir, meta_filepath)) as f:
            site_meta = json.load(f)['phenocam_site']

        # dates are read as strings whose first four characters are the year
        min_year = min(min_year, int(site_meta['date_start'][:4]))
        max_year = max(max_year, int(site_meta['date_end'][:4]))
    return min_year, max_year

In [3]:
def calc_days_between_date_strs(date_str1, date_str2):
    """Count the whole days from ``date_str1`` to ``date_str2``.

    Both arguments are dash-separated ``year-month-day`` strings. The result
    is negative when ``date_str2`` is the earlier date.
    """
    start_parts = [int(piece) for piece in date_str1.split('-')]
    end_parts = [int(piece) for piece in date_str2.split('-')]

    start = date(year=start_parts[0], month=start_parts[1], day=start_parts[2])
    end = date(year=end_parts[0], month=end_parts[1], day=end_parts[2])

    # subtracting two dates yields a timedelta made of whole days
    return (end - start).days

In [4]:
def calc_avg_last3yrs_stats(phases_yrly):
    """Average the transition date and duration over the last (up to) three phases.

    The window ends at the most recent phase whose ``'date'`` is not None and
    reaches back at most two more entries. Entries before the start of the
    list are never used, so fewer than three phases may be averaged. If no
    phase has a date, the window is just the first entry.

    Args:
        phases_yrly: Non-empty list of phase dicts ordered oldest to newest.
            Each dict must have a ``'date'`` key (a string whose characters
            after the first four read ``_MM_DD``, or None) and may have a
            numeric ``'duration'``.

    Returns:
        Tuple ``(avg_tdate, avg_dur)``. ``avg_tdate`` is the mean calendar day
        as ``"MM/DD"``, or None when no date in the window could be parsed.
        ``avg_dur`` is the mean of the durations present in the window as a
        float, or None when none is present.

    Raises:
        IndexError: If ``phases_yrly`` is empty.
    """
    if not phases_yrly:
        raise IndexError('phases_yrly is empty')

    # index of the most recent phase that actually has a date (0 if none has)
    idx = 0
    for i in range(len(phases_yrly) - 1, -1, -1):
        if phases_yrly[i]['date'] is not None:
            idx = i
            break

    def datestring_to_ordinal(date_str):
        # Every date is moved into the same (leap) year 2000 so only the month
        # and day take part in the average. Ordinals are used instead of epoch
        # timestamps so the result does not depend on the local timezone.
        try:
            parsed = time.strptime('2000' + date_str[4:], "%Y_%m_%d")
        except (TypeError, ValueError):
            return None
        return date(year=2000, month=parsed.tm_mon, day=parsed.tm_mday).toordinal()

    # the last dated phase plus up to two phases before it, never wrapping
    # around to the end of the list through negative indices
    window = phases_yrly[max(idx - 2, 0):idx + 1]

    ordinals = [datestring_to_ordinal(phase.get('date')) for phase in window]
    ordinals = [ordinal for ordinal in ordinals if ordinal is not None]
    durations = [phase.get('duration') for phase in window]
    durations = [duration for duration in durations if duration is not None]

    avg_last3yrs_tdate = None
    if ordinals:
        avg_last3yrs_tdate = date.fromordinal(sum(ordinals) // len(ordinals)).strftime("%m/%d")

    avg_last3yrs_dur = None
    if durations:
        avg_last3yrs_dur = sum(durations) / len(durations)

    return avg_last3yrs_tdate, avg_last3yrs_dur

In [5]:
def calc_avg_diff_yrly_stats(phases_yrly):
    """Average the year-over-year changes stored on each phase.

    Each statistic is averaged only over the phases that have a value for it.
    Phases whose value is missing or None are skipped rather than counted as
    zero.

    Args:
        phases_yrly: Iterable of phase dicts that may carry ``'diff_tdate'``,
            ``'diff_duration'`` and ``'diff_duration_prcnt'``. The older
            spelling ``'diff_duration_percent'`` is accepted as a fallback.

    Returns:
        Tuple ``(avg_diff_tdate, avg_diff_dur, avg_diff_dur_prcnt)``. The first
        two are truncated to int. The third is returned as a float because it
        is a ratio and would almost always truncate to 0. Any element is None
        when no phase provides that value.
    """
    def mean_of(*keys):
        values = []
        for phase in phases_yrly:
            # take the first of the accepted keys that holds a value
            for key in keys:
                value = phase.get(key)
                if value is not None:
                    values.append(value)
                    break
        return sum(values) / len(values) if values else None

    avg_diff_tdate = mean_of('diff_tdate')
    avg_diff_dur = mean_of('diff_duration')
    avg_diff_dur_prcnt = mean_of('diff_duration_prcnt', 'diff_duration_percent')

    return (
        None if avg_diff_tdate is None else int(avg_diff_tdate),
        None if avg_diff_dur is None else int(avg_diff_dur),
        avg_diff_dur_prcnt,
    )

In [6]:
def calc_days_between_firstlast_tdate(phases_yrly):
    """Return how many days the transition day moved between the first and last dated phase.

    Both phases are placed in the same (leap) year 2020 so that only month and
    day are compared. A positive result means the last phase falls later in
    the calendar year than the first.

    Args:
        phases_yrly: Iterable of phase dicts with ``'date'``, ``'month'`` and
            ``'day'`` keys, ordered oldest to newest. Phases whose ``'date'``
            is None are ignored. The input is not modified.

    Returns:
        Signed number of days as an int, or None when no phase has a date or
        the month/day values do not form a valid date.
    """
    dated_phases = [phase for phase in phases_yrly if phase['date'] is not None]
    if not dated_phases:
        return None

    first_phase = dated_phases[0]
    last_phase = dated_phases[-1]
    try:
        first_date = date(year=2020, month=first_phase['month'], day=first_phase['day'])
        last_date = date(year=2020, month=last_phase['month'], day=last_phase['day'])
    except (TypeError, ValueError):
        # month/day missing (None) or out of range
        return None

    return (last_date - first_date).days

In [7]:
def _transition_date(transition):
    """Return the transition's calendar date, or None for an entry without month/day."""
    if transition['month'] is None or transition['day'] is None:
        return None
    return date(year=transition['year'], month=transition['month'], day=transition['day'])


def _align_phases_to_years(phases, years):
    """Return one phase per entry of ``years``, with an all-None placeholder for missing years."""
    first_by_year = {}
    for phase in phases:
        # phases arrive in chronological order, so this keeps the earliest one of each year
        # NOTE(review): assumes at most one transition per direction and year matters — confirm
        first_by_year.setdefault(phase['year'], phase)

    aligned = []
    for yr in years:
        phase = first_by_year.get(yr)
        if phase is None:
            phase = {'year': yr, 'month': None, 'day': None, 'date': None,
                     'tdate': None, 'duration': None}
        aligned.append(phase)
    return aligned


def _add_yearly_diffs(phases_yrly):
    """Store each phase's change from the previous entry under the ``diff_*`` keys (None if unknown)."""
    previous = None
    for phase in phases_yrly:
        diff_tdate = None
        diff_duration = None
        diff_duration_prcnt = None

        # the first entry has no previous year to compare against
        if previous is not None:
            current_date = _transition_date(phase)
            prev_date = _transition_date(previous)
            if current_date is not None and prev_date is not None:
                # NOTE(review): full dates are compared, so an unchanged onset
                # day yields about 365 rather than 0 — confirm this is intended
                diff_tdate = (current_date - prev_date).days

            current_duration = phase['duration']
            prev_duration = previous['duration']
            if current_duration is not None and prev_duration is not None:
                diff_duration = current_duration - prev_duration
                if prev_duration != 0:
                    diff_duration_prcnt = diff_duration / prev_duration

        phase['diff_tdate'] = diff_tdate
        phase['diff_duration'] = diff_duration
        phase['diff_duration_prcnt'] = diff_duration_prcnt
        previous = phase


def _general_phase_stats(name, phases_yrly):
    """Return the whole-record summary stats of one phenophase, with ``name`` embedded in the keys."""
    recorded = [phase for phase in phases_yrly if phase['tdate'] is not None]
    last_phase = recorded[-1] if recorded else None

    if phases_yrly:
        avg_last3yrs_tdate, avg_last3yrs_dur = calc_avg_last3yrs_stats(phases_yrly)
        avg_diff_tdate, avg_diff_dur, avg_diff_dur_prcnt = calc_avg_diff_yrly_stats(phases_yrly)
    else:
        # no years requested: nothing to average
        avg_last3yrs_tdate = avg_last3yrs_dur = None
        avg_diff_tdate = avg_diff_dur = avg_diff_dur_prcnt = None
    diff_firstlast_tdate = calc_days_between_firstlast_tdate(phases_yrly)

    # first and last phases that have a duration (the final transition of a record has none)
    durations = [phase['duration'] for phase in recorded if phase['duration'] is not None]
    diff_firstlast_dur = None
    diff_firstlast_dur_prcnt = None
    if durations:
        # NOTE(review): sign is first minus last, as in the original expression — confirm
        diff_firstlast_dur = durations[0] - durations[-1]
        if durations[0] != 0:
            diff_firstlast_dur_prcnt = diff_firstlast_dur / durations[0]

    return {
        'last_{}_tdate'.format(name): last_phase['tdate'] if last_phase else None,
        'last_{}_dur'.format(name): last_phase['duration'] if last_phase else None,
        'avg_last3yrs_{}_tdate'.format(name): avg_last3yrs_tdate,
        'avg_last3yrs_{}_dur'.format(name): avg_last3yrs_dur,
        'avg_diff_yrly_{}_tdate'.format(name): avg_diff_tdate,
        'avg_diff_yrly_{}_dur'.format(name): avg_diff_dur,
        'avg_diff_yrly_{}_dur_prcnt'.format(name): avg_diff_dur_prcnt,
        'diff_firstlast_{}_tdate'.format(name): diff_firstlast_tdate,
        'diff_firstlast_{}_dur'.format(name): diff_firstlast_dur,
        'diff_firstlast_{}_dur_prcnt'.format(name): diff_firstlast_dur_prcnt,
    }


def calc_stats_by_year(siteJSON, years):
    """Build the flat dict of yearly and whole-record phenophase statistics for one site.

    Transitions are split into bud burst (``'rising'`` truthy) and leaf
    senescence (``'rising'`` falsy). Each group is matched to ``years`` by the
    transition's own ``'year'``, so a year without a transition yields None
    values instead of shifting later years into the wrong columns.

    Args:
        siteJSON: Dict with a ``'transitions'`` list. Each transition needs
            ``'year'``, ``'month'``, ``'day'``, ``'doy'`` and ``'rising'``.
            The list is sorted in place and its dicts gain ``'duration'``,
            ``'tdate'``, ``'diff_*'`` and, if absent, ``'date'`` keys.
        years: Sequence of years to report, in output order.

    Returns:
        Dict with, for every year and phase, the keys ``'<yr>_yrly_<phase>_tdate'``,
        ``'<yr>_yrly_<phase>_dur'``, ``'<yr>_diff_yrly_<phase>_tdate'``,
        ``'<yr>_diff_yrly_<phase>_dur'`` and ``'<yr>_diff_yrly_<phase>_dur_prcnt'``,
        followed by the whole-record keys ``'last_*'``, ``'avg_last3yrs_*'``,
        ``'avg_diff_yrly_*'`` and ``'diff_firstlast_*'`` for bud burst and then
        senescence. Values that cannot be computed are None.
    """
    transitions = siteJSON['transitions']

    # chronological order; a (year, doy) tuple avoids the tie that year*365+doy
    # produces between day 366 of a leap year and day 1 of the next year
    transitions.sort(key=lambda transition: (transition['year'], transition['doy']))

    # duration of a phase = days until the next transition of either direction
    for current, following in zip(transitions, transitions[1:]):
        current['duration'] = (_transition_date(following) - _transition_date(current)).days
    if transitions:
        # nothing follows the final transition, so its duration is unknown
        transitions[-1]['duration'] = None

    for transition in transitions:
        # reading-friendly date
        transition['tdate'] = '{}/{}/{}'.format(transition['month'], transition['day'], transition['year'])
        # The summary helpers select phases by their 'date' value. Keep an
        # existing one; otherwise provide YYYY_MM_DD, the layout that
        # calc_avg_last3yrs_stats parses.
        transition.setdefault(
            'date',
            '{:0>4}_{:0>2}_{:0>2}'.format(transition['year'], transition['month'], transition['day']),
        )

    # sort into respective yearly phenophases, one entry per requested year
    phases_by_name = [
        ('budburst', _align_phases_to_years([t for t in transitions if t['rising']], years)),
        ('senescence', _align_phases_to_years([t for t in transitions if not t['rising']], years)),
    ]

    # calculate changes from previous year
    for _, phases_yrly in phases_by_name:
        _add_yearly_diffs(phases_yrly)

    stats = {}

    # input yearly stats: bud burst columns first, then senescence, for each year
    for i, yr in enumerate(years):
        for name, phases_yrly in phases_by_name:
            phase = phases_yrly[i]
            stats['{}_yrly_{}_tdate'.format(yr, name)] = phase['tdate']
            stats['{}_yrly_{}_dur'.format(yr, name)] = phase['duration']
            stats['{}_diff_yrly_{}_tdate'.format(yr, name)] = phase['diff_tdate']
            stats['{}_diff_yrly_{}_dur'.format(yr, name)] = phase['diff_duration']
            stats['{}_diff_yrly_{}_dur_prcnt'.format(yr, name)] = phase['diff_duration_prcnt']

    # input general stats, each phase computed from its own list
    for name, phases_yrly in phases_by_name:
        stats.update(_general_phase_stats(name, phases_yrly))

    return stats

In [8]:
def get_site_data(site_data_paths, min_year, max_year, base_dir):
    """Load one site's JSON files and return its flat record for the CSV.

    Args:
        site_data_paths: File names belonging to one site; must include the
            metadata file and the transition dates file.
        min_year: First year to report statistics for.
        max_year: Last year (inclusive) to report statistics for.
        base_dir: Directory the file names are relative to.

    Returns:
        Dict holding the site metadata fields followed by every statistic
        produced by ``calc_stats_by_year``.

    Raises:
        IndexError: If either of the two files is missing from ``site_data_paths``.
    """
    # Match on the exact suffix first: a plain substring test would pick the
    # wrong file for a site whose name itself contains "meta". The substring
    # test is kept only as a fallback for other naming schemes.
    meta_filepath = (
        [path for path in site_data_paths if path.endswith('_meta.json')]
        or [path for path in site_data_paths if 'meta' in path]
    )[0]
    transition_dates_filepath = (
        [path for path in site_data_paths if path.endswith('_transition_dates.json')]
        or [path for path in site_data_paths if 'transition_dates' in path]
    )[0]
    with open(file=os.path.join(base_dir, meta_filepath)) as f:
        meta_data = json.load(f)
    with open(file=os.path.join(base_dir, transition_dates_filepath)) as f:
        transition_data = json.load(f)

    # meta data
    site = meta_data['phenocam_site']
    meta = {
        'sitename': site['sitename'],
        'location': site['long_name'],
        'latitude': site['lat'],
        'longitude': site['lon'],
        'elevation': site['elevation'],
        'first_im_date': site['date_start'],
        'last_im_date': site['date_end'],
        'last_updated': meta_data['last_updated'],
        # a null species would otherwise crash on .replace; newlines would break CSV rows visually
        'dominant_species': (site['dominant_species'] or '').replace('\n', ' '),
        # NOTE(review): this is the number of days between first and last image,
        # not a count of images — confirm the column name is intended
        'num_images': calc_days_between_date_strs(site['date_start'], site['date_end']),
    }

    # stats data
    years = list(range(min_year, max_year + 1))
    stats = calc_stats_by_year(transition_data, years)

    return {**meta, **stats}

In [9]:
def create_csv(source_dir='./../../../Phenophase_Classification/phenocam_data/',
               file_path='./static/mapdata.csv'):
    """Collect every site's metadata and statistics and write them to a CSV file.

    Args:
        source_dir: Directory containing ``<site>_meta.json`` and
            ``<site>_transition_dates.json`` files.
        file_path: Destination CSV path; its directory is created if needed.

    Returns:
        None. The table is written to ``file_path``, one row per site sorted
        by site name, without columns that are empty for every site.
    """
    # Group files by the exact site name in front of the known suffix. A
    # substring match would also hand "harvardbarn_*" files to site "harvard"
    # and would split site names that contain an underscore.
    suffixes = ('_meta.json', '_transition_dates.json')
    files_by_site = {}
    for filename in sorted(os.listdir(source_dir)):
        for suffix in suffixes:
            if filename.endswith(suffix):
                files_by_site.setdefault(filename[:-len(suffix)], []).append(filename)
                break
    sites_data_paths = list(files_by_site.values())
    min_year, max_year = get_min_max_years(source_dir, sites_data_paths)

    # gather data for each site
    data = [get_site_data(site, min_year, max_year, source_dir) for site in sites_data_paths]

    # sort data alphabetically by sitename
    data.sort(key=lambda x: x['sitename'])

    # create dataframe
    df = pd.DataFrame(data)

    # Convert empty strings to NaN. replace() returns a new frame, so the
    # result must be kept. None cells already count as missing for dropna()
    # and to_csv(), so they need no separate conversion.
    df = df.replace('', np.nan)

    # drop columns that only have missing values (keyword form is required by pandas 2)
    df = df.dropna(axis=1, how='all')

    # save dataframe
    output_dir = os.path.dirname(file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(file_path)

In [10]:
# Run the export: reads the PhenoCam JSON files and writes ./static/mapdata.csv.
create_csv()