# Compare Multi-tempCNN Out-of-site and Modis-tempCNN Maps

In [None]:
import os
import gdal
import numpy as np
import pandas as pd
import json

import matplotlib.pyplot as plt

import initialise
import common
from display_utils import display_frames

## Locations of TIFFs

In [None]:
# Directory that holds the LFMC map GeoTIFFs
tif_dir = os.path.join(common.MAPS_DIR, 'LFMC_maps')
# The Multi-tempCNN map, the Modis-tempCNN map and the map of their differences
multi_tif, modis_tif, diff_tif = (
    os.path.join(tif_dir, file_name)
    for file_name in (
        'Multi-tempCNN_Out-of-Site_base_2017-10-01.tif',
        'Modis-tempCNN_2017_merge10_2017-10-01.tif',
        'Multi-tempCNN vs Modis-tempCNN.tif',
    )
)
# Auxiliary raster; its band descriptions become column names further down
aux_tif = os.path.join(common.GEE_MAPS_DIR, 'GEE_EPSG-4326_2000', 'conus_aux.tif')

In [None]:
def num_pixels(start_loc, end_loc, pixel_size, convert=np.round):
    """Return the number of pixels between two locations.

    The distance ``end_loc - start_loc`` is divided element-wise by
    ``pixel_size``; scalars and equal-length sequences are both accepted.

    Parameters
    ----------
    start_loc, end_loc : scalar or sequence
        Start and end coordinates.
    pixel_size : scalar or sequence
        Size of one pixel along each coordinate.
    convert : callable or None
        Applied to the raw pixel counts (rounding by default). A falsy value
        skips the conversion.

    Returns
    -------
    The pixel count(s), cast back to the type of ``start_loc`` (for example a
    tuple in gives a tuple out).
    """
    start, end, size = (np.array(value) for value in (start_loc, end_loc, pixel_size))
    count = (end - start) / size
    count = convert(count) if convert else count
    # Hand the result back in the same container/scalar type the caller used
    return type(start_loc)(count)

## Load the auxiliary data

In [None]:
# Open the auxiliary raster read-only and load all of its bands into one array
aux_file = gdal.Open(aux_tif, gdal.GA_ReadOnly)
aux_data = aux_file.ReadAsArray()
aux_data.shape
# GDAL band numbers start at 1; the band descriptions are used as column names later
aux_bands = [
    band.GetDescription()
    for band in map(aux_file.GetRasterBand, range(1, aux_file.RasterCount + 1))
]

## Load the Multi-tempCNN LFMC estimates

In [None]:
# Open the Multi-tempCNN map read-only and load its bands into an array
tif = gdal.Open(multi_tif, gdal.GA_ReadOnly)
image = tif.ReadAsArray()
# Kept for later: shape[1] and shape[2] give the window used to crop the auxiliary data
shape = image.shape
# Last expression of the cell, so the notebook displays the shape
shape

## Merge the auxiliary data and Multi-tempCNN estimates
The auxiliary data must first be aligned with (cropped to the extent of) the Multi-tempCNN estimates.

In [None]:
aux_transform = aux_file.GetGeoTransform()
tif_transform = tif.GetGeoTransform()
# GDAL geotransform layout: items 0 and 3 are the x and y of the top-left corner,
# items 1 and 5 are the pixel width and height. Working in (y, x) order gives the
# (row, column) offset of the LFMC map inside the auxiliary raster.
aux_offset = num_pixels(
    (aux_transform[3], aux_transform[0]),
    (tif_transform[3], tif_transform[0]),
    (aux_transform[5], aux_transform[1]),
)
aux_offset = np.int32(aux_offset)
aux_offset

# Crop the auxiliary bands to the window covered by the LFMC map.
# NOTE: aux_data is overwritten with its cropped version, so re-running this cell
# crops the already-cropped array again.
n_rows, n_cols = shape[1], shape[2]
row_start, col_start = aux_offset
aux_data = aux_data[:, row_start:(n_rows + row_start), col_start:(n_cols + col_start)]
# The reshape fails if the crop window ran past the edge of the auxiliary raster
aux_data = aux_data.reshape(len(aux_bands), n_rows, n_cols)
all_data = np.concatenate((image, aux_data), axis=0)

## Convert the image to a dataframe
Extract the required fields from the numpy array and flatten to a data frame. Remove the NODATA pixels.

In [None]:
# Flatten every band into a Series indexed by (row, column), then place the
# bands side by side as named columns.
band_series = [pd.DataFrame(band).stack() for band in all_data]
data_df = pd.concat(band_series, axis=1, keys=['lfmc', 'conf'] + aux_bands)
# Keep only pixels with an LFMC value above -990 (drops the NODATA pixels)
data_df = data_df[data_df.lfmc > -990]
with pd.option_context('display.float_format', '{:.2f}'.format):
    display(data_df.describe())

# LFMC Estimation Analysis

In [None]:
# Building blocks for the regional masks. Both longitude tests are strict, so
# pixels at exactly -105 fall in neither the western nor the eastern group.
west = data_df.longitude < -105
east = data_df.longitude > -105
pacific_nw = data_df.longitude.lt(-120) & data_df.latitude.gt(42)
nth_rockies = data_df.longitude.between(-117, -110) & data_df.latitude.gt(44)
corn_belt = data_df.longitude.between(-100, -80) & data_df.climate_zone.eq(25)
cfa_south_east = (data_df.climate_zone == 14) & (data_df.longitude > -100)

# Region label -> boolean pixel mask, in display order
regions = {
    'Western CONUS excluding Pacific NW and Nth Rockies': west & ~pacific_nw & ~nth_rockies,
    'Pacific Northwest and the northern Rockies': pacific_nw | nth_rockies,
    'Mid and eastern CONUS': east,
    'Mid and eastern CONUS, excluding mid-western Corn Belt': east & ~corn_belt,
    'Mid-western Corn Belt': corn_belt,
    'South eastern Cfa climate zone': cfa_south_east,
}

# Summary statistics of the LFMC estimate and its uncertainty for each region
region_stats = [data_df[mask][['lfmc', 'conf']].describe() for mask in regions.values()]
with pd.option_context('display.float_format', '{:.2f}'.format):
    display(pd.concat(region_stats, keys=list(regions), axis=1))

# Estimation Uncertainty Analysis

## Uncertainty summary statistics

In [None]:
# Summary statistics of the 'conf' (uncertainty) column, shown to two decimals
conf_stats = data_df.conf.describe()
with pd.option_context('display.float_format', '{:.2f}'.format):
    display(conf_stats)

## Percentage of pixels with uncertainty < 10

In [None]:
# Share (in %) of the non-null pixels whose uncertainty is below 10
(data_df.conf.lt(10).sum() * 100 / data_df.conf.count()).round(2)

# Comparison with Modis-tempCNN

In [None]:
# Load the difference map and attach the (already cropped) auxiliary bands
tif = gdal.Open(diff_tif, gdal.GA_ReadOnly)
image = tif.ReadAsArray()
# Wrapping image in a list gives it a leading band axis so it can be concatenated
# with the auxiliary bands (assumes the difference map has a single band)
all_data = np.concatenate(([image], aux_data), axis=0)
# One (row, column)-indexed Series per band, combined into named columns
diff_df = pd.concat(
    [pd.DataFrame(band).stack() for band in all_data],
    axis=1,
    keys=['diff'] + aux_bands,
)
# Keep only pixels with a difference above -990 (drops the NODATA pixels)
diff_df = diff_df[diff_df['diff'] > -990]

## Summary statistics

In [None]:
# Summary statistics of the per-pixel differences, shown to two decimals
diff_stats = diff_df['diff'].describe()
with pd.option_context('display.float_format', '{:.2f}'.format):
    display(diff_stats)

## Percentage of pixels where Multi-tempCNN estimate is higher than Modis-tempCNN

In [None]:
# Percentages of pixels falling in various ranges of the per-pixel difference.
# The labels treat the difference as Multi-tempCNN minus Modis-tempCNN
# (NOTE(review): confirm against how the difference TIFF was produced).
#
# The first two figures test ">= 0", so a zero difference is counted; their
# labels now say ">=" to match the test actually applied (they previously
# said ">"). The printed numbers are unchanged.
diff_values = diff_df['diff']
diff_count = diff_values.count()  # non-null pixels; the denominator for every figure

print('Percentage with Multi-tempCNN estimate >= Modis-tempCNN:',
      ((diff_values >= 0).sum() * 100 / diff_count).round(2))

print('Percentage with rounded Multi-tempCNN estimate >= Modis-tempCNN:',
      ((diff_values.round() >= 0).sum() * 100 / diff_count).round(2))

print('Percentage with Multi-tempCNN estimate close to or greater than Modis-tempCNN:',
      ((diff_values >= -1.3).sum() * 100 / diff_count).round(2))

print('Percentage with absolute difference between Multi-tempCNN estimate and Modis-tempCNN < 4:',
      ((diff_values.abs() < 4).sum() * 100 / diff_count).round(2))

print('Percentage with Multi-tempCNN estimate greater than 4 more than Modis-tempCNN:',
      ((diff_values > 4).sum() * 100 / diff_count).round(2))

## Estimation differences in Regions of Interest

In [None]:
# Load the Modis-tempCNN map and attach the (already cropped) auxiliary bands
tif = gdal.Open(modis_tif, gdal.GA_ReadOnly)
image = tif.ReadAsArray()
all_data = np.concatenate((image, aux_data), axis=0)
# One (row, column)-indexed Series per band, combined into named columns
modis_df = pd.concat(
    [pd.DataFrame(band).stack() for band in all_data],
    axis=1,
    keys=['lfmc', 'conf'] + aux_bands,
)
# Keep only pixels with an LFMC value above -990 (drops the NODATA pixels)
modis_df = modis_df[modis_df['lfmc'] > -990]
modis_lfmc_stats = modis_df['lfmc'].describe()
with pd.option_context('display.float_format', '{:.2f}'.format):
    display(modis_lfmc_stats)

In [None]:
# Mean difference in each region of interest (region label -> pixel mask).
# The result is a one-row table that is displayed by a later cell.
diff_regions = {
    'Pacific Northwest': diff_df.longitude.lt(-120) & diff_df.latitude.gt(42) & diff_df.climate_zone.isin([9, 15]),
    'Southern Texas': diff_df.latitude.lt(30) & diff_df.climate_zone.eq(6),
    'Southern Florida': diff_df.latitude.lt(30) & diff_df.climate_zone.isin([1, 2, 3]),
    'Mid-west Cornbelt': diff_df.longitude.between(-100, -80) & diff_df.climate_zone.eq(25),
    'Deserts': diff_df.climate_zone.eq(4),
}
temp_df3 = pd.concat(
    [diff_df[mask][['diff']].mean() for mask in diff_regions.values()],
    keys=list(diff_regions),
    axis=1,
)

In [None]:
# Mean Multi-tempCNN LFMC in each region of interest (region label -> pixel mask).
# The result is a one-row table that is displayed by a later cell.
multi_regions = {
    'Pacific Northwest': data_df.longitude.lt(-120) & data_df.latitude.gt(42) & data_df.climate_zone.isin([9, 15]),
    'Southern Texas': data_df.latitude.lt(30) & data_df.climate_zone.eq(6),
    'Southern Florida': data_df.latitude.lt(30) & data_df.climate_zone.isin([1, 2, 3]),
    'Mid-west Cornbelt': data_df.longitude.between(-100, -80) & data_df.climate_zone.eq(25),
    'Deserts': data_df.climate_zone.eq(4),
}
temp_df1 = pd.concat(
    [data_df[mask][['lfmc']].mean() for mask in multi_regions.values()],
    keys=list(multi_regions),
    axis=1,
)

In [None]:
# Mean Modis-tempCNN LFMC in each region of interest (region label -> pixel mask).
# The result is a one-row table that is displayed by a later cell.
modis_regions = {
    'Pacific Northwest': modis_df.longitude.lt(-120) & modis_df.latitude.gt(42) & modis_df.climate_zone.isin([9, 15]),
    'Southern Texas': modis_df.latitude.lt(30) & modis_df.climate_zone.eq(6),
    'Southern Florida': modis_df.latitude.lt(30) & modis_df.climate_zone.isin([1, 2, 3]),
    'Mid-west Cornbelt': modis_df.longitude.between(-100, -80) & modis_df.climate_zone.eq(25),
    'Deserts': modis_df.climate_zone.eq(4),
}
temp_df2 = pd.concat(
    [modis_df[mask][['lfmc']].mean() for mask in modis_regions.values()],
    keys=list(modis_regions),
    axis=1,
)

In [None]:
# Stack the three one-row region tables under a model/difference label, then drop
# the inner index level (the original 'lfmc' / 'diff' row names)
region_comparison = pd.concat(
    [temp_df1, temp_df2, temp_df3],
    keys=['Multi-tempCNN', 'Modis-tempCNN', 'Difference'],
).droplevel(1)
with pd.option_context('display.float_format', '{:.2f}'.format):
    display(region_comparison)

## Estimation differences by climate zone

In [None]:
# Climate-zone lookup table, indexed by its first column; 'Code' holds the zone code
czones = pd.read_csv(os.path.join(common.SOURCE_DIR, 'Climate_zones.csv'), index_col=0)
zone_codes = czones['Code']
on_zone_index = dict(left_index=True, right_index=True)

# Per-zone means of the Multi-tempCNN estimate and its uncertainty, re-indexed by zone code
multi_by_zone = data_df.groupby(['climate_zone'])
temp_df = multi_by_zone[['lfmc', 'conf']].mean().merge(zone_codes, **on_zone_index).set_index('Code')
temp_df.columns = ['Mean LFMC', 'Uncertainty']

# Per-zone mean difference and pixel count, aligned to temp_df on the zone code
temp_df['Differences'] = (
    diff_df.groupby(['climate_zone'])[['diff']].mean().merge(zone_codes, **on_zone_index).set_index('Code')
)
temp_df['Pixel Count'] = multi_by_zone[['lfmc']].count().merge(zone_codes, **on_zone_index).set_index('Code')
# Each zone's share of all valid Multi-tempCNN pixels
temp_df['% of CONUS'] = temp_df['Pixel Count'] * 100 / len(data_df)
temp_df.rename_axis('Climate Zone', inplace=True)

with pd.option_context('display.float_format', '{:.2f}'.format):
    display(temp_df.sort_values('Uncertainty'))

In [None]:
# Bar charts of the per-zone statistics, one subplot per statistic, with each bar
# drawn in its climate zone's colour.
#
# Colours are looked up by zone code in the order of temp_df's index (the order
# the bars are drawn in), so a bar always gets its own zone's colour regardless of
# the row order of the climate-zone table. Previously the colours followed the
# table's row order and only matched the bars if both happened to be sorted alike.
zone_rgb = czones.set_index('Code').loc[temp_df.index, ['Red', 'Green', 'Blue']]
# Scale the channels by 255 so that full intensity maps to 1.0
# (assumes Red/Green/Blue are 8-bit values, 0-255)
colours = zone_rgb.values / 255
axes = temp_df[['Mean LFMC', 'Uncertainty', 'Differences', '% of CONUS']].plot(
    kind='bar', subplots=True, layout=(2, 2), figsize=(12, 8), sharex=True, legend=False)
for ax in axes.flatten():
    # One patch per bar, in the same order as temp_df's rows
    for patch, colour in zip(ax.patches, colours):
        patch.set_facecolor(colour)
plt.tight_layout()

In [None]:
# Count the pixels in each climate zone of the climate-zone raster
czone_file = os.path.join(common.MAPS_DIR, 'Climate_zones', 'climate_zones.vrt')
czone_tif = gdal.Open(czone_file, gdal.GA_ReadOnly)
czone_array = czone_tif.ReadAsArray()
czone_array = czone_array.astype(np.int8)
# One row per pixel; only pixels with a positive zone number are counted
df2 = pd.DataFrame({'czone': czone_array.flatten()})
df2 = df2.loc[df2.czone > 0]
df2.groupby('czone').size()