# Generate CONUS LFMC Image
#### Description
Creates a GeoTiff image of LFMC predictions that can be used to produce LFMC maps.

#### Input Images
1. An image of auxiliary data - latitude, longitude, elevation, slope, aspect and climate zone
2. Images of MODIS data for at least 1 year prior to the mapping date
3. Images of PRISM data for at least 1 year prior to the mapping date  
Note: Band names for PRISM data are confusing. When GEE converts an image collection to an image, it includes the date in the band name. Timestamps on PRISM data are midday, so when they are converted (rounded) to a date, this becomes the following day. So in the extracted GeoTiffs, bands for 2016-10-01 data will be named for 2016-10-02!

#### Other Inputs
1. Model directory - this should contain "run" directories - one for each model in the ensemble.
2. Data used to train the model - The notebook extracts normalisation bounds and one-hot encodings needed to prepare the input data. The normalisation bounds are saved to csv files, so if these files already exist, bounds can be loaded from these instead.
3. Legend file for Koppen climate zones. This should be a CSV as created by the "Extract Auxiliary Data.ipynb" notebook. It is used to convert the climate zone numbers in the auxiliary input into climate zone codes.


In [None]:
import glob
import numpy as np
import os
import pandas as pd
import time

from osgeo import gdal

In [None]:
import initialise
import common
from display_utils import display_frames

In [None]:
# Koppen climate zone legend: a CSV in the source directory whose first column
# becomes the frame index. Later cells read its 'Code' and 'Description' columns.
KOPPEN_LEGEND = os.path.join(common.SOURCE_DIR, 'Climate_zones.csv')
czones = pd.read_csv(KOPPEN_LEGEND, index_col=0)

In [None]:
# Paths of the LFMC difference rasters for the April and October 2018 dates
lfmc_maps_dir = os.path.join(common.MAPS_DIR, 'LFMC_maps')
april_file = os.path.join(lfmc_maps_dir, 'LFMC_difference_2018-04-01.tif')
october_file = os.path.join(lfmc_maps_dir, 'LFMC_difference_2018-10-01.tif')

In [None]:
# Read the April difference raster and keep, as a 1-D array, only the pixels
# above -10000 (presumably the no-data marker - TODO confirm)
april_image = gdal.Open(april_file, gdal.GA_ReadOnly)
april_data = april_image.ReadAsArray()
april_data = april_data[april_data > -10000]

In [None]:
# Absolute value of every retained April difference
april_diffs = abs(pd.Series(april_data))

In [None]:
# Deciles (0.1 through 1.0, i.e. up to the maximum) of the absolute April differences
april_diffs.quantile([decile / 10 for decile in range(1, 11)])

In [None]:
# Share of April pixels whose absolute difference is at most 10
((april_diffs <= 10).sum() / april_diffs.count()).round(2)

In [None]:
# Read the October difference raster, keep only the pixels above -10000
# (presumably the no-data marker - TODO confirm) and take absolute values
october_image = gdal.Open(october_file, gdal.GA_ReadOnly)
october_data = october_image.ReadAsArray()
october_data = october_data[october_data > -10000]
october_diffs = abs(pd.Series(october_data))

In [None]:
# Deciles (0.1 through 1.0, i.e. up to the maximum) of the absolute October differences
october_diffs.quantile([decile / 10 for decile in range(1, 11)])

In [None]:
# Share of October pixels whose absolute difference is at most 10
((october_diffs <= 10).sum() / october_diffs.count()).round(2)

In [None]:
# Share of April pixels whose absolute difference is at most 5
((april_diffs <= 5).sum() / april_diffs.count()).round(2)

In [None]:
# Share of October pixels whose absolute difference is at most 5
((october_diffs <= 5).sum() / october_diffs.count()).round(2)

In [None]:
# Paths of the base projection rasters for the April and October 2018 dates
lfmc_maps_dir = os.path.join(common.MAPS_DIR, 'LFMC_maps')
april_proj_file = os.path.join(lfmc_maps_dir, 'Projection_base_2018-04-01.tif')
october_proj_file = os.path.join(lfmc_maps_dir, 'Projection_base_2018-10-01.tif')

In [None]:
# Load the April projection raster. This rebinds april_image and april_data,
# which earlier cells used for the difference raster.
april_image = gdal.Open(april_proj_file, gdal.GA_ReadOnly)
# Keep only the first slice of the array (presumably the first band - TODO confirm)
april_data = april_image.ReadAsArray()[0]
# Summarise the pixels above -999; the 2-D grid itself is kept for later cells
april_projections = pd.Series(april_data[april_data > -999])
april_projections.describe().round(2)

In [None]:
# Load the October projection raster. This rebinds october_image and october_data,
# which earlier cells used for the difference raster.
october_image = gdal.Open(october_proj_file, gdal.GA_ReadOnly)
# Keep only the first slice of the array (presumably the first band - TODO confirm)
october_data = october_image.ReadAsArray()[0]
# Summarise the pixels above -999; the 2-D grid itself is kept for later cells
october_projections = pd.Series(october_data[october_data > -999])
october_projections.describe().round(2)

In [None]:
def data_to_series(data, min_value=-999):
    """Flatten an array and return the values above ``min_value`` as a Series.

    Parameters
    ----------
    data : array exposing ``flatten()`` (e.g. a numpy ndarray)
        Values to flatten; row-major order is preserved.
    min_value : number, optional
        Exclusive lower bound. Values less than or equal to it are dropped.

    Returns
    -------
    pandas.Series
        The retained values, with a fresh default integer index.
    """
    flat = data.flatten()
    keep = flat > min_value
    return pd.Series(flat[keep])

In [None]:
# Split each projection grid into west/east parts at pixel column 1287 and
# reduce every part to a Series of its values above the data_to_series threshold
west_april, east_april = map(data_to_series, np.hsplit(april_data, [1287]))
west_october, east_october = map(data_to_series, np.hsplit(october_data, [1287]))

In [None]:
# Pixel columns bounding the west / central / east regions.
# Both are currently 1544, so the "central" slices below are empty.
split_point1 = 1544  # alternative value noted by the author: 1154
split_point2 = 1544  # alternative values noted by the author: 1655, 1154

# Columns [0, split_point1) -> west, [split_point1, split_point2) -> central,
# [split_point2, end) -> east
west_april, central_april, east_april = map(
    data_to_series, np.hsplit(april_data, [split_point1, split_point2]))
west_october, central_october, east_october = map(
    data_to_series, np.hsplit(october_data, [split_point1, split_point2]))

In [None]:
# Descriptive statistics, one column per region/month, rounded to 2 decimals
region_series = {
    'conus_april': april_projections,
    'conus_october': october_projections,
    'west_april': west_april,
    'west_october': west_october,
    'central_april': central_april,
    'central_october': central_october,
    'east_april': east_april,
    'east_october': east_october,
}
pd.DataFrame({label: values.describe() for label, values in region_series.items()}).round(2)

In [None]:
# Reproject the Koppen climate zone raster onto the grid of the raster currently
# bound to april_image, so that the zones line up pixel-for-pixel with it.
KOPPEN_FILE = 'Beck_KG_V1_present_0p0083.tif'
# Climate zone source
czone_file = os.path.join(common.SOURCE_DIR, KOPPEN_FILE)
czone_src = gdal.Open(czone_file, gdal.GA_ReadOnly)
czone_proj = czone_src.GetProjection()
czone_geotrans = czone_src.GetGeoTransform()

# Target projection, geotransform and size, taken from april_image
# NOTE(review): when run top to bottom this is the April projection raster,
# not the difference raster opened earlier - confirm this is the intended grid
aux_proj = april_image.GetProjection()
aux_geotrans = april_image.GetGeoTransform()
x_size = april_image.RasterXSize
y_size = april_image.RasterYSize

# In-memory single-band byte raster for the reprojected data
dst = gdal.GetDriverByName('MEM').Create("", x_size, y_size, 1, gdal.GDT_Byte)
dst.SetGeoTransform(aux_geotrans)
dst.SetProjection(aux_proj)

# Reproject climate zone data to the target projection and resolution. Use the mode of the climate zones
# NOTE(review): the value returned by ReprojectImage is not checked
gdal.ReprojectImage(czone_src, dst, czone_proj, aux_proj, gdal.GRA_Mode)

# Read the result back as an array and show its shape
czone_data = dst.ReadAsArray()
czone_data.shape

In [None]:
# One row per pixel: April value, October value and climate zone number
pixel_columns = {
    'April_data': april_data.flatten(),
    'October_data': october_data.flatten(),
    'Climate_zone': czone_data.flatten(),
}
df = pd.DataFrame(pixel_columns)
# Keep rows whose April value is above -999
# NOTE(review): October values are not filtered here - confirm this is intended
df = df.loc[df['April_data'] > -999]

In [None]:
# Replace the numeric climate zone with its code and attach the description;
# the legend is matched on its index
zone_legend = czones[['Code', 'Description']]
df = df.merge(zone_legend, left_on="Climate_zone", right_index=True)
df = df.drop(columns='Climate_zone')
df = df.rename(columns={'Code': 'Climate_zone'})
# Per-pixel change from April to October
df['Difference'] = df['October_data'] - df['April_data']

In [None]:
# Show one table of descriptive statistics per climate zone.
# The grouped describe() is computed once and reused for both the tables and
# their titles; previously it was computed a second time just for the titles.
zone_stats = df.drop(columns='Description').groupby('Climate_zone').describe()
display_frames([stats.unstack().T for _, stats in zone_stats.iterrows()],
               [str(zone) for zone in zone_stats.index],
               precision=2)

In [None]:
# Median of each numeric column per climate zone, truncated to integers and
# ordered by the median April-to-October difference
(
    df.groupby(['Climate_zone', 'Description'])
    .median()
    .astype(int)
    .sort_values('Difference')
)

In [None]:
# Attach the absolute differences to the per-pixel frame by relabelling each
# Series with df's index.
# NOTE(review): this assumes april_diffs and october_diffs have exactly as many
# values as df has rows, in the same pixel order. They were filtered from
# different rasters with different thresholds, and df has also been through a
# merge - confirm the lengths and ordering really match.
df['April_diffs'] = april_diffs.set_axis(df.index)
df['October_diffs'] = october_diffs.set_axis(df.index)

In [None]:
# Median April/October values for zones described as 'Arid' or 'dry summer'
arid_or_dry_summer = df.Description.str.contains('Arid') | df.Description.str.contains('dry summer')
df.loc[arid_or_dry_summer, ['April_data', 'October_data']].median()

In [None]:
# Median April/October values for zones NOT described as 'Arid' or 'dry summer'
arid_or_dry_summer = df.Description.str.contains('Arid') | df.Description.str.contains('dry summer')
df.loc[~arid_or_dry_summer, ['April_data', 'October_data']].median()

In [None]:
# Mean April/October values for pixels whose zone description mentions
# 'Arid', 'Tropical', 'Polar' or 'dry summer'
selected_rows = df.Description.str.contains('Arid')
for keyword in ('Tropical', 'Polar', 'dry summer'):
    selected_rows = selected_rows | df.Description.str.contains(keyword)
df.loc[selected_rows, ['April_data', 'October_data']].mean()

In [None]:
# Mean April/October values for pixels whose zone description mentions none of
# 'Arid', 'Tropical', 'Polar' or 'dry summer'
selected_rows = df.Description.str.contains('Arid')
for keyword in ('Tropical', 'Polar', 'dry summer'):
    selected_rows = selected_rows | df.Description.str.contains(keyword)
df.loc[~selected_rows, ['April_data', 'October_data']].mean()

In [None]:
# Legend rows whose description mentions 'Arid', 'Tropical', 'Polar' or 'dry summer'
selected_zones = czones.Description.str.contains('Arid')
for keyword in ('Tropical', 'Polar', 'dry summer'):
    selected_zones = selected_zones | czones.Description.str.contains(keyword)
czones.loc[selected_zones]

In [None]:
# Legend rows whose description mentions none of 'Arid', 'Tropical', 'Polar' or 'dry summer'
selected_zones = czones.Description.str.contains('Arid')
for keyword in ('Tropical', 'Polar', 'dry summer'):
    selected_zones = selected_zones | czones.Description.str.contains(keyword)
czones.loc[~selected_zones]

In [None]:
# Geotransform of the raster bound to april_image. The cells below use
# element 0 as the x origin and element 1 as the pixel width.
xform = april_image.GetGeoTransform()

In [None]:
# x coordinate at pixel column 1287 (ignores any rotation terms in the geotransform)
xform[0] + xform[1] * 1287

In [None]:
# Fractional pixel column at x = -104 (presumably longitude 104 W - TODO confirm the raster's CRS)
(-104 - xform[0]) / xform[1]

In [None]:
# x coordinate at pixel column 1154 (ignores any rotation terms in the geotransform)
xform[0] + xform[1] * 1154

In [None]:
# Fractional pixel column at x = -95 (presumably longitude 95 W - TODO confirm the raster's CRS)
(-95 - xform[0]) / xform[1]

In [None]:
# Fractional pixel column at x = -97 (presumably longitude 97 W - TODO confirm the raster's CRS)
(-97 - xform[0]) / xform[1]