# Generate CONUS LFMC Image
#### Description
Creates a GeoTiff image of LFMC predictions that can be used to produce LFMC maps.

#### Input Images
1. An image of auxiliary data - latitude, longitude, elevation, slope, aspect and climate zone
2. Images of MODIS data for at least 1 year prior to the mapping date
3. Images of PRISM data for at least 1 year prior to the mapping date  
Note: Band names for PRISM data are confusing. When GEE converts an image collection to an image, it includes the date in the band name. Timestamps on PRISM data are midday, so when they are converted (rounded) to a date, this becomes the following day. So in the extracted GeoTiffs, bands for 2016-10-01 data will be named for 2016-10-02!

#### Other Inputs
1. Model directory - this should contain "run" directories - one for each model in the ensemble.
2. Data used to train the model - The notebook extracts normalisation bounds and one-hot encodings needed to prepare the input data. The normalisation bounds are saved to csv files, so if these files already exist, bounds can be loaded from these instead.
3. Legend file for Koppen climate zones. This should be a CSV as created by the "Extract Auxiliary Data.ipynb" notebook. It is used to convert the climate zone numbers in the auxiliary input into climate zone codes.


In [1]:
import glob
import numpy as np
import os
import pandas as pd
import time

from osgeo import gdal

In [2]:
import initialise
import common
from display_utils import display_frames

In [3]:
# Legend CSV for the Koppen climate zones, located in the project's source-data directory
KOPPEN_LEGEND = os.path.join(common.SOURCE_DIR, 'Climate_zones.csv')
# The first CSV column becomes the index, so legend rows can be looked up by that column's value
czones = pd.read_csv(KOPPEN_LEGEND, index_col=0)

In [4]:
# Both LFMC difference rasters live in the same "LFMC_maps" folder under the maps directory
lfmc_maps_dir = os.path.join(common.MAPS_DIR, 'LFMC_maps')
april_file = os.path.join(lfmc_maps_dir, 'LFMC_difference_2018-04-01.tif')
october_file = os.path.join(lfmc_maps_dir, 'LFMC_difference_2018-10-01.tif')

In [5]:
# Read the April difference raster, flatten it once, and keep only values above -10000
# (presumably the cut-off that excludes the raster's no-data fill - TODO confirm)
april_image = gdal.Open(april_file, gdal.GA_ReadOnly)
april_pixels = april_image.ReadAsArray().flatten()
april_data = april_pixels[april_pixels > -10000]

In [6]:
# Absolute magnitude of each retained April difference; the sign is discarded
april_diffs = pd.Series(april_data).abs()

In [7]:
# Deciles of the absolute April differences, from the 10th percentile up to the maximum
decile_levels = [.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
april_diffs.quantile(decile_levels)

0.1     1.100246
0.2     2.231766
0.3     3.431123
0.4     4.744949
0.5     6.222733
0.6     7.868958
0.7     9.725439
0.8    11.871115
0.9    14.875572
1.0    71.509903
dtype: float64

In [8]:
# Fraction of April pixels whose absolute difference is at most 10, rounded to 2 decimals
april_within_10 = april_diffs.le(10).sum()
(april_within_10 / april_diffs.count()).round(2)

0.71

In [9]:
# Read the October difference raster, flatten it once, and keep only values above -10000
# (presumably the cut-off that excludes the raster's no-data fill - TODO confirm)
october_image = gdal.Open(october_file, gdal.GA_ReadOnly)
october_pixels = october_image.ReadAsArray().flatten()
october_data = october_pixels[october_pixels > -10000]
# Absolute magnitude of each retained difference; the sign is discarded
october_diffs = pd.Series(october_data).abs()

In [10]:
# Deciles of the absolute October differences, from the 10th percentile up to the maximum
decile_levels = [.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
october_diffs.quantile(decile_levels)

0.1     0.842575
0.2     1.708359
0.3     2.618912
0.4     3.610596
0.5     4.724976
0.6     6.033585
0.7     7.639532
0.8     9.761243
0.9    13.123604
1.0    62.081757
dtype: float64

In [11]:
# Fraction of October pixels whose absolute difference is at most 10, rounded to 2 decimals
october_within_10 = october_diffs.le(10).sum()
(october_within_10 / october_diffs.count()).round(2)

0.81

In [12]:
# Fraction of April pixels whose absolute difference is at most 5, rounded to 2 decimals
april_within_5 = april_diffs.le(5).sum()
(april_within_5 / april_diffs.count()).round(2)

0.42

In [13]:
# Fraction of October pixels whose absolute difference is at most 5, rounded to 2 decimals
october_within_5 = october_diffs.le(5).sum()
(october_within_5 / october_diffs.count()).round(2)

0.52

In [14]:
# Both projection rasters live in the same "LFMC_maps" folder under the maps directory
projection_maps_dir = os.path.join(common.MAPS_DIR, 'LFMC_maps')
april_proj_file = os.path.join(projection_maps_dir, 'Projection_base_2018-04-01.tif')
october_proj_file = os.path.join(projection_maps_dir, 'Projection_base_2018-10-01.tif')

In [15]:
# Re-point april_image/april_data at the April projection raster; only the first
# element of the array read from the file is used
april_image = gdal.Open(april_proj_file, gdal.GA_ReadOnly)
april_data = april_image.ReadAsArray()[0]
# Flatten once, then keep values above -999 (presumably the no-data cut-off - TODO confirm)
april_flat = april_data.flatten()
april_projections = pd.Series(april_flat[april_flat > -999])
april_projections.describe().round(2)

count    2498785.00
mean         116.49
std           20.91
min           49.56
25%          100.08
50%          117.50
75%          128.58
max          249.38
dtype: float64

In [16]:
# Re-point october_image/october_data at the October projection raster; only the first
# element of the array read from the file is used
october_image = gdal.Open(october_proj_file, gdal.GA_ReadOnly)
october_data = october_image.ReadAsArray()[0]
# Flatten once, then keep values above -999 (presumably the no-data cut-off - TODO confirm)
october_flat = october_data.flatten()
october_projections = pd.Series(october_flat[october_flat > -999])
october_projections.describe().round(2)

count    2498785.00
mean         111.84
std           20.88
min           47.48
25%           98.87
50%          108.40
75%          128.24
max          205.92
dtype: float64

In [17]:
def data_to_series(data, min_value=-999):
    """Flatten an array and return its values above a threshold as a Series.

    Parameters
    ----------
    data : array-like exposing ``flatten()`` (e.g. a numpy array)
        The values to convert.
    min_value : number, optional
        Exclusive lower bound; only elements strictly greater than this are kept.

    Returns
    -------
    pd.Series
        The retained values in flattened order, with a fresh default index.
    """
    flat_values = data.flatten()
    keep = flat_values > min_value
    return pd.Series(flat_values[keep])

In [18]:
# Split both months into west/east halves at raster column 1287:
# columns before it are "west", that column and everything after it are "east"
boundary_col = 1287
west_april = data_to_series(april_data[:, :boundary_col])
east_april = data_to_series(april_data[:, boundary_col:])
west_october = data_to_series(october_data[:, :boundary_col])
east_october = data_to_series(october_data[:, boundary_col:])

In [77]:
# Column boundaries between the west, central and east regions.
# Both boundaries are currently the same column, so the central slice selects no columns.
# Other candidate values noted alongside the originals: 1154 for the first boundary,
# 1655 or 1154 for the second.
split_point1 = 1544
split_point2 = 1544

# Column ranges for each region, expressed once and reused for both months
west_cols = slice(None, split_point1)
central_cols = slice(split_point1, split_point2)
east_cols = slice(split_point2, None)

west_april = data_to_series(april_data[:, west_cols])
central_april = data_to_series(april_data[:, central_cols])
east_april = data_to_series(april_data[:, east_cols])
west_october = data_to_series(october_data[:, west_cols])
central_october = data_to_series(october_data[:, central_cols])
east_october = data_to_series(october_data[:, east_cols])

In [78]:
# Side-by-side summary statistics for the whole map and each region, one column per series
regional_series = {
    'conus_april': april_projections,
    'conus_october': october_projections,
    'west_april': west_april,
    'west_october': west_october,
    'central_april': central_april,
    'central_october': central_october,
    'east_april': east_april,
    'east_october': east_october,
}
pd.DataFrame({label: values.describe() for label, values in regional_series.items()}).round(2)

Unnamed: 0,conus_april,conus_october,west_april,west_october,central_april,central_october,east_april,east_october
count,2498785.0,2498785.0,1430150.0,1430150.0,0.0,0.0,1068635.0,1068635.0
mean,116.49,111.84,116.91,100.0,,,115.92,127.68
std,20.91,20.88,23.11,15.86,,,17.53,15.62
min,49.56,47.48,49.56,47.48,,,75.4,90.34
25%,100.08,98.87,100.96,91.77,,,99.28,112.66
50%,117.5,108.4,116.11,101.29,,,118.56,130.32
75%,128.58,128.24,129.2,108.03,,,127.72,140.37
max,249.38,205.92,249.38,205.92,,,199.06,187.41


In [20]:
KOPPEN_FILE = 'Beck_KG_V1_present_0p0083.tif'

# Climate zone source raster and its georeferencing
czone_file = os.path.join(common.SOURCE_DIR, KOPPEN_FILE)
czone_src = gdal.Open(czone_file, gdal.GA_ReadOnly)
if czone_src is None:
    # gdal.Open returns None rather than raising when GDAL exceptions are not enabled;
    # fail here with a clear message instead of an AttributeError further down
    raise RuntimeError('Unable to open climate zone raster: ' + czone_file)
czone_proj = czone_src.GetProjection()
czone_geotrans = czone_src.GetGeoTransform()

# Target grid: projection, geotransform and size of the raster currently held in
# april_image, so the climate zones line up pixel-for-pixel with that raster
aux_proj = april_image.GetProjection()
aux_geotrans = april_image.GetGeoTransform()
x_size = april_image.RasterXSize
y_size = april_image.RasterYSize

# In-memory single-band byte raster to receive the reprojected data
dst = gdal.GetDriverByName('MEM').Create("", x_size, y_size, 1, gdal.GDT_Byte)
dst.SetGeoTransform(aux_geotrans)
dst.SetProjection(aux_proj)

# Reproject climate zone data onto the target grid. Use the mode of the climate zones,
# since the zone numbers are categories and must not be averaged
reproject_status = gdal.ReprojectImage(czone_src, dst, czone_proj, aux_proj, gdal.GRA_Mode)
if reproject_status != gdal.CE_None:
    # Without this check a failed warp would silently leave dst filled with zeros
    raise RuntimeError('Climate zone reprojection failed with GDAL error code '
                       + str(reproject_status))

czone_data = dst.ReadAsArray()
czone_data.shape

(1383, 3217)

In [21]:
# One row per raster pixel: both months' values plus the reprojected climate zone number
pixel_columns = {
    'April_data': april_data.flatten(),
    'October_data': october_data.flatten(),
    'Climate_zone': czone_data.flatten(),
}
df = pd.DataFrame(pixel_columns)
# Keep only pixels whose April value is above -999; October values are not tested here
has_april_value = df['April_data'] > -999
df = df.loc[has_april_value]

In [22]:
# Look up each pixel's climate zone number in the legend index, then swap the numeric
# column for the legend's code (keeping the description alongside it)
zone_lookup = czones[['Code', 'Description']]
df = df.merge(zone_lookup, left_on="Climate_zone", right_index=True)
df = df.drop(columns='Climate_zone')
df = df.rename(columns={'Code': 'Climate_zone'})
# Change from April to October for every pixel
df['Difference'] = df.October_data - df.April_data

In [23]:
# Per-climate-zone summary statistics, computed once and reused for both the tables and
# their titles (the grouping keys are the same, so a second groupby/describe over the
# full frame is not needed)
zone_stats = df.drop(columns='Description').groupby('Climate_zone').describe()
# Each row holds (column, statistic) pairs for one zone; unstack + transpose turns it
# into a statistics-by-column table
display_frames([stats.unstack().T for _, stats in zone_stats.iterrows()],
               [str(zone_code) for zone_code in zone_stats.index],
               precision=2)

Unnamed: 0,April_data,October_data,Difference
count,309.0,309.0,309.0
mean,164.16,152.92,-11.24
std,5.13,5.31,5.07
min,150.74,140.91,-23.1
25%,161.39,149.24,-14.48
50%,164.02,152.47,-12.18
75%,166.91,155.71,-8.38
max,181.15,172.18,5.58

Unnamed: 0,April_data,October_data,Difference
count,2075.0,2075.0,2075.0
mean,170.3,156.02,-14.28
std,6.11,5.95,5.97
min,153.45,138.02,-31.19
25%,165.73,152.18,-18.48
50%,169.56,155.53,-14.12
75%,174.82,159.3,-10.29
max,199.06,178.5,8.69

Unnamed: 0,April_data,October_data,Difference
count,4552.0,4552.0,4552.0
mean,171.79,156.86,-14.92
std,6.95,4.85,6.03
min,152.43,138.52,-30.24
25%,166.06,154.11,-19.33
50%,172.3,157.33,-15.44
75%,177.57,160.16,-10.84
max,190.62,173.9,7.38

Unnamed: 0,April_data,October_data,Difference
count,44631.0,44631.0,44631.0
mean,164.19,128.87,-35.32
std,23.86,24.41,7.32
min,90.82,72.33,-72.31
25%,145.7,109.34,-40.16
50%,162.78,125.42,-35.63
75%,184.23,146.91,-30.6
max,233.68,203.41,4.36

Unnamed: 0,April_data,October_data,Difference
count,616658.0,616658.0,616658.0
mean,116.49,98.46,-18.03
std,20.56,9.29,26.17
min,77.9,72.28,-131.52
25%,100.54,91.33,-35.65
50%,114.17,99.06,-13.72
75%,128.71,105.69,3.2
max,242.75,180.22,31.25

Unnamed: 0,April_data,October_data,Difference
count,59555.0,59555.0,59555.0
mean,78.66,62.3,-16.36
std,15.53,6.09,13.27
min,49.56,47.48,-90.5
25%,67.61,58.19,-26.08
50%,73.82,61.24,-11.94
75%,88.08,65.47,-7.02
max,181.15,118.39,26.63

Unnamed: 0,April_data,October_data,Difference
count,145253.0,145253.0,145253.0
mean,133.46,96.82,-36.64
std,21.83,10.0,25.98
min,78.52,66.33,-151.55
25%,116.66,89.16,-54.09
50%,130.04,97.43,-33.03
75%,147.5,103.89,-15.92
max,249.38,205.92,29.85

Unnamed: 0,April_data,October_data,Difference
count,538052.0,538052.0,538052.0
mean,129.3,136.22,6.93
std,9.42,13.57,14.4
min,96.57,89.59,-56.61
25%,122.1,131.66,-2.11
50%,128.1,139.54,8.18
75%,135.8,145.28,17.96
max,179.12,177.39,38.31

Unnamed: 0,April_data,October_data,Difference
count,6903.0,6903.0,6903.0
mean,132.52,157.14,24.62
std,10.32,7.19,9.32
min,93.04,131.88,-17.11
25%,130.24,152.09,20.29
50%,134.15,157.39,23.9
75%,138.02,162.12,27.66
max,165.45,187.42,56.2

Unnamed: 0,April_data,October_data,Difference
count,31.0,31.0,31.0
mean,121.98,131.88,9.9
std,2.94,2.64,2.83
min,116.19,126.61,5.0
25%,119.46,129.95,8.27
50%,122.35,131.65,9.64
75%,124.01,134.02,11.1
max,128.17,136.88,15.17

Unnamed: 0,April_data,October_data,Difference
count,36133.0,36133.0,36133.0
mean,116.21,72.32,-43.89
std,16.67,8.89,20.27
min,75.07,56.2,-110.74
25%,104.87,65.71,-59.19
50%,117.89,69.83,-48.22
75%,128.69,76.71,-30.06
max,185.01,117.82,21.88

Unnamed: 0,April_data,October_data,Difference
count,63550.0,63550.0,63550.0
mean,117.98,102.36,-15.62
std,15.44,17.24,19.36
min,72.27,58.31,-110.39
25%,108.3,89.13,-26.31
50%,120.4,103.52,-9.32
75%,127.9,116.88,-1.53
max,191.63,154.3,23.91

Unnamed: 0,April_data,October_data,Difference
count,21.0,21.0,21.0
mean,113.38,117.5,4.12
std,4.71,7.25,4.71
min,104.48,104.51,-7.84
25%,109.71,114.65,0.72
50%,113.76,116.36,3.83
75%,116.31,123.04,8.33
max,122.33,129.63,11.84

Unnamed: 0,April_data,October_data,Difference
count,469272.0,469272.0,469272.0
mean,94.12,107.53,13.41
std,6.46,7.12,8.34
min,73.37,77.13,-67.57
25%,89.65,103.11,7.21
50%,94.37,107.74,12.22
75%,98.88,112.4,19.85
max,152.03,152.76,49.26

Unnamed: 0,April_data,October_data,Difference
count,321091.0,321091.0,321091.0
mean,117.98,122.0,4.03
std,8.08,14.19,16.02
min,82.24,75.24,-109.81
25%,115.0,110.19,-1.97
50%,118.15,126.5,9.73
75%,121.71,132.66,14.53
max,200.85,159.92,42.29

Unnamed: 0,April_data,October_data,Difference
count,68657.0,68657.0,68657.0
mean,114.72,107.43,-7.29
std,8.97,10.73,13.31
min,88.01,69.86,-100.07
25%,108.81,101.69,-13.2
50%,113.61,107.78,-3.78
75%,119.48,113.32,1.97
max,207.6,186.21,31.85

Unnamed: 0,April_data,October_data,Difference
count,1245.0,1245.0,1245.0
mean,160.06,89.97,-70.09
std,16.56,4.39,17.92
min,107.48,81.27,-124.08
25%,149.33,86.88,-83.36
50%,163.33,89.47,-73.34
75%,172.16,92.52,-60.61
max,207.78,109.36,-9.22

Unnamed: 0,April_data,October_data,Difference
count,68926.0,68926.0,68926.0
mean,124.06,111.09,-12.98
std,14.79,13.37,19.57
min,88.64,78.91,-114.89
25%,113.36,100.79,-23.18
50%,123.85,109.93,-10.56
75%,132.99,120.24,1.62
max,209.84,151.74,28.98

Unnamed: 0,April_data,October_data,Difference
count,19668.0,19668.0,19668.0
mean,115.8,120.49,4.68
std,8.81,11.53,9.97
min,86.59,88.36,-57.82
25%,109.8,112.18,-0.69
50%,115.31,119.01,6.01
75%,121.53,129.01,11.48
max,178.65,177.99,44.61

Unnamed: 0,April_data,October_data,Difference
count,13299.0,13299.0,13299.0
mean,104.98,108.74,3.76
std,7.17,4.54,7.07
min,87.48,95.13,-20.3
25%,99.65,105.61,-1.61
50%,106.12,108.74,3.39
75%,110.37,111.95,9.06
max,139.58,136.89,26.06

Unnamed: 0,April_data,October_data,Difference
count,14743.0,14743.0,14743.0
mean,109.51,105.68,-3.84
std,5.18,4.79,5.76
min,84.84,92.06,-25.5
25%,105.98,102.39,-7.55
50%,109.69,105.78,-4.02
75%,113.18,109.07,-0.35
max,138.27,141.31,29.91

Unnamed: 0,April_data,October_data,Difference
count,4161.0,4161.0,4161.0
mean,123.95,116.19,-7.76
std,15.2,14.96,14.67
min,87.58,81.83,-72.32
25%,111.63,106.44,-17.91
50%,121.55,111.9,-6.09
75%,134.51,121.84,2.34
max,194.95,188.45,55.44


In [24]:
# Median values per climate zone, converted to integers and ordered by the
# April-to-October difference (ascending)
zone_medians = df.groupby(['Climate_zone', 'Description']).median()
zone_medians.astype(int).sort_values('Difference')

Unnamed: 0_level_0,Unnamed: 1_level_0,April_data,October_data,Difference
Climate_zone,Description,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Dsa,"Cold, dry summer, hot summer",163,89,-73
Csa,"Temperate, dry summer, hot summer",117,69,-48
BSh,"Arid, steppe, hot",162,125,-35
BWk,"Arid, desert, cold",130,97,-33
Aw,"Tropical, savannah",172,157,-15
Am,"Tropical, monsoon",169,155,-14
BSk,"Arid, steppe, cold",114,99,-13
Af,"Tropical, rainforest",164,152,-12
BWh,"Arid, desert, hot",73,61,-11
Dsb,"Cold, dry summer, warm summer",123,109,-10


In [26]:
# Attach the absolute map differences to the per-pixel frame. set_axis relabels each
# series with df's index, so values are matched to rows purely by position.
# NOTE(review): this assumes april_diffs/october_diffs contain exactly the same pixels,
# in the same order, as df's rows. The diffs were filtered with "> -10000" on the
# difference rasters, whereas df was filtered with "April_data > -999" and then merged
# with the climate zone legend - TODO confirm the two selections really coincide.
df['April_diffs'] = april_diffs.set_axis(df.index)
df['October_diffs'] = october_diffs.set_axis(df.index)

In [36]:
# Median April/October values for pixels whose zone description mentions "Arid" or "dry summer"
dry_rows = df.Description.str.contains('Arid') | df.Description.str.contains('dry summer')
df.loc[dry_rows, ['April_data', 'October_data']].median()

April_data      117.968964
October_data     98.923248
dtype: float32

In [37]:
# Median April/October values for the remaining pixels: zone description mentions
# neither "Arid" nor "dry summer"
dry_rows = df.Description.str.contains('Arid') | df.Description.str.contains('dry summer')
df.loc[~dry_rows, ['April_data', 'October_data']].median()

April_data      117.304665
October_data    118.929665
dtype: float32

In [75]:
# Mean April/October values for pixels whose zone description mentions any of
# "Arid", "Tropical", "Polar" or "dry summer"
zone_text = df.Description
selected_rows = (zone_text.str.contains('Arid')
                 | zone_text.str.contains('Tropical')
                 | zone_text.str.contains('Polar')
                 | zone_text.str.contains('dry summer'))
df.loc[selected_rows, ['April_data', 'October_data']].mean()

April_data      119.672554
October_data     98.495506
dtype: float32

In [74]:
# Mean April/October values for pixels whose zone description mentions none of
# "Arid", "Tropical", "Polar" or "dry summer"
zone_text = df.Description
selected_rows = (zone_text.str.contains('Arid')
                 | zone_text.str.contains('Tropical')
                 | zone_text.str.contains('Polar')
                 | zone_text.str.contains('dry summer'))
df.loc[~selected_rows, ['April_data', 'October_data']].mean()

April_data      114.118591
October_data    121.782051
dtype: float32

In [72]:
# Legend rows whose description mentions any of "Arid", "Tropical", "Polar" or "dry summer"
legend_text = czones.Description
selected_zones = (legend_text.str.contains('Arid')
                  | legend_text.str.contains('Tropical')
                  | legend_text.str.contains('Polar')
                  | legend_text.str.contains('dry summer'))
czones.loc[selected_zones]

Unnamed: 0_level_0,Code,Description,Red,Green,Blue
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Af,"Tropical, rainforest",0,0,255
2,Am,"Tropical, monsoon",0,120,255
3,Aw,"Tropical, savannah",70,170,250
4,BWh,"Arid, desert, hot",255,0,0
5,BWk,"Arid, desert, cold",255,150,150
6,BSh,"Arid, steppe, hot",245,165,0
7,BSk,"Arid, steppe, cold",255,220,100
8,Csa,"Temperate, dry summer, hot summer",255,255,0
9,Csb,"Temperate, dry summer, warm summer",200,200,0
10,Csc,"Temperate, dry summer, cold summer",150,150,0


In [73]:
# Legend rows whose description mentions none of "Arid", "Tropical", "Polar" or "dry summer"
legend_text = czones.Description
selected_zones = (legend_text.str.contains('Arid')
                  | legend_text.str.contains('Tropical')
                  | legend_text.str.contains('Polar')
                  | legend_text.str.contains('dry summer'))
czones.loc[~selected_zones]

Unnamed: 0_level_0,Code,Description,Red,Green,Blue
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
11,Cwa,"Temperate, dry winter, hot summer",150,255,150
12,Cwb,"Temperate, dry winter, warm summer",100,200,100
13,Cwc,"Temperate, dry winter, cold summer",50,150,50
14,Cfa,"Temperate, no dry season, hot summer",200,255,80
15,Cfb,"Temperate, no dry season, warm summer",100,255,80
16,Cfc,"Temperate, no dry season, cold summer",50,200,0
21,Dwa,"Cold, dry winter, hot summer",170,175,255
22,Dwb,"Cold, dry winter, warm summer",90,120,220
23,Dwc,"Cold, dry winter, cold summer",75,80,180
24,Dwd,"Cold, dry winter, very cold winter",50,0,135


In [42]:
# Geotransform of the raster held in april_image. In GDAL's convention element 0 is the
# x coordinate of the raster's left edge and element 1 is the pixel width, which is how
# the following cells convert between column numbers and x coordinates
# (this ignores the rotation terms, i.e. assumes a north-up image - TODO confirm)
xform = april_image.GetGeoTransform()

In [43]:
# x coordinate (a longitude, judging by the values) at the left edge of raster column 1287
x_origin, pixel_width = xform[0], xform[1]
x_origin + pixel_width * 1287

-101.61742493960028

In [44]:
# Fractional raster column that corresponds to x = -104
target_x = -104
(target_x - xform[0]) / xform[1]

1154.386478749775

In [64]:
# x coordinate (a longitude, judging by the values) at the left edge of raster column 1154
x_origin, pixel_width = xform[0], xform[1]
x_origin + pixel_width * 1154

-104.00694359535821

In [67]:
# Fractional raster column that corresponds to x = -95
target_x = -95
(target_x - xform[0]) / xform[1]

1655.324187319506

In [76]:
# Fractional raster column that corresponds to x = -97
target_x = -97
(target_x - xform[0]) / xform[1]

1544.0046965262325