In [None]:
# import modules

import ee
from osgeo import gdal
from osgeo import gdalconst
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from scipy import ndimage
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import linregress
from sklearn.metrics import mean_squared_error

## get sentinel-1 1000 m data from ee

Here, to get the 1000 m Sentinel-1 GRD SAR image, we can use the Earth Engine (ee) API to transfer the data directly from the ee server into Python arrays. This is possible because the amount of data transferred is relatively small. Transferring a Sentinel-1 image at its original 10 m resolution, for instance, is not possible due to the data transfer limits set by ee; in that scenario, one can export the .tif files to Google Drive instead. For this, refer to 'data_collection.ipynb'.

In [None]:
# initialize the ee api through your credentials

# ee.Authenticate()
ee.Initialize()

In [None]:
# load study area using feature collection

fc = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level1")
roi = fc.filter(ee.Filter.eq('ADM1_NAME', 'Zuid-holland'))

In [None]:
# transfer images from ee to numpy arrays (one VV and one VH array per start date)
# NOTE(review): arrays are appended as VV, VH, VV, VH, ... so with more than one date
# the list interleaves polarisations and dates

sentinel_dates = ['2020-03-25']
sar_arrs = []

for dates in sentinel_dates:
    sentinel = ee.ImageCollection('COPERNICUS/S1_GRD')
    # keep ascending-pass, IW-mode scenes from platform 'A' that carry both VV and VH
    asc = sentinel.filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING')).filter(ee.Filter.eq('instrumentMode', 'IW'))
    platform = asc.filter(ee.Filter.eq('platform_number', 'A'))
    coll_param = platform.filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')).filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH')).select(['VV', 'VH'])

    # restrict the collection to scenes intersecting the study area
    sentinel_roi = coll_param.filterBounds(roi.geometry())

    # first scene in the window [dates, 2021-01-01); the end date is hard-coded
    bcoff = sentinel_roi.filterDate(dates, '2021-01-01').first()

    # aggregate with a median reducer and reproject to EPSG:4326 at a scale of 1000
    bcoff_new = bcoff.reduceResolution(reducer=ee.Reducer.median(), maxPixels=1e4).reproject(crs='EPSG:4326', scale=1000)

    # sample the image over the study area; masked pixels are filled with -9999
    sar_arr = bcoff_new.sampleRectangle(region=roi.geometry(), defaultValue=-9999)

    sar_arr_VV = sar_arr.get('VV')
    sar_arr_VH = sar_arr.get('VH')

    # getInfo() brings the sampled values to the client, where they become numpy arrays
    npsar_arr_VV = np.array(sar_arr_VV.getInfo())
    sar_arrs.append(npsar_arr_VV)
    npsar_arr_VH = np.array(sar_arr_VH.getInfo())
    sar_arrs.append(npsar_arr_VH)

    print(npsar_arr_VV.shape)
    print(npsar_arr_VH.shape)

In [None]:
vv_arr = sar_arrs[0]
vh_arr = sar_arrs[1]

## LST 1000 m data

The images collected at different spatial resolutions could have a mismatch in the number of pixels because border pixels are not always included. For instance, a 1000 m image with a shape of (70, 131) should have a shape of (700, 1310) at 100 m resolution, but this might not always be the case. Thus, we need to apply an extent correction.

In [None]:
# derive the bounding box (upper-left / lower-right corners) of the reference raster;
# it is used as the clipping window for the finer-resolution rasters

extent_data = gdal.Open('data/modisval_2905.tif')
geoTransform = extent_data.GetGeoTransform()

# geotransform entries 0 and 3 hold the upper-left x and y
ulx, uly = geoTransform[0], geoTransform[3]

# entries 1 and 5 are the per-pixel steps in x and y; multiply by the raster size
lrx = ulx + geoTransform[1] * extent_data.RasterXSize
lry = uly + geoTransform[5] * extent_data.RasterYSize

print(ulx, uly, lrx, lry)

In [None]:
# get landsat validation data and cut by modis extent

lst_full = gdal.Open('data/l8/landsatval_2503_100.tif')
tmp_data = gdal.Translate('/vsimem/in_memory_output.tif', lst_full, projWin=[ulx, uly, lrx, lry],
                          outputType=gdalconst.GDT_Float32, noData=np.nan)
lst_full_arr = tmp_data.ReadAsArray()
# NOTE(review): scale and offset presumably convert the stored values to kelvin — confirm
lst_full_arr = lst_full_arr*0.00341802+149.0
# 3x3 median filter to suppress isolated outliers
lst_full_farr = ndimage.median_filter(lst_full_arr, 3)
print(lst_full_farr.shape)

# upscale landsat lst to 1000 m: median over non-overlapping 10x10 pixel blocks.
# the block grid is derived from the array shape instead of a hard-coded column count,
# so an unexpected extent fails loudly instead of being silently mis-aggregated
lst_block = 10
lst_fine_rows, lst_fine_cols = lst_full_farr.shape
if lst_fine_rows % lst_block or lst_fine_cols % lst_block:
    raise ValueError('LST shape %s is not a multiple of the %d-pixel aggregation block'
                     % (lst_full_farr.shape, lst_block))

new_landsat_lst = lst_full_farr.reshape(lst_fine_rows // lst_block, lst_block,
                                        lst_fine_cols // lst_block, lst_block)
new_landsat_lst_arr = np.median(new_landsat_lst, (-1, -3))
print(new_landsat_lst_arr.shape)
print(new_landsat_lst_arr)

lst_arr = new_landsat_lst_arr

In [None]:
figure(figsize=(14, 12), dpi=300)
plt.imshow(lst_arr, cmap='RdBu_r')
plt.colorbar(orientation='horizontal')
plt.show()

In [None]:
figure(figsize=(14, 12), dpi=300)
plt.imshow(vv_arr, cmap='Greys_r')
plt.colorbar(orientation='horizontal')
plt.show()

In [None]:
# LST and VV backscatter side by side at 1000 m
fig, ax = plt.subplots(1, 2, figsize=(20, 20))

img_1 = ax[0].imshow(lst_arr, cmap='RdBu_r')
fig.colorbar(img_1, ax=ax[0], orientation='horizontal')
ax[0].set_title('LST image over Zuid-Holland (1000 m)', y=-0.5)

img_2 = ax[1].imshow(vv_arr, cmap='Greys_r')
fig.colorbar(img_2, ax=ax[1], orientation='horizontal')
ax[1].set_title('VV image over Zuid-Holland (1000 m)', y=-0.5)

# plt.show() instead of fig.show(): Figure.show() warns under non-GUI (inline) backends
plt.show()

## data transformation

Now that we have our basic data, let's do some data transformation and feature engineering

### Sentinel-1 data

Here, we have to generate two sets of features. The first is the Sentinel-1 image at 1000 m and at 100 m resolution (the 1000 m image is used to train the model and the 100 m image serves as input for the trained model). The second is the set of neighboring pixel values, which we again compute at both 1000 m and 100 m resolution.

In [None]:
# get sar data and cut it by extent

sar_full = gdal.Open('data/s1/sarval_2503_vv_vh.tif')
tmp_data_sar = gdal.Translate('/vsimem/in_memory_output.tif', sar_full, projWin=[ulx, uly, lrx, lry],
                              outputType=gdalconst.GDT_Float32, noData=np.nan)

# read the clipped raster once; the original read the whole stack separately for each band
sar_bands = tmp_data_sar.ReadAsArray()

# first band is used as VV; smoothed with a 3x3 median filter
vv_full_arr = sar_bands[0]
vv_full_farr = ndimage.median_filter(vv_full_arr, 3)
print(vv_full_farr.shape)

# second band is used as VH; same smoothing
vh_full_arr = sar_bands[1]
vh_full_farr = ndimage.median_filter(vh_full_arr, 3)
print(vh_full_farr.shape)

In [None]:
# aggregate the filtered VV/VH images to 100 m: median over non-overlapping 10x10 pixel blocks.
# the block grid comes from each array's own shape rather than a hard-coded width of 1310,
# so an unexpected extent raises instead of being silently mis-aggregated

sar_block = 10

vv_fine_rows, vv_fine_cols = vv_full_farr.shape
if vv_fine_rows % sar_block or vv_fine_cols % sar_block:
    raise ValueError('VV shape %s is not a multiple of the %d-pixel aggregation block'
                     % (vv_full_farr.shape, sar_block))
n_vv_full_arr = vv_full_farr.reshape(vv_fine_rows // sar_block, sar_block,
                                     vv_fine_cols // sar_block, sar_block)
m_vv_full_arr = np.median(n_vv_full_arr, (-1, -3))
print(m_vv_full_arr.shape)
print(m_vv_full_arr)

vh_fine_rows, vh_fine_cols = vh_full_farr.shape
if vh_fine_rows % sar_block or vh_fine_cols % sar_block:
    raise ValueError('VH shape %s is not a multiple of the %d-pixel aggregation block'
                     % (vh_full_farr.shape, sar_block))
n_vh_full_arr = vh_full_farr.reshape(vh_fine_rows // sar_block, sar_block,
                                     vh_fine_cols // sar_block, sar_block)
m_vh_full_arr = np.median(n_vh_full_arr, (-1, -3))
print(m_vh_full_arr.shape)
print(m_vh_full_arr)

We have now generated the first set of features, i.e., the 1000 m and 100 m images. Here, the variables vv_arr and vh_arr refer to the 1000 m images that will be used for training, and the variables m_vv_full_arr and m_vh_full_arr refer to the 100 m images used for prediction.

In [None]:
# store the values surrounding every 1000 m pixel as extra features

def test_func(values):
    """Append the footprint values of one pixel to the global nbor_list; return their sum."""
    nbor_list.append(values)
    return values.sum()


# 5x5 window with the centre switched off, i.e. 24 neighbours per pixel
footprint = np.ones((5, 5), dtype=int)
footprint[2, 2] = 0

sar = [vv_arr, vh_arr]
fnborlist_sar_1000 = []

for arrs in sar:
    # test_func fills this list as a side effect while the filter walks the image
    nbor_list = []
    results = ndimage.generic_filter(arrs, test_func, footprint=footprint)

    # turn each 1-D neighbourhood into a single-row 2-D array
    new_nborlist = [nbor_arrays.reshape(-1, 1).T for nbor_arrays in nbor_list]
    fnborlist_sar_1000.append(new_nborlist)

print(len(fnborlist_sar_1000))

In [None]:
def test_func(values):
    """Append the footprint values of one pixel to the global nbor_list; return their sum."""
    nbor_list.append(values)
    return values.sum()


# 5x5 window with the centre switched off, i.e. 24 neighbours per pixel
footprint = np.ones((5, 5), dtype=int)
footprint[2, 2] = 0

sar = [m_vv_full_arr, m_vh_full_arr]
fnborlist_sar_100 = []

for arrs in sar:
    # test_func fills this list as a side effect while the filter walks the image
    nbor_list = []
    results = ndimage.generic_filter(arrs, test_func, footprint=footprint)

    # turn each 1-D neighbourhood into a single-row 2-D array
    new_nborlist = [nbor_arrays.reshape(-1, 1).T for nbor_arrays in nbor_list]
    fnborlist_sar_100.append(new_nborlist)

print(len(fnborlist_sar_100))

now we also have the neighboring values stored in lists fnborlist_sar_1000 and fnborlist_sar_100

In [None]:
# column names for the 24 neighbour features of each polarisation (vv1..vv24, vh1..vh24)

vv_nbr_list = ['vv' + str(n) for n in range(1, 25)]

vh_nbr_list = ['vh' + str(n) for n in range(1, 25)]

print(vh_nbr_list)

### ESA WorldCover data

now we create features from land cover data. specifically, we calculate proportion of land cover within each coarse resolution pixel and use that as a feature

In [None]:
lulc_data = gdal.Open('data/esa_lulc_10.tif')
tmp_data_lulc_up = gdal.Translate('/vsimem/validation_data/saving_mask_image_100.tif', lulc_data, projWin=[ulx, uly, lrx, lry],
                              outputType=gdalconst.GDT_Float32, noData=np.nan)
lulc_arr_n = tmp_data_lulc_up.ReadAsArray()
print(lulc_arr_n.shape)

In [None]:
figure(figsize=(14, 12), dpi=300)
plt.imshow(lulc_arr_n)
plt.colorbar(orientation='horizontal')
plt.title('LULC image over Zuid-Holland (100 m)', y=-0.1)
plt.show()

In [None]:
# Land-cover class proportions inside each coarse pixel.
# Window sizes are (rows, cols) in LULC pixels; one feature table is built per entry.
window_sizes = [(100, 100), (10, 10)]
lcprop_list = []
idx_list = []

# Get the image shape
rows, cols = lulc_arr_n.shape

# Every class value present in the image; fixes the column order of each proportion row.
# Computed once because it does not depend on the window size.
column_arr = np.unique(lulc_arr_n)

# Unpacking into separate names leaves `window_sizes` intact, so the cell can be re-run
# (the original loop rebound the list name to its last element).
for win_rows, win_cols in window_sizes:
    # Compute the number of windows that fit the image
    n_rows = int(np.ceil(rows / win_rows))
    n_cols = int(np.ceil(cols / win_cols))
    print(n_rows, n_cols)

    pixel_number_list = []
    values_list = []

    # Walk the windows in row-major order and process each slice directly; no stacked
    # array of windows is built, so partial windows at the image edge are handled too.
    for i in range(n_rows):
        for j in range(n_cols):
            window = lulc_arr_n[i * win_rows:(i + 1) * win_rows,
                                j * win_cols:(j + 1) * win_cols]

            unique, counts = np.unique(window, return_counts=True)

            # Classes of the full image that occur in this window. NaN never compares
            # equal, so a NaN class is always reported as absent.
            present = np.isin(column_arr, unique)

            # Absent classes stay NaN. `unique` and `column_arr` are both sorted, so the
            # first `present.sum()` counts line up with the present classes in order.
            value_list = np.full(len(column_arr), np.nan)
            # window.size equals win_rows * win_cols for every full window
            value_list[present] = counts[:present.sum()] / window.size

            pixel_number_list.append(len(pixel_number_list))
            values_list.append(value_list.tolist())

    lcprop_list.append(values_list)
    idx_list.append(pixel_number_list)

Here, lcprop_list contains two elements, each holding the proportion of every land cover class per pixel. The first element is calculated at 1000 m resolution and will be used for training; the second element is calculated at 100 m resolution and will be used as input for the trained model.

In [None]:
# 1000 m land-cover proportions as a dataframe: one row per pixel, one column per class.
# missing proportions become 0 and the NaN class column is removed before renaming

lulc_df_1000 = (
    pd.DataFrame(data=lcprop_list[0], index=idx_list[0], columns=column_arr.tolist())
    .fillna(0)
    .drop([np.nan], axis=1)
)
lulc_df_1000.columns = ['esa_0', 'esa_10', 'esa_20', 'esa_30', 'esa_40', 'esa_50', 'esa_60', 'esa_80', 'esa_90']
lulc_df_1000

In [None]:
# remove the 'esa_0' column; errors='ignore' keeps the cell re-runnable once it is gone
lulc_df_1000 = lulc_df_1000.drop(['esa_0'], axis=1, errors='ignore')
lulc_df_1000

In [None]:
# 100 m land-cover proportions as a dataframe: one row per pixel, one column per class.
# missing proportions become 0 and the NaN class column is removed before renaming

lulc_df_100 = (
    pd.DataFrame(data=lcprop_list[1], index=idx_list[1], columns=column_arr.tolist())
    .fillna(0)
    .drop([np.nan], axis=1)
)
lulc_df_100.columns = ['esa_0', 'esa_10', 'esa_20', 'esa_30', 'esa_40', 'esa_50', 'esa_60', 'esa_80', 'esa_90']
lulc_df_100

In [None]:
# remove the 'esa_0' column; errors='ignore' keeps the cell re-runnable once it is gone
lulc_df_100 = lulc_df_100.drop(['esa_0'], axis=1, errors='ignore')
lulc_df_100

### Sentinel-2 data

for sentinel-2 data we don't have any extra feature engineering. Simply, we use 1000 m data for training and 100 m data for prediction

In [None]:
# s2 1000 m product

s2_data = gdal.Open('data/s2/s2_2603_1000.tif')

# read the raster once; the original re-read the full stack for every band
# (assumes a multi-band raster, so the first axis of the stack is the band index)
s2_stack = s2_data.ReadAsArray()

s2_arrs = [s2_stack[i] for i in range(s2_data.RasterCount)]

In [None]:
# one flat 1-D vector per band so each band can become a dataframe column

s2_flatten_arrs = [band.flatten() for band in s2_arrs]

In [None]:
# s2 10 m product

s2_full = gdal.Open('data/s2/s2_2603_10.tif')
tmp_data_s2 = gdal.Translate('/vsimem/in_memory_output.tif', s2_full, projWin=[ulx, uly, lrx, lry],
                              outputType=gdalconst.GDT_Float32, noData=np.nan)

# read the clipped raster once; the original re-read the full stack for every band
# (assumes a multi-band raster, so the first axis of the stack is the band index)
s2_full_stack = tmp_data_s2.ReadAsArray()

# smooth each band with a 3x3 median filter
s2_full_arrs = [ndimage.median_filter(s2_full_stack[i], 3)
                for i in range(tmp_data_s2.RasterCount)]

In [None]:
# aggregate every band with a median over 10x10 pixel blocks (1310 blocks per row),
# then flatten it so it can be stored as a dataframe column
s2_full_arrs_100 = [
    np.median(band.reshape(-1, 10, 1310, 10), (-1, -3)).flatten().T
    for band in s2_full_arrs
]

### save to dataframe

In [None]:
# flatten the array to save it as a pandas dataframe

lst_flat = lst_arr.flatten()

vv_flat = vv_arr.flatten()

vh_flat = vh_arr.flatten()

In [None]:
data_df = pd.DataFrame(lst_flat.T, columns=['lst'])
data_df['vv'] = vv_flat.T
data_df['vh'] = vh_flat.T
data_df[vv_nbr_list] = np.concatenate(fnborlist_sar_1000[0])
data_df[vh_nbr_list] = np.concatenate(fnborlist_sar_1000[1])

data_df

In [None]:
# join land cover proportion df to main df
data_df = data_df.join(lulc_df_1000)

In [None]:
# save s2 to dataframe

s2_cols = ['b2', 'b3', 'b4', 'b8', 'b11', 'b12']

for i, col in enumerate(s2_cols):
    data_df[col] = s2_flatten_arrs[i].T

data_df

In [None]:
# drop entries (rows) containing NaN data

filt_df = data_df.dropna()
filt_df

In [None]:
predictor_list = ['vv', 'vh']
print(predictor_list)

In [None]:
# seperate dataframe into predictors and target for model-building

predictors = filt_df[predictor_list]
target = filt_df['lst']

print(predictors)
print()
print(target)

## building model

In [None]:
x_train, x_test, y_train, y_test = train_test_split(predictors, target,
                                                    test_size=0.2,
                                                    random_state=7)

In [None]:
# show which 1000 m pixels ended up in the training and in the testing split

train = y_train.isin(data_df['lst']).astype('int')
test = y_test.isin(data_df['lst']).astype('int')

# column assignment aligns on the index, so rows outside a split become NaN
data_df['test'] = test
data_df['train'] = train

# back to image layout: 131 pixels per row
train_px = data_df.train.values.reshape(-1, 131)
test_px = data_df.test.values.reshape(-1, 131)

figure(figsize=(20, 20), dpi=300)

panels = [(train_px, 'Training Set'), (test_px, 'Testing Set')]
for position, (mask_px, label) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.imshow(mask_px, cmap='Greys_r')
    plt.title(label)

plt.show()

In [None]:
# make a grid with values for hyperparameters

# fixed seed so the randomly drawn candidate values are identical on every run;
# the original drew them from the unseeded global numpy generator
GRID_SEED = 7
grid_rng = np.random.RandomState(GRID_SEED)

n_estimators = [int(x) for x in np.linspace(start=50, stop=2000, num=20)]
max_depth = [int(x) for x in np.linspace(5, 30, num=10)]
max_depth.append(None)
# 10 random candidates each; `high` is exclusive
min_samples_split = grid_rng.randint(low=2, high=20, size=10)
min_samples_leaf = grid_rng.randint(low=1, high=10, size=10)
max_features = ['sqrt', 'log2', None]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'max_features': max_features}
print(random_grid)

In [None]:
# seed the forest as well, so the tuned model is reproducible between runs
forest = RandomForestRegressor(random_state=5)

# randomized search: 10 sampled parameter settings, 5-fold CV, scored by negative RMSE
rf_random = RandomizedSearchCV(estimator=forest,
                               param_distributions=random_grid,
                               n_iter=10,
                               cv=5, scoring='neg_root_mean_squared_error',
                               return_train_score=True,
                               random_state=5,
                               n_jobs=-1)

# NOTE(review): the search is fitted on all rows (predictors/target), not on
# x_train/y_train, so the held-out split is not independent of the model — confirm intended
rf_random.fit(predictors, target)

In [None]:
print(rf_random.best_params_)

In [None]:
tuned_forest = rf_random.best_estimator_

In [None]:
lst_pred = tuned_forest.predict(predictors)
full_rmse = np.sqrt(mean_squared_error(lst_pred, target))
print(full_rmse)

In [None]:
gfilt_df = filt_df.copy()
gfilt_df['lst_pred'] = lst_pred
gfilt_df

In [None]:
print(gfilt_df['lst_pred'].describe())
print()
print(gfilt_df['lst'].describe())

In [None]:
# keep only the prediction column and attach it to the full 1000 m table (rows that were
# dropped before modelling get NaN). the isinstance guard keeps the cell re-runnable after
# gfilt_df has already been reduced to a Series
if isinstance(gfilt_df, pd.DataFrame):
    gfilt_df = gfilt_df['lst_pred']
finaldata_df = data_df.join(gfilt_df)
finaldata_df

In [None]:
# check model prediction at 1000 m vs original data at 1000 m

hist_df_1000 = finaldata_df[['lst', 'lst_pred']]

# create the axes explicitly and hand them to pandas: a bare figure(...) call would open
# an empty figure that pandas does not draw on, so its size/dpi would never be applied
fig, ax = plt.subplots(figsize=(14, 12), dpi=300)
hist_plot = hist_df_1000.plot.hist(ax=ax, bins=200, legend=True, alpha=0.5)
# plt.show() instead of fig.show(): Figure.show() warns under non-GUI (inline) backends
plt.show()

In [None]:
print(finaldata_df['lst'].corr(finaldata_df['lst_pred']))

In [None]:
pred_arr = finaldata_df['lst_pred'].values.reshape(-1, 131)
print(pred_arr.shape)

In [None]:
# plots of predicted vs observed lst at 1000 m

figure(figsize=(14, 12), dpi=300)

plt.subplot(1, 2, 1)
plt.title('Predicted LST map')
plt.imshow(pred_arr[:, :], cmap='RdBu_r')
plt.colorbar(orientation='horizontal')

plt.subplot(1, 2, 2)
plt.title('Observed LST map')
plt.imshow(lst_arr[:, :], cmap='RdBu_r')
plt.colorbar(orientation='horizontal')

plt.show()

In [None]:
# error image at 1000 m

error_arr = np.sqrt(np.square(pred_arr - lst_arr))

figure(figsize=(14, 12), dpi=150)
plt.title('Error at each pixel')
plt.imshow(error_arr, cmap='RdYlGn_r')
plt.colorbar(orientation='horizontal')
plt.show()

## downscaling 100 m

In [None]:
# save the features as a dataframe

lstfull_flat = lst_full_farr.flatten()
vvfull_flat = m_vv_full_arr.flatten()
vhfull_flat = m_vh_full_arr.flatten()

fulldata_df = pd.DataFrame(lstfull_flat.T, columns=['lst'])
fulldata_df['vv'] = vvfull_flat.T
fulldata_df['vh'] = vhfull_flat.T
fulldata_df[vv_nbr_list] = np.concatenate(fnborlist_sar_100[0])
fulldata_df[vh_nbr_list] = np.concatenate(fnborlist_sar_100[1])

fulldata_df = fulldata_df.join(lulc_df_100)
fulldata_df

In [None]:
# save s2 data to dataframe

for i, col in enumerate(s2_cols):
    fulldata_df[col] = s2_full_arrs_100[i]

fulldata_df

In [None]:
filt_fulldata_df = fulldata_df.dropna()
filt_fulldata_df

In [None]:
fullsar = filt_fulldata_df[predictor_list]
fulllst = filt_fulldata_df['lst']

print(fullsar)
print(fulllst)

In [None]:
fulllst_pred = tuned_forest.predict(fullsar)

In [None]:
gfilt_fulldata_df = filt_fulldata_df.copy()
gfilt_fulldata_df['lst_pred'] = fulllst_pred
gfilt_fulldata_df

In [None]:
# keep only the prediction column and attach it to the full 100 m table (rows that were
# dropped before prediction get NaN). the isinstance guard keeps the cell re-runnable after
# gfilt_fulldata_df has already been reduced to a Series
if isinstance(gfilt_fulldata_df, pd.DataFrame):
    gfilt_fulldata_df = gfilt_fulldata_df['lst_pred']
n_fulldata_df = fulldata_df.join(gfilt_fulldata_df)
n_fulldata_df

## residual correction

In [None]:
# collect residuals: aggregate the 100 m predictions back to the coarse grid, compare them
# with the coarse LST, then spread each coarse residual over its 10x10 fine pixels

# predictions back in image layout, 1310 pixels per row
tmp_dlst_arr_100 = n_fulldata_df.lst_pred.values.reshape(-1, 1310)

# median over each 10x10 block of fine pixels -> 131 coarse pixels per row
# NOTE(review): np.median propagates NaN, so a block containing any unpredicted pixel
# yields a NaN coarse value and hence a NaN residual — confirm this is intended
fulllst_pred_1000 = tmp_dlst_arr_100.reshape(-1, 10, 131, 10)
fulllst_pred_1000 = np.median(fulllst_pred_1000, (-1, -3))
print(fulllst_pred_1000.shape)

# residuals

# coarse LST minus aggregated prediction
res_1000 = lst_arr - fulllst_pred_1000
print(res_1000.shape)

# repeat every coarse residual 10 times along both axes to match the fine grid
res_100 = res_1000.repeat(10, 0).repeat(10, 1)
print(res_100.shape)
print(res_100)

plt.imshow(res_1000)
plt.colorbar()

In [None]:
n_fulldata_df['residuals'] = res_100.flatten().T
n_fulldata_df

In [None]:
n_fulldata_df['lst_pred_res'] = n_fulldata_df['lst_pred'] + n_fulldata_df['residuals']
n_fulldata_df

## some basic evaluation

In [None]:
fulllstpred_arr = n_fulldata_df.lst_pred_res.values.reshape(-1, 1310)
reflstpred_arr = n_fulldata_df.lst.values.reshape(-1, 1310)

min_min = np.nanmin(fulllstpred_arr)
max_max = np.nanmax(fulllstpred_arr)


# plot downscaled LST map

figure(figsize=(14, 12), dpi=150)
plt.imshow(fulllstpred_arr, vmin=min_min, vmax=max_max, cmap='RdBu_r')
plt.colorbar(orientation='horizontal')
plt.show()

In [None]:
figure(figsize=(14, 12), dpi=300)

plt.subplot(1, 2, 1)
plt.imshow(fulllstpred_arr, vmin=min_min, vmax=max_max, cmap='RdBu_r')
plt.title('Downscaled LST map')
plt.colorbar(orientation='horizontal')

plt.subplot(1, 2, 2)
plt.imshow(reflstpred_arr, vmin=min_min, vmax=max_max, cmap='RdBu_r')
plt.title('Original LST map')
plt.colorbar(orientation='horizontal')

plt.show()

In [None]:
# error for each pixel

error_full_arr = np.sqrt(np.square(fulllstpred_arr - lst_full_farr))

figure(figsize=(14, 12), dpi=150)
plt.title('Error at each pixel')
plt.imshow(error_full_arr, cmap='RdYlGn_r')
plt.colorbar(orientation='horizontal')
plt.show()

In [None]:
# correlation coefficient

corr_data_df = n_fulldata_df[['lst', 'lst_pred', 'lst_pred_res']]
corr_data_df = corr_data_df.dropna()

print('The correlation between Observed LST and Downscaled LST at 100m is:',
      corr_data_df['lst'].corr(corr_data_df['lst_pred_res']))

In [None]:
# RMSE

rmse_before = ((corr_data_df.lst_pred - corr_data_df.lst) ** 2).mean() ** .5
print('RMSE before residual correction:', rmse_before)

rmse_after = ((corr_data_df.lst_pred_res - corr_data_df.lst) ** 2).mean() ** .5
print('RMSE after residual correction:', rmse_after)

In [None]:
# r2 and scatterplot
print(predictor_list)

# calculate regression line using scipy.stats.linregress
slope, intercept, r_value, p_value, std_err = linregress(corr_data_df['lst'], corr_data_df['lst_pred_res'])
regress_line = slope * corr_data_df['lst'] + intercept

r_squared = r_value ** 2
print('R^2:', r_squared)

# plot scatterplot with regression line
plt.scatter(corr_data_df['lst'], corr_data_df['lst_pred_res'], edgecolors='black', facecolors='none', linewidths=0.5)
plt.plot(corr_data_df['lst'], regress_line, color='red')
plt.xlabel('Reference LST')
plt.ylabel('Downscaled LST')
plt.show()

In [None]:
# histogram plot
hist_df = n_fulldata_df[['lst', 'lst_pred_res']]

# create the axes explicitly and hand them to pandas: a bare figure(...) call would open
# an empty figure that pandas does not draw on, so its size/dpi would never be applied
fig, ax = plt.subplots(figsize=(14, 12), dpi=300)
hist_plot = hist_df.plot.hist(ax=ax, bins=200, legend=True, alpha=0.5)
# plt.show() instead of fig.show(): Figure.show() warns under non-GUI (inline) backends
plt.show()

In [None]:
corr_data_df_full = pd.DataFrame(data=None, columns=corr_data_df.columns, index=n_fulldata_df.index)
corr_data_df_full

In [None]:
corr_data_df_full = corr_data_df_full.combine_first(corr_data_df)
corr_data_df_full

## save as tif for further evaluation

In the dataframe 'corr_data_df_full', the column 'lst' refers to original Landsat-8 LST (100 m), the column 'lst_pred' refers to downscaled LST (100 m) without residual correction, and the column 'lst_pred_res' refers to downscaled LST (100 m) with residual correction

In [None]:
def save_as_tif(img_name, src_arr, mask_img, nodata=-9999.0):
    """Write a 2-D array to a single-band Float32 GeoTIFF georeferenced like `mask_img`.

    Parameters
    ----------
    img_name : str
        Path of the GeoTIFF to create.
    src_arr : array-like
        2-D values to write; converted to float32. NaN entries are replaced by
        `nodata` so that they match the nodata value declared on the band.
    mask_img : str
        Path of an existing raster whose size, geotransform and projection are
        copied to the output.
    nodata : float, optional
        Value declared as nodata on the output band (default -9999.0).

    Raises
    ------
    RuntimeError
        If `mask_img` cannot be opened or the output file cannot be created.
    """
    mask_data = gdal.Open(mask_img)
    # gdal.Open returns None on failure unless GDAL exceptions are enabled
    if mask_data is None:
        raise RuntimeError(f'could not open mask raster: {mask_img}')

    # the band is Float32; also turns object-dtype input into a numeric array
    out_arr = np.asarray(src_arr, dtype=np.float32)
    # without this, NaN pixels would be written as-is and not match the declared nodata
    out_arr = np.where(np.isnan(out_arr), nodata, out_arr)

    driverTiff = gdal.GetDriverByName('GTiff')
    ds = driverTiff.Create(img_name,
                           mask_data.RasterXSize, mask_data.RasterYSize,
                           1, gdal.GDT_Float32)
    if ds is None:
        raise RuntimeError(f'could not create output raster: {img_name}')

    ds.SetGeoTransform(mask_data.GetGeoTransform())
    ds.SetProjection(mask_data.GetProjection())
    band = ds.GetRasterBand(1)
    band.SetNoDataValue(nodata)
    band.WriteArray(out_arr)
    band.FlushCache()

    # dropping the references closes the datasets and flushes the file to disk
    band = None
    ds = None
    mask_data = None

In [None]:
dlst_arr = corr_data_df_full.lst_pred.values.reshape(-1, 1310)
dlst_arr_res = corr_data_df_full.lst_pred_res.values.reshape(-1, 1310)
l8 = corr_data_df_full.lst.values.reshape(-1, 1310)

In [None]:
# save_as_tif('dlst_res_2503.tif', dlst_arr_res, 'data/saving_mask_100.tif')