In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from scipy.optimize import least_squares, basinhopping, differential_evolution, minimize
import scipy.constants
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import KFold
from IPython.display import clear_output

Constants:

In [8]:
f = 5.405e9
c = scipy.constants.c
wavelength = c / f
k = 2 * np.pi * f / c
s = 0.0097
sbl = 0.013
theta = 40
theta = np.deg2rad(theta)

In [9]:
data = pd.read_csv(r"../Data/NorthChinaPlain_SAR_MODIS_LAI_SM_Daily_MAX.csv")

In [10]:
def clean_data(data):
    # Drop rows with missing data in column: 'SoilMoisture'
    data = data.dropna(subset=['SoilMoisture'])
    # Average numeric columns for duplicate dates
    data = data.groupby('date', as_index=False).mean(numeric_only=True)
    return data


data = clean_data(data.copy())
data.head()

Unnamed: 0,date,LAI,SoilMoisture,VH,VV
0,2015-04-01,0.688023,0.165065,-16.292969,-7.430981
1,2015-04-25,1.271941,0.173209,-18.557225,-10.515813
2,2015-05-19,1.27993,0.168702,-18.18164,-10.299894
3,2015-05-21,1.295764,0.167299,-31.599209,-25.117549
4,2015-05-24,1.096785,0.164336,-16.618521,-8.849678


In [11]:

trim_number = 4

mean_vv = data['VV'].mean()
std_vv = data['VV'].std()

data_trimmed = data[(data['VV'] >= mean_vv - trim_number * std_vv) & (data['VV'] <= mean_vv + trim_number * std_vv)]


mean_vh = data['VH'].mean()

std_vh = data['VH'].std()

data_trimmed = data[(data['VH'] >= mean_vh - trim_number * std_vh) & (data['VH'] <= mean_vh + trim_number * std_vv)]

VV_dB = data_trimmed['VV'].values
VH_dB = data_trimmed['VH'].values
SM = data_trimmed['SoilMoisture'].values
LAI = data_trimmed['LAI'].values

Trimmed the data at dates which Avoids Rainfall and gets most bare soil

In [12]:
# Convert the 'Date' column to datetime format
data_trimmed['date'] = pd.to_datetime(data_trimmed['date'])

# Define the start and end dates for filtering
start_date = '2016-04-1'
end_date = '2016-06-31'

# Filter the data based on the date range
data_segmented = data_trimmed[(data_trimmed['date'] >= start_date) & (data_trimmed['date'] <= end_date)]

# Extract the relevant columns after filtering
VV_dB = data_segmented['VV'].values
VH_dB = data_segmented['VH'].values
SM = data_segmented['SoilMoisture'].values
LAI = data_segmented['LAI'].values

# Ensure all arrays have the same length
min_length = min(len(VH_dB), len(LAI), len(SM))
VH_dB = VH_dB[:min_length]
LAI = LAI[:min_length]
SM = SM[:min_length]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_trimmed['date'] = pd.to_datetime(data_trimmed['date'])


TypeError: Invalid comparison between dtype=datetime64[ns] and str

In [None]:
VV_linear = 10**(VV_dB / 10)
VH_linear = 10**(VH_dB / 10)

In [None]:
"""scaler_vv = MinMaxScaler()
scaler_vh = MinMaxScaler()
scaler_sm = MinMaxScaler()
scaler_lai = MinMaxScaler()

VV_dB = scaler_vv.fit_transform(VV_dB.reshape(-1, 1)).flatten()
VH_dB = scaler_vh.fit_transform(VH_dB.reshape(-1, 1)).flatten()
SM = scaler_sm.fit_transform(SM.reshape(-1, 1)).flatten()
LAI = scaler_lai.fit_transform(LAI.reshape(-1, 1)).flatten()"""

'scaler_vv = MinMaxScaler()\nscaler_vh = MinMaxScaler()\nscaler_sm = MinMaxScaler()\nscaler_lai = MinMaxScaler()\n\nVV_dB = scaler_vv.fit_transform(VV_dB.reshape(-1, 1)).flatten()\nVH_dB = scaler_vh.fit_transform(VH_dB.reshape(-1, 1)).flatten()\nSM = scaler_sm.fit_transform(SM.reshape(-1, 1)).flatten()\nLAI = scaler_lai.fit_transform(LAI.reshape(-1, 1)).flatten()'

L Prevot Original 1993 WCM:


In [None]:
def wcm_1993_sigma_0(P1, P2, P3, P4, P5, L, S):
    #P5 > 0 
    t2 = np.exp(-2 * P2 * L / np.cos(theta))
    sigma_veg = P1 * np.power(L, P5) * np.cos(theta) * (1 - t2) #Including the (1-t2) gets less r2_Score what???
    sigma_soil = P3+P4*S
    return sigma_veg+(t2*sigma_soil)

In [None]:
def wcm_1993_validate_optimizer(optimizer_func, data):

    params_VV = optimizer_func(VV_dB, LAI, SM)
    predicted_VV = wcm_1993_sigma_0(*params_VV, LAI, SM)

    r2_VV = r2_score(VV_dB, predicted_VV)


    params_VH = optimizer_func(VH_dB, LAI, SM)
    predicted_VH = wcm_1993_sigma_0(*params_VH, LAI, SM)

    r2_VH = r2_score(VH_dB, predicted_VH)

    clear_output()
    print(f"R2 Score For VV:{np.median(r2_VV)}")
    print(f"R2 Score For VH:{np.median(r2_VH)}")
    return (tuple(params_VV), tuple(params_VH))

In [None]:
def optimize_wcm_1993_sigma_0_ls(polarization, L, S):
    def residuals(params):
        predicted = wcm_1993_sigma_0(*params, L, S)
        residuals = predicted - polarization
        if not np.all(np.isfinite(residuals)):
            return np.inf 
        return residuals
    initial_guess = [0.1, 1.3, 1.2, 0.9, 0.8]
    result = least_squares(residuals, initial_guess, method='trf', loss='soft_l1', max_nfev=10000)
    return result.x

In [None]:
wcm_1993_params_vv, wcm_1993_params_vh = wcm_1993_validate_optimizer(optimize_wcm_1993_sigma_0_ls, data_segmented)

R2 Score For VV:0.7580455656177528
R2 Score For VH:0.3089043193212241


In [None]:
def invert_wcm_1993_sigma_0(backscatter, params):
    P1, P2, P3, P4, P5 = params

    def residuals(x, obs_backscatter):
        L, S = x  # L = LAI, S = SM
        predicted = wcm_1993_sigma_0(P1, P2, P3, P4, P5, L, S)
        return predicted - obs_backscatter

    # Initial guesses for LAI and SM
    initial_guess = [1.0, 0.2]  # Adjust based on expected ranges

    # Bounds for LAI and SM
    bounds = ([0, 0], [10, 1])  # LAI in [0, 10], SM in [0, 1]

    # Store results
    estimated_values = []

    for obs_backscatter in backscatter:
        result = least_squares(residuals, initial_guess, bounds=bounds, args=(obs_backscatter,))
        if result.success:
            estimated_values.append({"LAI": result.x[0], "SM": result.x[1]})
        else:
            estimated_values.append({"LAI": None, "SM": None})

    return estimated_values


# Example usage
theta = np.deg2rad(40)  # Incidence angle in radians

# Invert VV_dB to estimate SM and LAI
inverted_values_vv = invert_wcm_1993_sigma_0(VV_dB, wcm_1993_params_vv)

# Convert results to a DataFrame for better visualization
inverted_df_vv = pd.DataFrame(inverted_values_vv)
r2_lai = r2_score(LAI[:len(inverted_df_vv)], inverted_df_vv['LAI'].dropna())
r2_sm = r2_score(SM[:len(inverted_df_vv)], inverted_df_vv['SM'].dropna())

# Print the R2 scores
print(f"R2 Score for LAI: {r2_lai}")
print(f"R2 Score for SM: {r2_sm}")

inverted_values_vh = invert_wcm_1993_sigma_0(VH_dB, wcm_1993_params_vh)

# Convert results to a DataFrame for better visualization
inverted_df_vh = pd.DataFrame(inverted_values_vh)
r2_lai = r2_score(LAI[:len(inverted_df_vh)], inverted_df_vh['LAI'].dropna())
r2_sm = r2_score(SM[:len(inverted_df_vh)], inverted_df_vh['SM'].dropna())

# Print the R2 scores
print(f"R2 Score for LAI: {r2_lai}")
print(f"R2 Score for SM: {r2_sm}")

R2 Score for LAI: -0.27098955658497026
R2 Score for SM: -5.91801534652686
R2 Score for LAI: 0.4461228300472334
R2 Score for SM: -1.3544541131324062


In [None]:
def optimize_wcm_1993_sigma_0_VV_basin(polarization, L, S):
    def residuals(params):
        predicted = wcm_1993_sigma_0(*params, L, S)
        residuals = predicted - polarization
        return np.sum(residuals**2)  

    initial_guess = [0.1, 1.3, 1.2, 0.9, 0.8]
    result = basinhopping(residuals, initial_guess)
    return result.x

In [None]:
#For some reason basin Hopping REALLY DOES NOT LIKE THE FUNCTION
#wcm_1993_validate_optimizer(optimize_wcm_1993_sigma_0_VV_basin, data_trimmed)

In [None]:
def optimize_wcm_1993_sigma_0_VV_de_hybrid(polarization, L, S):
    def residuals(params):
        predicted = wcm_1993_sigma_0(*params, L, S)
        residuals = predicted - polarization
        return np.sum(residuals**2)  

    bounds = [(-10, 10), (-10, 10), (-10, 10), (-10, 10), (-10, 10)]  
    result_de = differential_evolution(
        residuals,
        bounds,
        maxiter=1000,  
        popsize=20,    
        tol=1e-6       
    )
    result_local = minimize(residuals, result_de.x, method='L-BFGS-B', bounds=bounds)
    return result_local.x

In [None]:
wcm_1993_validate_optimizer(optimize_wcm_1993_sigma_0_VV_de_hybrid, data_trimmed)

R2 Score For VV:0.31116239839870763
R2 Score For VH:-0.27515464365877507


((np.float64(-10.0),
  np.float64(0.03729181058773102),
  np.float64(-10.0),
  np.float64(2.549081516864716),
  np.float64(-1.3383156202799733)),
 (np.float64(-7.046897309721912),
  np.float64(-0.2379008222674929),
  np.float64(-10.0),
  np.float64(-10.0),
  np.float64(0.7879750139291676)))

Using OH for VV as model performs poorly with VV

We will use Dubois Model Now

In [None]:
def dubois(P1, P2, P3, P4, theta, wavelength, sm, roughness):
    return 10*np.log10(P1*np.power((np.sin(theta)/wavelength), P2)*np.power(sm, P3)*np.exp(P4*roughness))

In [None]:
def dubois_validate_optimizer(optimizer_func, data):

    params_VV = optimizer_func(VV_dB, theta, wavelength, SM, s)
    predicted_VV = dubois(*params_VV, theta, wavelength, SM, s)

    r2_VV = r2_score(VV_dB, predicted_VV)

    params_VH = optimizer_func(VH_dB, theta, wavelength, SM, s)
    predicted_VH = dubois(*params_VH, theta, wavelength, SM, s)

    r2_VH = r2_score(VH_dB, predicted_VH)

    clear_output()
    print(f"R2 Score For VV:{np.median(r2_VV)}")
    print(f"R2 Score For VH:{np.median(r2_VH)}")
    return (predicted_VV, predicted_VH)

In [None]:
def optimize_dubois_ls(polarization, theta, wavelength, sm, roughness):
    def residuals(params):
        predicted = dubois(*params, theta=theta, wavelength=wavelength, sm=sm,  roughness=roughness)
        residuals = predicted - polarization
        if not np.all(np.isfinite(residuals)):
            return np.inf
        return residuals

    initial_guess = [0.11, 0.7, 1.5, 0.2]
    result = least_squares(residuals, initial_guess, method='trf', loss='soft_l1', max_nfev=10000)
    return result.x

In [None]:
dubois_validate_optimizer(optimize_dubois_ls, data)

R2 Score For VV:0.6287818446187787
R2 Score For VH:0.6406736694654751


(array([ -9.85370021,  -9.98654847,  -9.92417675, -10.12840596,
        -10.01114385,  -9.1560243 ,  -8.80059455,  -9.01530855,
         -8.31733723,  -7.42402627]),
 array([-17.94841317, -18.15848405, -18.05985663, -18.38280109,
        -18.19737635, -16.84518913, -16.28315362, -16.62267751,
        -15.51898635, -14.10640766]))

Combining Dubois Model with WCM

In [None]:
def wcm_dubois(P1, P2, P5, P6, P7, P8, P9, L, S, theta, wavelength, roughness):
    # P5 > 0
    t2 = np.exp(-2 * P2 * L / np.cos(theta))
    sigma_veg = P1 * np.power(L, P5) * np.cos(theta)  # * (1 - t2) #Including the (1-t2) gets less r2_Score what???
    sigma_soil = 10 * np.log10(P6 * np.power((np.sin(theta) / wavelength), P7) * np.power(S, P8) * np.exp(P9 * roughness))
    return sigma_veg + (t2 * sigma_soil)

In [None]:
def wcm_dubois_validate_optimizer(optimizer_func):

    params_VV = optimizer_func(VV_dB, LAI, SM, theta, wavelength, s)
    predicted_VV = wcm_dubois(*params_VV, LAI, SM, theta, wavelength, s)

    r2_VV = r2_score(VV_dB, predicted_VV)

    params_VH = optimizer_func(VH_dB, LAI, SM, theta, wavelength, s)
    predicted_VH = wcm_dubois(*params_VH, LAI, SM, theta, wavelength, s)

    r2_VH = r2_score(VH_dB, predicted_VH)

    clear_output()
    print(f"R2 Score For VV:{np.median(r2_VV)}")
    print(f"R2 Score For VH:{np.median(r2_VH)}")
    return (predicted_VV, predicted_VH)

In [None]:
def validate_optimizer_wcm_dubois(optimizer_func):
    params_VV = optimizer_func(VV_dB, LAI, SM, theta, wavelength, s)
    predicted_VV = wcm_dubois(*params_VV, LAI, SM, theta, wavelength, s)
    r2_VV = r2_score(VV_dB, predicted_VV)

    params_VH = optimizer_func(VH_dB, LAI, SM, theta, wavelength, s)
    predicted_VH = wcm_dubois(*params_VH, LAI, SM, theta, wavelength, s)
    r2_VH = r2_score(VH_dB, predicted_VH)
    
    clear_output()
    print(f"R2 Score For VV:{np.median(r2_VV)}")
    print(f"R2 Score For VH:{np.median(r2_VH)}")
    

In [None]:
def optimize_wcm_dubois_ls(polarization, L, S, theta, wavelength, roughness):
    def residuals(params):
        try:
            predicted = wcm_dubois(*params, L, S, theta, wavelength, roughness)
            residuals = predicted - polarization
            if not np.all(np.isfinite(residuals)):
                return np.inf
            return residuals
        except Exception as e:
            print(f"An error occurred: {e}")
            return np.inf

    initial_guess = [1]*7
    result = least_squares(residuals, initial_guess, method='trf', loss='soft_l1', max_nfev=10000)
    return result.x

In [None]:
validate_optimizer_wcm_dubois(optimize_wcm_dubois_ls)

R2 Score For VV:0.7668881918837822
R2 Score For VH:0.7229084449648002


In [None]:
def optimize_wcm_dubois_de_hybrid(polarization, L, S, theta, wavelength, roughness):
    def residuals(params):
        try:
            predicted = wcm_dubois(*params, L, S, theta, wavelength, roughness)
            residuals = predicted - polarization
            if not np.all(np.isfinite(residuals)):
                return np.inf
            return np.sum(residuals**2)
        except Exception as e:
            print(f"An error occurred: {e}")
            return np.inf
    
    bounds = [(-10,10)]*7 
    result_de = differential_evolution(
        residuals,
        bounds,
        maxiter=1000,  
        popsize=20,    
        tol=1e-6       
    )
    result_local = minimize(residuals, result_de.x, method='L-BFGS-B', bounds=bounds)
    return result_local.x

In [None]:
validate_optimizer_wcm_dubois(optimize_wcm_dubois_de_hybrid)

R2 Score For VV:0.7677554860618603
R2 Score For VH:0.7054043004037527


OH 2004 Model

In [None]:
def oh_2004_backscatter(P1, P2, P3, P4, P5, P6, P7, P8, theta, roughness, wavelength, sm):
    epsilon_r = P1+P2*sm+P3*(sm**2)+P4*(sm**3)
    cos_theta = np.cos(theta)

    sigma0_linear = P5 * (cos_theta ** P6) * ((roughness / wavelength) ** P7) * (epsilon_r ** P8)
    sigma0_dB = 10 * np.log10(sigma0_linear)

    return sigma0_dB


In [None]:
def validate_optimizer_oh_2004(optimizer_func):
    
    params_VV = optimizer_func(VV_dB, theta, s, wavelength, SM)
    predicted_VV = oh_2004_backscatter(*params_VV, theta, s, wavelength, SM)
    r2_VV = r2_score(VV_dB, predicted_VV)
    
    params_VH = optimizer_func(VH_dB, theta, s, wavelength, SM)
    predicted_VH = oh_2004_backscatter(*params_VH, theta, s, wavelength, SM)
    r2_VH = r2_score(VH_dB, predicted_VH)
    
    
    
    clear_output()
    print(f"R2 Score For VV:{np.median(r2_VV)}")
    print(f"R2 Score For VH:{np.median(r2_VH)}")
    print(params_VH)
    

In [None]:
def optimize_oh_2004_ls(polarization, theta, roughness, wavelength, sm):
    def residuals(params):
        try:
            predicted = oh_2004_backscatter(*params, theta, roughness, wavelength, sm)
            residuals = predicted - polarization
            if not np.all(np.isfinite(residuals)):
                return np.inf
            return residuals
        except Exception as e:
            print(f"An error occurred: {e}")
            return np.inf
        
    initial_guess = [0.1] * 8
    result = least_squares(residuals, initial_guess, method='trf', loss='soft_l1', max_nfev=10000)
    return result.x



In [None]:
validate_optimizer_oh_2004(optimize_oh_2004_ls)

R2 Score For VV:0.7110606216015922
R2 Score For VH:0.6416740190929665
[ 0.03501365 -0.55176648  2.76151426 -4.19572099  0.41439392  0.10048917
  0.12901702  0.36830767]


In [None]:
def optimize_oh_2004_de_hybrid(polarization, theta, roughness, wavelength, sm):
    def residuals(params):
        try:
            predicted = oh_2004_backscatter(*params, theta, roughness, wavelength, sm)
            residuals = predicted - polarization
            if not np.all(np.isfinite(residuals)):
                return np.inf
            return np.sum(residuals**2)
        except Exception as e:
            print(f"An error occurred: {e}")
            return np.inf

    bounds = [(-50,50)] * 4 + [(-10,10)] * 4
    result_de = differential_evolution(
        residuals,
        bounds,
        strategy='rand1bin',  # Alternative strategy
        maxiter=2000,         # Increase iterations
        popsize=30,           # Larger population size
        mutation=(0.5, 1.5),  # Wider mutation range
        recombination=0.9,    # Higher crossover probability
        tol=1e-8              # Tighter convergence tolerance
    )
    result_local = minimize(residuals, result_de.x, method='L-BFGS-B', bounds=bounds)
    return result_local.x

In [None]:
validate_optimizer_oh_2004(optimize_oh_2004_de_hybrid)

  sigma0_linear = P5 * (cos_theta ** P6) * ((roughness / wavelength) ** P7) * (epsilon_r ** P8)
  sigma0_dB = 10 * np.log10(sigma0_linear)


KeyboardInterrupt: 