In [None]:
import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import torch.nn.functional as F
from IPython.display import clear_output

In [129]:
data = pd.read_csv(r"C:\Users\Mayur\Documents\College\4th sem\Exploratory\Data\Sentinel1_MODIS_SM_Masked_Urban_YellowRiver_11km.csv")

In [130]:
def clean_data(data):
    # Drop rows with missing data in columns: 'LAI', 'SoilMoisture' and 3 other columns
    data = data.dropna(subset=['LAI', 'SoilMoisture', 'VH', 'VV', 'date'])
    # Group by 'date' and calculate the average of numeric columns
    data = data.groupby('date').mean(numeric_only=True).reset_index()
    # Drop column: 'SoilRoughness_placeholder'
    data = data.drop(columns=['SoilRoughness_placeholder'])
    # Drop column: 'Frequency_GHz'
    data = data.drop(columns=['Frequency_GHz'])
    # Extract year, month, and day from the 'date' column
    data['Year'] = pd.to_datetime(data['date']).dt.year
    data['Month'] = pd.to_datetime(data['date']).dt.month
    data['Day'] = pd.to_datetime(data['date']).dt.day
    # Drop column: 'date'
    data = data.drop(columns=['date'])
    # Drop column: 'Year'
    data = data.drop(columns=['Year'])
    # Convert Month and Day columns to numeric
    data['Month'] = pd.to_numeric(data['Month'], errors='coerce')
    data['Day'] = pd.to_numeric(data['Day'], errors='coerce')
    # Add two new columns for Sin and Cos transformations of Month
    data['Month_Sin'] = np.sin(2 * np.pi * (data['Month'] / 12))
    data['Month_Cos'] = np.cos(2 * np.pi * (data['Month'] / 12))
    # Add sin and cos transformations of Day
    data['Day_Sin'] = np.sin(2 * np.pi * (data['Day'] / 30))
    data['Day_Cos'] = np.cos(2 * np.pi * (data['Day'] / 30))
    # Drop column: 'Day'
    data = data.drop(columns=['Day'])
    # Drop column: 'Month'
    data = data.drop(columns=['Month'])
    # Convert VV and VH from decibels to linear
    data['VV'] = 10 ** (data['VV'] / 10)
    data['VH'] = 10 ** (data['VH'] / 10)
    # Scale VV and VH normally
    scaler_vv_vh = StandardScaler()
    data[['VV', 'VH']] = scaler_vv_vh.fit_transform(data[['VV', 'VH']])
    # Scale SoilMoisture normally
    scaler_sm = StandardScaler()
    data['SoilMoisture'] = scaler_sm.fit_transform(data[['SoilMoisture']])
    # Scale IncidenceAngle with 1/10 importance of SoilMoisture
    data['IncidenceAngle'] = data['IncidenceAngle'] * 0.1 / data['SoilMoisture'].std()
    # Scale LAI with 0.75 importance of SoilMoisture
    data['LAI'] = data['LAI'] * 0.75 / data['SoilMoisture'].std()
    # Sin and Cos columns remain unchanged
    # (No operation needed for Month_Sin, Month_Cos, Day_Sin, Day_Cos)
    # Ensure the result is a DataFrame
    data = pd.DataFrame(data)
    return data

data_clean = clean_data(data.copy())
data_clean.head()

Unnamed: 0,IncidenceAngle,LAI,SoilMoisture,VH,VV,Month_Sin,Month_Cos,Day_Sin,Day_Cos
0,4.172704,0.372937,-0.926507,-0.316372,0.775638,1.224647e-16,-1.0,0.866025,0.5
1,3.552232,0.500332,-1.288683,0.919617,0.998619,1.224647e-16,-1.0,-0.406737,-0.913545
2,3.553397,0.621952,-0.326103,4.863161,3.679401,1.224647e-16,-1.0,-0.207912,0.978148
3,3.552746,1.093448,-0.563124,0.889493,1.093552,-0.5,-0.866025,0.743145,-0.669131
4,4.595357,1.166352,-0.805235,0.651093,0.7536,-0.5,-0.866025,-0.207912,-0.978148


In [131]:
feature_cols = ['VV', 'VH', 'IncidenceAngle', 'Month_Sin', 'Month_Cos', 'Day_Sin', 'Day_Cos']
target_cols = ['LAI', 'SoilMoisture']

X = data_clean[feature_cols].values
y = data_clean[target_cols].values

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

r2_lai = r2_score(y_test[:, 0], y_pred[:, 0])
r2_sm = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"Random Forest - LAI R²: {r2_lai:.4f}, SM R²: {r2_sm:.4f}")

Random Forest - LAI R²: 0.5205, SM R²: 0.0650


In [133]:
mlp = MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=5000, random_state=42)
mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)

r2_lai = r2_score(y_test[:, 0], y_pred[:, 0])
r2_sm = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"MLP — LAI R²: {r2_lai:.4f}, SM R²: {r2_sm:.4f}")

MLP — LAI R²: 0.4811, SM R²: -0.4191


In [134]:
from sklearn.utils import resample
from sklearn.neural_network import MLPRegressor


def train_ensemble(X_train, y_train, n_models=5):
    ensemble = []
    for _ in range(n_models):
        X_res, y_res = resample(X_train, y_train)
        model = MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=2000, random_state=None)
        model.fit(X_res, y_res)
        ensemble.append(model)
    return ensemble


def ensemble_predict(models, X):
    preds = np.array([m.predict(X) for m in models])
    return preds.mean(axis=0), preds.std(axis=0)


ensemble = train_ensemble(X_train, y_train, n_models=10)
y_mean, y_std = ensemble_predict(ensemble, X_test)

r2_lai = r2_score(y_test[:, 0], y_mean[:, 0])
r2_sm = r2_score(y_test[:, 1], y_mean[:, 1])

print(f"Ensemble — LAI R²: {r2_lai:.4f}, SM R²: {r2_sm:.4f}")
print("Predictive std (first few):", y_std[:5])

Ensemble — LAI R²: 0.5576, SM R²: -0.3671
Predictive std (first few): [[0.38931538 0.66225983]
 [0.24770027 0.91559111]
 [0.16370809 0.28241463]
 [0.30019186 0.35639699]
 [0.17563445 0.14012996]]


Monte Carlo Dropout with PyTorch

In [135]:
class MCDropoutMLP(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_rate=0.2):
        super(MCDropoutMLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Dropout ON during train and test
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        return self.out(x)

In [136]:
# Convert data to torch tensors
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train_torch, y_train_torch), batch_size=16, shuffle=True)

model = MCDropoutMLP(input_dim=X.shape[1], output_dim=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Train
for epoch in range(300):
    for xb, yb in train_loader:
        optimizer.zero_grad()
        loss = loss_fn(model(xb), yb)
        loss.backward()
        optimizer.step()

In [137]:
r2_lai = r2_score(y_test[:, 0], y_mean[:, 0])
r2_sm = r2_score(y_test[:, 1], y_mean[:, 1])

print(f"MC Dropout PyTorch — LAI R²: {r2_lai:.4f}, SM R²: {r2_sm:.4f}")
print("Predictive std (first few):\n", y_std[:5])

MC Dropout PyTorch — LAI R²: 0.5576, SM R²: -0.3671
Predictive std (first few):
 [[0.38931538 0.66225983]
 [0.24770027 0.91559111]
 [0.16370809 0.28241463]
 [0.30019186 0.35639699]
 [0.17563445 0.14012996]]


In [138]:
print(f"X_train_tensor shape: {X_train_torch.shape}")

X_train_tensor shape: torch.Size([362, 7])


In [139]:
import torch
import torch.nn as nn
import torch.optim as optim


class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lstm_out, (hn, cn) = self.lstm(x)
        out = self.fc(lstm_out[:, -1, :])  # Take the output from the last time step
        return out


# Hyperparameters
input_size = 7  # Number of input features
hidden_size = 64
output_size = 2  # Predicting LAI or SM

# Example input (batch_size=362, seq_len=7, input_size=7)
X_train_tensor = X_train_torch
X_train_tensor = X_train_tensor.unsqueeze(1)  # Add a sequence dimension
y_train_tensor = y_train_torch 

# Model, loss, optimizer
LSTM_model = LSTMModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 50
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [10/50], Loss: 0.8619
Epoch [20/50], Loss: 0.7954
Epoch [30/50], Loss: 0.7719
Epoch [40/50], Loss: 0.7594
Epoch [50/50], Loss: 0.7502


  return F.mse_loss(input, target, reduction=self.reduction)


In [151]:
LSTM_model.eval()  # Switch to evaluation mode
with torch.no_grad():
    # Example for inference
    y_pred_lstm = LSTM_model(X_test_torch.unsqueeze(1))  # X_test_tensor is your test data
    y_true = y_test_torch  # Actual values of LAI or SM

# Calculate R2 score
r2_lstm = r2_score(y_test_torch, (y_pred_lstm.numpy()))
print(f"LSTM Model R^2: {r2_lstm:.4f}")

LSTM Model R^2: -0.7460


In [141]:
class Generator(nn.Module):
    def __init__(self, noise_dim, output_dim):
        super(Generator, self).__init__()
        self.fc1 = nn.Linear(noise_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, z):
        z = torch.relu(self.fc1(z))
        z = torch.relu(self.fc2(z))
        return torch.tanh(self.fc3(z))


class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))


# Hyperparameters
noise_dim = 100
input_dim = 7  # Assuming we're generating input features like LAI or SM
lr = 0.0002

# Initialize models
generator = Generator(noise_dim, input_dim)
discriminator = Discriminator(input_dim)

# Loss and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Training loop for GAN
epochs = 50
for epoch in range(epochs):
    # Train Discriminator
    real_data = torch.randn(32, input_dim)  # Real data (e.g., from dataset)
    fake_data = generator(torch.randn(32, noise_dim))

    optimizer_D.zero_grad()
    real_loss = criterion(discriminator(real_data), torch.ones(32, 1))
    fake_loss = criterion(discriminator(fake_data.detach()), torch.zeros(32, 1))
    d_loss = real_loss + fake_loss
    d_loss.backward()
    optimizer_D.step()

    # Train Generator
    optimizer_G.zero_grad()
    g_loss = criterion(discriminator(fake_data), torch.ones(32, 1))
    g_loss.backward()
    optimizer_G.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}')

Epoch [10/50], D Loss: 1.3432, G Loss: 0.6777
Epoch [20/50], D Loss: 1.3215, G Loss: 0.6679
Epoch [30/50], D Loss: 1.3055, G Loss: 0.6598
Epoch [40/50], D Loss: 1.2834, G Loss: 0.6568
Epoch [50/50], D Loss: 1.2425, G Loss: 0.6706
