In [35]:
from functions import *

In [36]:
import matplotlib.pyplot as plt
import glob
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import geopandas as gpd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import glob

## Load data

### Choose inputs

In [37]:
source_dir = '../data/preprocessed'
data = ['lst','wt','ndvi']
all_ds = {k: [] for k in data}
for d in data:
    source_path = os.path.join(source_dir,d)
    for subdir, dirs, files in os.walk(source_path):
        for file in files:
            if file.endswith('.csv'):
                all_ds[d].append(pd.read_csv(f'{subdir}/{file}',index_col=0))
                

To choose specific rivers or not

In [38]:
filter_river = 2
if filter_river != None:
    lst_array = all_ds['lst'][filter_river].values.flatten()
    ndvi_array = all_ds['ndvi'][filter_river].values.flatten()
    wt_array = all_ds['wt'][filter_river].values.flatten()
else:
    lst_array = np.concatenate([dataset.values.flatten() for dataset in all_ds['lst']])
    ndvi_array = np.concatenate([dataset.values.flatten() for dataset in all_ds['ndvi']])
    wt_array = np.concatenate([dataset.values.flatten() for dataset in all_ds['wt']])

In [39]:
use_ndvi = True
if use_ndvi:
  array_combined = np.stack((lst_array, ndvi_array), axis=-1)
  X = array_combined

else:
  X = lst_array
  X = X.reshape(-1, 1)

y = wt_array

In [40]:
X.shape

(14380, 2)

## Linear Regression model

In [41]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### K-fold cross validation

In [42]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

model = LinearRegression()

In [43]:
# Iterate over the folds
for train_index, test_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model
    model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)

    total_results = evaluate_model(y_test, y_pred)


In [44]:
total_results

{'MAE': 0.2016718203389694,
 'MSE': 0.07173727205913265,
 'RMSE': 1.1133985378326638,
 'R²': 0.016662767695489134,
 'MAPE (%)': 112012124246.40532,
 'MSE sample-wise': 1.2396563040479138}

### Save model results

In [45]:
model_name = "pixel_wise_LR"
details = {'RMSE':total_results['RMSE'],'Variables':'lst, ndvi','Input': 'scaled la Broye', 'Output': 'wt', 'Resolution': 256, 'nº samples': len(y)}

file_path = f"../results/{model_name}_results.xlsx"
save_excel(file_path, details, excel = 'Results')

total_results['Model'] = model_name
file_path = f"../results/all_results.xlsx"
save_excel(file_path, total_results, excel = 'Results')