In [None]:
def mlp_lr_sm(paths, features, y_label):
    """
    Train and evaluate a small MLP regressor on the files matched by ``paths``.

    Steps: load every matched file into one frame, clean it, one-hot encode
    the ``orbit`` and ``platform`` columns, split the selected columns 80/20
    into train and test rows, standardise the inputs with statistics of the
    training rows, fit a Keras network with two hidden layers and early
    stopping, then plot the training history, the test predictions and the
    histogram of the test errors.

    Arguments:
        paths: glob pattern (str) selecting the input files. Every match is
            read with ``geopandas.read_file``. The files must provide the
            columns 'date', 's2_distance', 'soil_moisture', 'VV', 'NDVI',
            'orbit' and 'platform' (plus every requested feature).
        features: list of column names to use, INCLUDING the target column.
            The one-hot columns 'ASCENDING', 'DESCENDING', 'Sentinel_A' and
            'Sentinel_B' created by this function may be selected as well.
        y_label: name of the target column; must be listed in ``features``.

    Returns:
        None. Results are printed and drawn as seaborn/matplotlib figures.

    Raises:
        KeyError: if ``y_label`` is not contained in ``features``.
        ValueError: if ``paths`` does not match any file.
    """

    # Import modules (function-local, so the definition cell is self-contained)
    from glob import glob
    import geopandas as gpd
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns
    from tensorflow.keras import layers
    from tensorflow.keras.callbacks import Callback, EarlyStopping
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import RMSprop

    # Fail early (before the slow loading/plotting) on an unusable request
    features = list(features)
    if y_label not in features:
        raise KeyError(f'y_label {y_label!r} must be one of the features {features}')

    ## Initialize data
    # Files with data, in a stable order
    file_paths = sorted(glob(paths))
    if not file_paths:
        raise ValueError(f'No files matched the pattern {paths!r}')

    # Load all files into one Dataframe
    gdf = pd.concat([gpd.read_file(path) for path in file_paths], ignore_index=True)
    print(f'Rows before preprocessing {len(gdf)}')

    ## Preprocessing
    # Set type of date column to datetime object
    gdf['date'] = gdf['date'].astype('datetime64[ns]')

    # Convert the day counts to timedeltas; missing values become NaT and are
    # dropped by the s2_distance filter below instead of raising
    gdf['s2_distance'] = pd.to_timedelta(gdf['s2_distance'], unit='D')

    # Remove unnecessary columns
    gdf = gdf.drop(columns=['CloudMask'], errors='ignore')

    # Clean NaN values within subset columns (major variables to inspect)
    gdf = gdf.dropna(how='any', subset=['soil_moisture', 'VV'])

    # Drop lower and upper 1% of data to eliminate outliers. The filters run
    # one after another, so each quantile is taken from the rows that are left.
    # NOTE: the NDVI filter is applied even when NDVI is not a selected
    # feature; rows with missing NDVI are removed by it as well.
    for column in ('soil_moisture', 'VV', 'NDVI'):
        lower, upper = gdf[column].quantile([0.01, 0.99])
        gdf = gdf[gdf[column].gt(lower) & gdf[column].lt(upper)]

    # Keep only rows whose s2_distance is strictly within +/- 7 days
    max_gap = pd.Timedelta(days=7)
    within_gap = gdf['s2_distance'].gt(-max_gap) & gdf['s2_distance'].lt(max_gap)
    # .copy() so the column assignments below do not write into a slice
    gdf = gdf[within_gap].copy()

    # TODO: filtering of measurements taken on frozen soil is not implemented
    print(f'Rows after preprocessing {len(gdf)}')

    ## One Hot Encoding
    # Replace the categorical columns by 0/1 indicator columns
    orbit = gdf.pop('orbit')
    platform = gdf.pop('platform')

    gdf['ASCENDING'] = (orbit == 'ASCENDING').astype('int64')
    gdf['DESCENDING'] = (orbit == 'DESCENDING').astype('int64')

    gdf['Sentinel_A'] = (platform == 'A').astype('int64')
    gdf['Sentinel_B'] = (platform == 'B').astype('int64')

    ## Statistics
    # Pairwise scatter plots with KDE on the diagonal (all preprocessed rows)
    sns.pairplot(gdf[features], diag_kind='kde')

    ## Split into Train and Test
    df = pd.DataFrame(gdf[features]).reset_index(drop=True)
    train_df = df.sample(frac=0.8, random_state=0)
    test_df = df.drop(train_df.index)

    train_labels = train_df.pop(y_label)
    test_labels = test_df.pop(y_label)

    # Statistics of the training inputs only, so that no information from the
    # test rows leaks into the normalisation
    train_stats = train_df.describe().transpose()
    print(train_stats)

    ## Normalize data
    feature_mean = train_stats['mean']
    # A constant column has std 0; divide by 1 instead to avoid NaN/inf inputs
    feature_std = train_stats['std'].replace(0, 1)

    def norm(frame):
        """Standardise ``frame`` column-wise with the training mean/std."""
        return (frame - feature_mean) / feature_std

    n_train_df = norm(train_df)
    n_test_df = norm(test_df)

    ## The Model
    # Architecture
    def build_model():
        """Build and compile the 64-64-1 fully connected regression network."""
        model = Sequential([
            # densely (fully) connected hidden layer
            layers.Dense(64, activation='relu', input_shape=[len(n_train_df.keys())]),
            # second dense hidden layer
            layers.Dense(64, activation='relu'),
            # output layer: one linear unit for the regression target
            layers.Dense(1),
        ])

        model.compile(
            loss='mse',  # mean squared error
            optimizer=RMSprop(0.001),
            metrics=['mae', 'mse'])

        return model

    model = build_model()

    # Inspect Model (summary() prints by itself and returns None)
    model.summary()

    ## Train the model
    # Callbacks
    class PrintDot(Callback):
        """Print one dot per epoch and start a new line every 100 epochs."""

        def on_epoch_end(self, epoch, logs=None):
            if epoch % 100 == 0:
                print('')
            print('.', end='')

    # Stop the training when val_loss has not improved for 10 consecutive epochs
    stop_improving = EarlyStopping(monitor='val_loss', patience=10)

    EPOCHS = 1000

    history = model.fit(
        x=n_train_df,
        y=train_labels,
        epochs=EPOCHS,
        validation_split=0.2,
        verbose=0,
        callbacks=[PrintDot(), stop_improving])

    def history_frame(fit_history):
        """Return the per-epoch metrics of ``fit_history`` as a DataFrame."""
        frame = pd.DataFrame(fit_history.history)
        frame['epoch'] = fit_history.epoch
        return frame

    print(history_frame(history).tail())

    def plot_history(fit_history):
        """Plot train/validation MAE and MSE over the epochs of ``fit_history``."""
        hist = history_frame(fit_history)

        panels = (
            ('mae', 'Mean Abs Error [m^3/m^3]'),
            ('mse', 'Mean Square Error [(m^3/m^3)^2]'),
        )
        for metric, y_axis_label in panels:
            plt.figure()
            plt.xlabel('Epoch')
            plt.ylabel(y_axis_label)
            plt.plot(hist['epoch'], hist[metric], label='Train Error')
            plt.plot(hist['epoch'], hist['val_' + metric], label='Val Error')
            plt.legend()
            # leave 50% head room above the largest training value
            plt.ylim([0, hist[metric].max() * 1.5])

    plot_history(history)

    # evaluate() returns the loss followed by the compiled metrics (mae, mse)
    loss, mae, mse = model.evaluate(n_test_df, test_labels, verbose=0)
    print("Testing set Mean Abs Error: {:5.2f} m^3/m^3".format(mae))

    ## Make Predictions
    test_predictions = model.predict(n_test_df).flatten()

    # Predicted vs. true values with the 1:1 line as reference
    plt.figure()
    plt.scatter(test_labels, test_predictions)
    plt.xlabel('True Values m^3/m^3')
    plt.ylabel('Predictions m^3/m^3')
    plt.axis('equal')
    plt.axis('square')
    plt.xlim([0, 0.5])
    plt.ylim([0, 0.5])
    plt.plot([-100, 100], [-100, 100])

    # Distribution of the prediction errors on the test rows
    plt.figure()
    error = test_predictions - test_labels
    plt.hist(error, bins=25)
    plt.xlabel('Prediction Error m^3/m^3')
    plt.ylabel('Count')
    

In [None]:
# Feature set: VV plus the one-hot orbit/platform columns
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV, the one-hot orbit/platform columns and NDVI
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV, the one-hot orbit/platform columns, NDVI and angle
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV, the one-hot orbit/platform columns, NDVI, angle and ismn_id
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle', 'ismn_id',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV and VH, the one-hot orbit/platform columns, NDVI, angle and ismn_id
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV', 'VH',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle', 'ismn_id',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV and VH, the one-hot orbit/platform columns, NDVI, angle,
# ismn_id and WaterVapor
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV', 'VH',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle', 'ismn_id',
        'WaterVapor',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV and VH, the one-hot orbit/platform columns, NDVI, angle,
# ismn_id, WaterVapor and NIR
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV', 'VH',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle', 'ismn_id',
        'WaterVapor', 'NIR',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV and VH, the one-hot orbit/platform columns, NDVI, angle,
# ismn_id and the band columns from Aerosols through NIR
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV', 'VH',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle', 'ismn_id',
        'Aerosols', 'Blue', 'Green', 'Red',
        'RedEdge1', 'RedEdge2', 'RedEdge3', 'RedEdge4',
        'WaterVapor', 'NIR',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)

In [None]:
# Feature set: VV and VH, the one-hot orbit/platform columns, NDVI, angle and
# the band columns from Aerosols through NIR (no ismn_id)
mlp_lr_sm(
    paths='C://Users/USER/Desktop/Master_Irrigation/03_GIS/ground_trouth/sentinel_ismn_data/*',
    features=[
        'VV', 'VH',
        'ASCENDING', 'DESCENDING',
        'Sentinel_A', 'Sentinel_B',
        'NDVI', 'angle',
        'Aerosols', 'Blue', 'Green', 'Red',
        'RedEdge1', 'RedEdge2', 'RedEdge3', 'RedEdge4',
        'WaterVapor', 'NIR',
        'soil_moisture',
    ],
    y_label='soil_moisture',
)