In [1]:
import numpy as np
import pandas as pd
import re
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from scipy.stats import norm, lognorm, anderson, kstest
from sklearn import metrics
from rf_functions import data_setup, run_rf_reg
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Suppress every RuntimeWarning for the rest of the session.
# NOTE(review): this is global, so it also hides RuntimeWarnings raised inside
# imported helpers such as rf_functions; confirm that is intended.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Turn off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None

In [15]:
# --- Input tables -----------------------------------------------------------
s1_fp = 'input_data/s1_zonal_data.csv'
stats_fp = 'output/stats/scale_shape_both_dates.csv'
moran_fp = 'input_data/moran_max.csv'

# --- Output locations for the single-date runs ------------------------------
output_dir_predict = 'output/rf_predictions/single_date/'
output_dir_metrics = 'output/rf_metrics/single_date/'
out_file_prefix = 'func_test_dB'

# Build the target and predictor tables (date=None is passed to data_setup).
targets, predictors = data_setup(s1_fp, stats_fp, moran_fp, s1_units='dB', date=None)

# Split the target columns by the date token embedded in their names.
# NOTE(review): the variables are named 0218 / 0302 but the tokens matched are
# '0219' / '0304'; confirm which dates are meant before renaming anything.
cols_0218 = [name for name in targets.columns if '0219' in name]
cols_0302 = [name for name in targets.columns if '0304' in name]

# Columns carrying neither date token, minus 'Point_ID' and 'moran_p'.
# list.remove raises ValueError if either column is absent.
cols_misc = [name for name in targets.columns
             if not (name in cols_0218 or name in cols_0302)]
for id_col in ('Point_ID', 'moran_p'):
    cols_misc.remove(id_col)

# Append the undated columns to each date's list, then keep only the names
# that contain '10m'.
cols_0218 = [name for name in cols_0218 + cols_misc if '10m' in name]
cols_0302 = [name for name in cols_0302 + cols_misc if '10m' in name]

# Drop every row that has a missing value in any selected target column.
targets_0218 = targets[cols_0218].dropna()
targets_0302 = targets[cols_0302].dropna()

# Settings shared by both single-date runs.
single_date_kwargs = dict(
    n_runs=100,
    rf_type='single_target',
    output_dir_predict=output_dir_predict,
    output_dir_metrics=output_dir_metrics,
    out_file_prefix=out_file_prefix,
)

# Run the regression once per date, with the predictors restricted to the rows
# that survived dropna() for that date's targets.
# NOTE(review): both runs share the same output directories and file prefix;
# confirm run_rf_reg names its files per target so the second run does not
# overwrite the first.
for date_targets in (targets_0218, targets_0302):
    run_rf_reg(date_targets, predictors.loc[date_targets.index], **single_date_kwargs)

2022-07-16 17:09:42 -- Starting single target RF regression (10 targets total).
2022-07-16 17:09:42 -- Starting target zonal_0219_10m_p5


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 17:19:25 -- Starting target zonal_0219_10m_p25


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 17:29:04 -- Starting target zonal_0219_10m_p75


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 17:38:38 -- Starting target zonal_0219_10m_p95


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 17:48:11 -- Starting target zonal_0219_10m_range


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 17:57:43 -- Starting target zonal_0219_10m_min


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 18:07:19 -- Starting target zonal_0219_10m_med


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 18:17:12 -- Starting target zonal_0219_10m_max


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 18:26:56 -- Starting target zonal_0219_10m_iqr


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 18:36:10 -- Starting target zonal_0219_10m_p95-p5


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 18:45:40 -- Starting single target RF regression (10 targets total).
2022-07-16 18:45:40 -- Starting target zonal_0304_10m_p5


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 18:59:55 -- Starting target zonal_0304_10m_p25


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 19:15:11 -- Starting target zonal_0304_10m_p75


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 19:30:00 -- Starting target zonal_0304_10m_p95


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 19:44:41 -- Starting target zonal_0304_10m_range


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 19:59:28 -- Starting target zonal_0304_10m_min


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 20:14:38 -- Starting target zonal_0304_10m_med


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 20:29:39 -- Starting target zonal_0304_10m_max


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 20:44:34 -- Starting target zonal_0304_10m_iqr


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 20:59:21 -- Starting target zonal_0304_10m_p95-p5


  0%|          | 0/100 [00:00<?, ?it/s]

In [14]:
# Sanity check: total number of missing values left in targets_0218.
targets_0218.isna().sum().sum()

0

In [17]:
# Output locations for the combined (multi-date) run. These rebind the same
# names that the single-date cell set, so any cell run after this one writes
# to the multi_date directories.
output_dir_predict = 'output/rf_predictions/multi_date/'
output_dir_metrics = 'output/rf_metrics/multi_date/'
out_file_prefix = 'func_test_dB'

def remove_date(col, dates=('0219', '0304')):
    """Strip a date token from a column name.

    The first entry of ``dates`` (checked in order) that occurs in ``col`` is
    removed together with the single character that follows it, which is
    assumed to be the ``_`` separator. If the resulting name contains
    ``'_1_'``, the first occurrence is collapsed to ``'_'``.

    Parameters
    ----------
    col : str
        Column name to clean.
    dates : sequence of str, optional
        Date tokens to look for. Defaults to the two tokens used in this
        notebook, ``('0219', '0304')``.

    Returns
    -------
    str
        ``col`` without the date token, or ``col`` unchanged when none of the
        tokens is present.
    """
    for date in dates:
        date_start = col.find(date)
        if date_start != -1:
            break
    else:
        # No date token in the name: nothing to strip.
        return col

    # Drop the token plus the one character after it (the separator).
    new_col = col[:date_start] + col[date_start + len(date) + 1:]

    # Collapse the first '_1_' left in the name to a single underscore.
    # NOTE(review): presumably '_1' is an index tied to the date; confirm.
    return new_col.replace('_1_', '_', 1)

# Date-free column names, so the two dates' frames line up column-for-column.
cols_0218_new = list(map(remove_date, cols_0218))
cols_0302_new = list(map(remove_date, cols_0302))

# Rename in place; targets_0218 / targets_0302 carry the date-free names from
# here on.
targets_0218.columns = cols_0218_new
targets_0302.columns = cols_0302_new

# Stack the two dates row-wise into a single target table.
# NOTE(review): both frames keep their original row labels, so targets_all can
# contain repeated index values, and predictors is passed unsliced below;
# confirm run_rf_reg aligns rows the way this cell expects.
targets_all = pd.concat([targets_0218, targets_0302], axis=0)

run_rf_reg(
    targets_all,
    predictors,
    n_runs=100,
    rf_type='single_target',
    output_dir_predict=output_dir_predict,
    output_dir_metrics=output_dir_metrics,
    out_file_prefix=out_file_prefix,
)

2022-07-16 21:55:24 -- Starting single target RF regression (10 targets total).
2022-07-16 21:55:24 -- Starting target zonal_10m_p5


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 22:18:02 -- Starting target zonal_10m_p25


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 22:40:34 -- Starting target zonal_10m_p75


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 23:02:58 -- Starting target zonal_10m_p95


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 23:25:24 -- Starting target zonal_10m_range


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-16 23:46:48 -- Starting target zonal_10m_min


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-17 00:08:07 -- Starting target zonal_10m_med


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-17 00:29:20 -- Starting target zonal_10m_max


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-17 00:50:39 -- Starting target zonal_10m_iqr


  0%|          | 0/100 [00:00<?, ?it/s]

2022-07-17 01:11:52 -- Starting target zonal_10m_p95-p5


  0%|          | 0/100 [00:00<?, ?it/s]