In [1]:
# load dataframe
import pandas as pd
pd.set_option('display.max_columns', 999)

# Read the CSV (its first column becomes the index), drop the separate
# month/day/year columns and parse the 'date' column into timestamps.
df = (
    pd.read_csv('Droughts_satelite_data_events_1month_normalized_corrected.csv', index_col=0)
    .drop(columns=['month', 'day', 'year'])
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
df.head()

Unnamed: 0,Country,District,date,NDVI,EVI,precipitation_per_hour_v1,precipitation_per_hour_v2,surface_temperature_daytime,surface_temperature_nighttime,evapotranspiration,rainfall,SoilMoisture00_10cm,SoilMoisture10_40cm,SoilMoisture40_100cm,SoilMoisture100_200cm,SoilTemperature00_10cm,SoilTemperature10_40cm,SoilTemperature40_100cm,SoilTemperature100_200cm,air_temperature,wind_speed,SPEI_1month,SPEI_2month,SPEI_3month,SPEI_4month,SPEI_5month,SPEI_6month,SPEI_7month,SPEI_8month,SPEI_9month,SPEI_10month,SPEI_11month,SPEI_12month,drought_reported,drought_news_article,drought_desinventar,drought_estimated
0,Uganda,ABIM,2000-03-01,-1.874641,-1.570809,-1.217134,-1.210825,1.914201,1.651377,-1.632338,-1.362386,-1.683524,-1.594857,-1.053627,-1.268783,2.063513,2.163876,2.106542,1.816289,2.028033,0.979077,-0.776332,,,,,,,,,,,,False,False,False,False
1,Uganda,ABIM,2000-04-01,-1.206775,-1.14346,0.554526,0.337322,1.06859,0.768848,0.34381,0.380445,-0.248744,-1.965811,-1.469958,-1.413054,0.80664,1.26101,1.648963,1.964269,1.483038,0.049384,-0.231381,-0.569433,,,,,,,,,,,False,False,False,False
2,Uganda,ABIM,2000-05-01,0.544968,0.484206,0.72615,0.654021,-0.209722,0.109642,1.241335,1.416795,0.717814,0.108791,-0.620995,-0.999464,-0.426027,0.004497,0.442751,1.120464,0.312072,-0.720565,0.874819,0.344453,0.075238,,,,,,,,,,False,False,False,False
3,Uganda,ABIM,2000-06-01,0.853214,0.948032,0.974735,0.485003,-0.526822,-0.525072,0.585442,1.246105,0.707451,-0.143555,-0.602664,-0.723248,-0.853696,-0.416556,-0.098606,0.309552,-0.555277,-1.249491,1.353014,1.302469,0.917779,0.711107,,,,,,,,,False,False,False,False
4,Uganda,ABIM,2000-07-01,1.180418,1.321811,0.368088,-0.002803,-1.099682,-1.324882,0.660043,1.021191,1.218931,1.271444,0.786433,0.311089,-1.53164,-1.284248,-1.029472,-0.551573,-1.411498,-1.310899,0.223291,0.865742,1.070958,0.836407,0.635385,,,,,,,,False,False,False,False


In [2]:
# number of rows flagged as an estimated drought
df.loc[df['drought_estimated']].shape[0]

305

In [3]:
# define target and predictors
y_label = ['drought_estimated']

# Predictor columns, in the order they are used downstream. The soil and SPEI
# families follow a regular naming scheme, so they are generated instead of typed out.
X_labels = (
    ['NDVI', 'EVI',
     'precipitation_per_hour_v1', 'precipitation_per_hour_v2',
     'surface_temperature_daytime', 'surface_temperature_nighttime',
     'evapotranspiration', 'rainfall']
    + ['SoilMoisture' + depth for depth in ('00_10cm', '10_40cm', '40_100cm', '100_200cm')]
    + ['SoilTemperature' + depth for depth in ('00_10cm', '10_40cm', '40_100cm', '100_200cm')]
    + ['air_temperature', 'wind_speed']
    + ['SPEI_{}month'.format(window) for window in range(1, 13)]
)

In [4]:
# --- define utility functions ---
from statistics import median
import numpy as np

def crop_window_around_drought(df, drought_label, lower_bound=12, upper_bound=12):
    """Keep only the rows that lie in a window around flagged events.

    For every row where ``df[drought_label]`` is true, the rows at positions
    ``[event_pos - lower_bound, event_pos + upper_bound]`` (inclusive, clipped
    to the frame) are kept. Overlapping windows are merged, so each row appears
    at most once and the original row order is preserved.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to slice; rows are addressed by position, so the index may
        contain duplicate labels.
    drought_label : str
        Name of the boolean column that marks the events.
    lower_bound : int, default 12
        Number of rows to keep before each event.
    upper_bound : int, default 12
        Number of rows to keep after each event.

    Returns
    -------
    pandas.DataFrame
        The selected rows. When no event is flagged the result is an empty
        frame with the same columns and dtypes as ``df``.
    """
    n_rows = len(df)

    # Locate events by position rather than by index label: a label-based
    # lookup would also match non-event rows that share a label with an event.
    event_positions = np.flatnonzero(np.asarray(df[drought_label], dtype=bool))

    keep = np.zeros(n_rows, dtype=bool)
    for pos in event_positions:
        start = max(pos - lower_bound, 0)
        # clamp to [0, n_rows] so the slice can never wrap around from the end
        stop = max(min(pos + upper_bound + 1, n_rows), 0)
        keep[start:stop] = True

    # With no events the mask is all False and iloc yields an empty frame
    # that still carries df's columns and dtypes.
    return df.iloc[np.flatnonzero(keep)]

In [5]:
# crop around events
# Each district is cropped on its own so that a window never spills over into
# the rows of a neighbouring district. The per-district windows are collected
# in a list and concatenated once, instead of growing a frame inside the loop.
district_windows = []
for dist, group in df.groupby('District', sort=False):

    # crop around event
    window = crop_window_around_drought(group, y_label[0])

    # districts without any event contribute nothing
    if len(window) > 0:
        district_windows.append(window)

if district_windows:
    df_cropped = pd.concat(district_windows, ignore_index=True)
else:
    # no event anywhere: empty frame that keeps df's columns and dtypes
    df_cropped = df.iloc[0:0].reset_index(drop=True)

print(len(df), '-->', len(df_cropped))
# NOTE: `df` is rebound to the cropped data here, so re-running this cell
# without re-running the loading cell crops the already-cropped frame.
df = df_cropped.copy()

39532 --> 5972


In [8]:
import dateutil.relativedelta

n_time_steps = 6

# One list of column names per lag t = 0..n_time_steps: '<feature>_<t>' holds
# the value of <feature> observed t months before the row's own date.
X_labels_time = [[x + '_' + str(ts) for x in X_labels] for ts in range(0, n_time_steps + 1)]
X_labels_all = [name for lag_names in X_labels_time for name in lag_names]
labels_all = y_label + X_labels_all

districts = df['District'].unique()
# Month starts from January 2000 up to the last date in the data. The end must
# come from the 'date' column: the row index of `df` holds positions, not dates.
# Not used to build df_final below; kept in case a later cell refers to it.
months = pd.date_range(start='1/1/2000', end=df['date'].max(), freq='MS')


def build_lagged_features(data, feature_cols, label_cols, lag_col_names):
    """Build one row per (district, date) holding current and lagged features.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'District' and 'date' plus ``feature_cols``
        and ``label_cols``.
    feature_cols : list of str
        Columns whose values are copied into the lag columns.
    label_cols : list of str
        Target column(s); cast to bool in the result.
    lag_col_names : list of list of str
        ``lag_col_names[t]`` are the output names for the features taken
        ``t`` calendar months before the row's date (same order as
        ``feature_cols``).

    Returns
    -------
    pandas.DataFrame
        Indexed by a ('district', 'date') MultiIndex with one row per pair
        present in ``data`` (order of first appearance). Columns are
        ``label_cols`` followed by the lag columns. A lag column is NaN when
        ``data`` has no row for that district at the earlier date.
    """
    dates = pd.to_datetime(data['date'])
    keyed = data.copy()
    keyed.index = pd.MultiIndex.from_arrays([data['District'], dates],
                                            names=['district', 'date'])

    # If a (district, date) pair occurs more than once, features are taken from
    # its first row and the label from its last row.
    is_first = ~keyed.index.duplicated(keep='first')
    is_last = ~keyed.index.duplicated(keep='last')

    features = keyed.loc[is_first, feature_cols].astype(float)
    target_index = features.index
    target_districts = target_index.get_level_values('district')
    target_dates = target_index.get_level_values('date')

    pieces = [keyed.loc[is_last, label_cols].reindex(target_index).astype(bool)]
    for t, names in enumerate(lag_col_names):
        # date whose observation supplies the lag-t values for each target row
        lookup_dates = target_dates if t == 0 else target_dates - pd.DateOffset(months=t)
        lookup = pd.MultiIndex.from_arrays([target_districts, lookup_dates])
        # reindex leaves NaN wherever the earlier month is not available
        lagged = features.reindex(lookup)
        lagged.index = target_index
        lagged.columns = names
        pieces.append(lagged)

    return pd.concat(pieces, axis=1)


# Only (district, date) pairs that actually occur in `df` get a row, so every
# row carries a real label (casting a missing label to bool would give True).
df_final = build_lagged_features(df, X_labels, y_label, X_labels_time)
df_final.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,drought_estimated,NDVI_0,EVI_0,precipitation_per_hour_v1_0,precipitation_per_hour_v2_0,surface_temperature_daytime_0,surface_temperature_nighttime_0,evapotranspiration_0,rainfall_0,SoilMoisture00_10cm_0,SoilMoisture10_40cm_0,SoilMoisture40_100cm_0,SoilMoisture100_200cm_0,SoilTemperature00_10cm_0,SoilTemperature10_40cm_0,SoilTemperature40_100cm_0,SoilTemperature100_200cm_0,air_temperature_0,wind_speed_0,SPEI_1month_0,SPEI_2month_0,SPEI_3month_0,SPEI_4month_0,SPEI_5month_0,SPEI_6month_0,SPEI_7month_0,SPEI_8month_0,SPEI_9month_0,SPEI_10month_0,SPEI_11month_0,SPEI_12month_0,NDVI_1,EVI_1,precipitation_per_hour_v1_1,precipitation_per_hour_v2_1,surface_temperature_daytime_1,surface_temperature_nighttime_1,evapotranspiration_1,rainfall_1,SoilMoisture00_10cm_1,SoilMoisture10_40cm_1,SoilMoisture40_100cm_1,SoilMoisture100_200cm_1,SoilTemperature00_10cm_1,SoilTemperature10_40cm_1,SoilTemperature40_100cm_1,SoilTemperature100_200cm_1,air_temperature_1,wind_speed_1,SPEI_1month_1,SPEI_2month_1,SPEI_3month_1,SPEI_4month_1,SPEI_5month_1,SPEI_6month_1,SPEI_7month_1,SPEI_8month_1,SPEI_9month_1,SPEI_10month_1,SPEI_11month_1,SPEI_12month_1,NDVI_2,EVI_2,precipitation_per_hour_v1_2,precipitation_per_hour_v2_2,surface_temperature_daytime_2,surface_temperature_nighttime_2,evapotranspiration_2,rainfall_2,SoilMoisture00_10cm_2,SoilMoisture10_40cm_2,SoilMoisture40_100cm_2,SoilMoisture100_200cm_2,SoilTemperature00_10cm_2,SoilTemperature10_40cm_2,SoilTemperature40_100cm_2,SoilTemperature100_200cm_2,air_temperature_2,wind_speed_2,SPEI_1month_2,SPEI_2month_2,SPEI_3month_2,SPEI_4month_2,SPEI_5month_2,SPEI_6month_2,SPEI_7month_2,SPEI_8month_2,SPEI_9month_2,SPEI_10month_2,SPEI_11month_2,SPEI_12month_2,NDVI_3,EVI_3,precipitation_per_hour_v1_3,precipitation_per_hour_v2_3,surface_temperature_daytime_3,surface_temperature_nighttime_3,evapotranspiration_3,rainfall_3,SoilMoisture00_10cm_3,SoilMoisture10_40cm_3,SoilMoisture40_100cm_3,SoilMoisture100_200cm_3,SoilTemperatu
re00_10cm_3,SoilTemperature10_40cm_3,SoilTemperature40_100cm_3,SoilTemperature100_200cm_3,air_temperature_3,wind_speed_3,SPEI_1month_3,SPEI_2month_3,SPEI_3month_3,SPEI_4month_3,SPEI_5month_3,SPEI_6month_3,SPEI_7month_3,SPEI_8month_3,SPEI_9month_3,SPEI_10month_3,SPEI_11month_3,SPEI_12month_3,NDVI_4,EVI_4,precipitation_per_hour_v1_4,precipitation_per_hour_v2_4,surface_temperature_daytime_4,surface_temperature_nighttime_4,evapotranspiration_4,rainfall_4,SoilMoisture00_10cm_4,SoilMoisture10_40cm_4,SoilMoisture40_100cm_4,SoilMoisture100_200cm_4,SoilTemperature00_10cm_4,SoilTemperature10_40cm_4,SoilTemperature40_100cm_4,SoilTemperature100_200cm_4,air_temperature_4,wind_speed_4,SPEI_1month_4,SPEI_2month_4,SPEI_3month_4,SPEI_4month_4,SPEI_5month_4,SPEI_6month_4,SPEI_7month_4,SPEI_8month_4,SPEI_9month_4,SPEI_10month_4,SPEI_11month_4,SPEI_12month_4,NDVI_5,EVI_5,precipitation_per_hour_v1_5,precipitation_per_hour_v2_5,surface_temperature_daytime_5,surface_temperature_nighttime_5,evapotranspiration_5,rainfall_5,SoilMoisture00_10cm_5,SoilMoisture10_40cm_5,SoilMoisture40_100cm_5,SoilMoisture100_200cm_5,SoilTemperature00_10cm_5,SoilTemperature10_40cm_5,SoilTemperature40_100cm_5,SoilTemperature100_200cm_5,air_temperature_5,wind_speed_5,SPEI_1month_5,SPEI_2month_5,SPEI_3month_5,SPEI_4month_5,SPEI_5month_5,SPEI_6month_5,SPEI_7month_5,SPEI_8month_5,SPEI_9month_5,SPEI_10month_5,SPEI_11month_5,SPEI_12month_5,NDVI_6,EVI_6,precipitation_per_hour_v1_6,precipitation_per_hour_v2_6,surface_temperature_daytime_6,surface_temperature_nighttime_6,evapotranspiration_6,rainfall_6,SoilMoisture00_10cm_6,SoilMoisture10_40cm_6,SoilMoisture40_100cm_6,SoilMoisture100_200cm_6,SoilTemperature00_10cm_6,SoilTemperature10_40cm_6,SoilTemperature40_100cm_6,SoilTemperature100_200cm_6,air_temperature_6,wind_speed_6,SPEI_1month_6,SPEI_2month_6,SPEI_3month_6,SPEI_4month_6,SPEI_5month_6,SPEI_6month_6,SPEI_7month_6,SPEI_8month_6,SPEI_9month_6,SPEI_10month_6,SPEI_11month_6,SPEI_12month_6
district,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 
101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 
196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1
ABIM,2001-02-01,False,-1.74053,-1.57638,-1.40918,-1.28021,1.79353,1.39024,-1.68457,-1.36407,-1.54566,-0.630702,-0.0802244,-0.529815,1.85097,1.77436,1.52761,1.18222,1.61522,1.09855,-0.686418,-0.257406,-0.272941,-1.26737,-0.475335,-0.456536,0.194474,-0.0129817,0.929048,1.27166,0.986999,0.750482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ABIM,2001-03-01,False,-1.71381,-1.38037,0.956518,1.51377,1.84005,1.53435,-0.323192,0.0373043,-0.489599,-0.468632,-0.245643,-0.660479,1.05294,1.35844,1.60155,1.66377,1.1378,0.847754,1.15193,0.804544,0.819153,0.467649,-0.403702,0.0211795,0.0539794,0.526776,0.296075,1.21296,1.47158,1.23105,-1.74053,-1.57638,-1.40918,-1.28021,1.79353,1.39024,-1.68457,-1.36407,-1.54566,-0.630702,-0.0802244,-0.529815,1.85097,1.77436,1.52761,1.18222,1.61522,1.09855,-0.686418,-0.257406,-0.272941,-1.26737,-0.475335,-0.456536,0.194474,-0.0129817,0.929048,1.27166,0.986999,0.750482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ABIM,2001-04-01,False,0.309868,0.205156,1.01533,0.610817,0.195604,0.458223,0.857911,0.700849,0.330451,-0.114672,-0.391943,-0.78128,0.244515,0.504204,0.835792,1.2598,0.872362,0.131928,-0.248999,0.256563,0.0364012,0.135282,0.114303,-0.473408,-0.244459,-0.176889,0.0547344,0.131476,0.789773,1.006,-1.71381,-1.38037,0.956518,1.51377,1.84005,1.53435,-0.323192,0.0373043,-0.489599,-0.468632,-0.245643,-0.660479,1.05294,1.35844,1.60155,1.66377,1.1378,0.847754,1.15193,0.804544,0.819153,0.467649,-0.403702,0.0211795,0.0539794,0.526776,0.296075,1.21296,1.47158,1.23105,-1.74053,-1.57638,-1.40918,-1.28021,1.79353,1.39024,-1.68457,-1.36407,-1.54566,-0.630702,-0.0802244,-0.529815,1.85097,1.77436,1.52761,1.18222,1.61522,1.09855,-0.686418,-0.257406,-0.272941,-1.26737,-0.475335,-0.456536,0.194474,-0.0129817,0.929048,1.27166,0.986999,0.750482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ABIM,2001-05-01,False,1.09935,1.16614,0.726755,0.492181,-0.702476,-0.124684,1.22737,0.930721,0.791186,0.343755,-0.0756078,-0.578316,-0.462675,-0.167721,0.132473,0.594789,0.358671,-0.967894,-0.0971654,-0.274166,0.159914,0.0513764,0.103192,0.071932,-0.383226,-0.184966,-0.143614,0.0729708,0.146224,0.73078,0.309868,0.205156,1.01533,0.610817,0.195604,0.458223,0.857911,0.700849,0.330451,-0.114672,-0.391943,-0.78128,0.244515,0.504204,0.835792,1.2598,0.872362,0.131928,-0.248999,0.256563,0.0364012,0.135282,0.114303,-0.473408,-0.244459,-0.176889,0.0547344,0.131476,0.789773,1.006,-1.71381,-1.38037,0.956518,1.51377,1.84005,1.53435,-0.323192,0.0373043,-0.489599,-0.468632,-0.245643,-0.660479,1.05294,1.35844,1.60155,1.66377,1.1378,0.847754,1.15193,0.804544,0.819153,0.467649,-0.403702,0.0211795,0.0539794,0.526776,0.296075,1.21296,1.47158,1.23105,-1.74053,-1.57638,-1.40918,-1.28021,1.79353,1.39024,-1.68457,-1.36407,-1.54566,-0.630702,-0.0802244,-0.529815,1.85097,1.77436,1.52761,1.18222,1.61522,1.09855,-0.686418,-0.257406,-0.272941,-1.26737,-0.475335,-0.456536,0.194474,-0.0129817,0.929048,1.27166,0.986999,0.750482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ABIM,2001-06-01,False,0.439057,0.83289,0.400655,0.242266,-1.10912,-0.664326,0.707593,0.831024,0.987119,0.711882,0.341294,-0.145116,-1.35222,-0.956833,-0.645847,-0.172194,-1.34909,-1.27407,0.632579,0.310965,0.0732815,0.332066,0.261521,0.342936,0.294927,-0.113829,0.0784581,0.137323,0.33932,0.448824,1.09935,1.16614,0.726755,0.492181,-0.702476,-0.124684,1.22737,0.930721,0.791186,0.343755,-0.0756078,-0.578316,-0.462675,-0.167721,0.132473,0.594789,0.358671,-0.967894,-0.0971654,-0.274166,0.159914,0.0513764,0.103192,0.071932,-0.383226,-0.184966,-0.143614,0.0729708,0.146224,0.73078,0.309868,0.205156,1.01533,0.610817,0.195604,0.458223,0.857911,0.700849,0.330451,-0.114672,-0.391943,-0.78128,0.244515,0.504204,0.835792,1.2598,0.872362,0.131928,-0.248999,0.256563,0.0364012,0.135282,0.114303,-0.473408,-0.244459,-0.176889,0.0547344,0.131476,0.789773,1.006,-1.71381,-1.38037,0.956518,1.51377,1.84005,1.53435,-0.323192,0.0373043,-0.489599,-0.468632,-0.245643,-0.660479,1.05294,1.35844,1.60155,1.66377,1.1378,0.847754,1.15193,0.804544,0.819153,0.467649,-0.403702,0.0211795,0.0539794,0.526776,0.296075,1.21296,1.47158,1.23105,-1.74053,-1.57638,-1.40918,-1.28021,1.79353,1.39024,-1.68457,-1.36407,-1.54566,-0.630702,-0.0802244,-0.529815,1.85097,1.77436,1.52761,1.18222,1.61522,1.09855,-0.686418,-0.257406,-0.272941,-1.26737,-0.475335,-0.456536,0.194474,-0.0129817,0.929048,1.27166,0.986999,0.750482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# persist the lagged feature table (the index is written as leading columns)
df_final.to_csv(path_or_buf='df_analysis.csv')

In [51]:
# impose class balance by undersampling the larger class
pos_samples = df_final[df_final['drought_estimated']]
neg_samples = df_final[~df_final['drought_estimated']]

# Both classes are cut down to the size of the smaller one. Sampling more rows
# than a class contains (with replace=False) would raise a ValueError.
num_samples = min(len(pos_samples), len(neg_samples))
if len(pos_samples) > num_samples:
    pos_samples = pos_samples.sample(n=num_samples, replace=False, random_state=42)
# fixed random_state keeps the drawn subset reproducible across runs
neg_samples = neg_samples.sample(n=num_samples, replace=False, random_state=42)

df_analysis = pd.concat([pos_samples, neg_samples])
print(len(df_analysis[df_analysis['drought_estimated']]), len(df_analysis[~df_analysis['drought_estimated']]))

85 85
