In [1]:
#Import Packages
import os
import pandas as pd
import numpy as np
import rasterio as rio

#Change Working Directory so the rasters can be opened by bare file name
os.chdir(r'C:\Users\malva\Thesis\GIS\Layers\Raster\ValdAran')

#Read Rasters: every name below is an open rasterio dataset handle.
#The handles are opened in the order listed and are left open on purpose,
#because the band data is read from them afterwards.

#Coordinate rasters
lat, long = map(rio.open, ("latitude.asc", "longitude.asc"))

#Elevation model and the layers stored alongside it
dem, slope, aspect, curv_plan, curv_prof, facc, twi = map(
    rio.open,
    (
        'dem.asc',
        "slope.asc",
        "aspect.asc",
        "curvature_plan.asc",
        "curvature_profile.asc",
        "facc.asc",
        "twi.asc",
    ),
)

#Categorical layers
soil, lulc = map(rio.open, ("soil.asc", "lulc.asc"))

#Rainfall layers
rain_ant, rain_event = map(rio.open, ("rain_ant.asc", "rain_event.asc"))

#PROB_* layers
pof_event, pof_dry, pof_sat = map(
    rio.open,
    (
        "PROB_failure_final_cond.asc",
        "PROB_uncond_unst.asc",
        "PROB_uncond_stable.asc",
    ),
)

#Inventory raster (becomes the 'response' column of the data set)
inventory = rio.open("inventory.asc")

In [2]:
%%time
#Convert Rasters to 2D Arrays: band 1 of every open dataset is read into memory.
#The reads happen in the order the datasets are listed.

#Coordinate rasters
lat_arr, long_arr = (src.read(1) for src in (lat, long))

#Elevation model and the layers stored alongside it
(dem_arr, slp_arr, asp_arr, curv_plan_arr,
 curv_prof_arr, facc_arr, twi_arr) = (
    src.read(1)
    for src in (dem, slope, aspect, curv_plan, curv_prof, facc, twi)
)

#Categorical layers
soil_arr, lulc_arr = (src.read(1) for src in (soil, lulc))

#Rainfall layers
rain_ant_arr, rain_event_arr = (src.read(1) for src in (rain_ant, rain_event))

#PROB_* layers and the inventory raster
pof_event_arr, pof_dry_arr, pof_sat_arr, inv_arr = (
    src.read(1) for src in (pof_event, pof_dry, pof_sat, inventory)
)

Wall time: 3min 17s


In [3]:
%%time
#Create Data Frame: each 2D array is flattened to 1D with ravel() and becomes
#one column, so every row of the frame corresponds to one raster cell.
#Column order follows the order of the keys below.
df = pd.DataFrame({
    'lat': lat_arr.ravel(),
    'long': long_arr.ravel(),

    'elevation': dem_arr.ravel(),
    'slope': slp_arr.ravel(),
    'aspect': asp_arr.ravel(),
    'curv_plan': curv_plan_arr.ravel(),
    'curv_prof': curv_prof_arr.ravel(),
    'facc': facc_arr.ravel(),
    'twi': twi_arr.ravel(),

    'soil': soil_arr.ravel(),
    'lulc': lulc_arr.ravel(),

    'rain_ant': rain_ant_arr.ravel(),
    'rain_event': rain_event_arr.ravel(),

    'pof_event': pof_event_arr.ravel(),
    'pof_dry': pof_dry_arr.ravel(),
    'pof_sat': pof_sat_arr.ravel(),
    'response': inv_arr.ravel(),
})

Wall time: 1.06 s


In [4]:
#Report, per column, how many cells carry the value -9999
for col in df.columns:
    n_nodata = (df[col] == -9999).sum()
    print(f"{col}: {n_nodata}")

lat: 489384
long: 489384
elevation: 489384
slope: 489384
aspect: 498504
curv_plan: 489384
curv_prof: 489384
facc: 489384
twi: 498504
soil: 434739
lulc: 460737
rain_ant: 460737
rain_event: 459089
pof_event: 470286
pof_dry: 470286
pof_sat: 470286
response: 0


In [5]:
#Replace NoData (-9999) with np.nan and keep only the cells that have a value
#in every column; the index is rebuilt so it runs 0..n-1 again.
print("Cells in Raster Extent: ", df.shape[0])
df_nan = df.replace(-9999, np.nan).dropna().reset_index(drop=True)
print("Cells with Values: ", df_nan.shape[0])

#Transform FAcc to LOG10
#log10(0) evaluates to -inf, and those cells are set to 0 exactly as before.
#The divide-by-zero warning is therefore expected and is silenced for this one
#call only; any other floating-point warning is still reported.
with np.errstate(divide='ignore'):
    facc_log = np.log10(df_nan['facc'])
#Only the facc column is patched, so a -inf in any other column is no longer
#silently overwritten by this clean-up step.
df_nan['facc'] = facc_log.replace(-np.inf, 0)

df_nan.head()

Cells in Raster Extent:  13486326
Cells with Values:  12987422


  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,lat,long,elevation,slope,aspect,curv_plan,curv_prof,facc,twi,soil,lulc,rain_ant,rain_event,pof_event,pof_dry,pof_sat,response
0,4739047.5,310462.5,843.359985,22.397577,91.39006,-0.0,-0.0,0.0,-10.626486,2.0,3.0,0.5,48.0,0.0,0.0,0.9732,0.0
1,4739047.5,310467.5,841.299988,22.348549,91.393448,-5.759246e-07,-0.000317,1.39794,4.107747,2.0,1.0,0.5,48.0,0.0,0.0,0.9987,0.0
2,4739047.5,310472.5,839.25,22.250391,91.400269,-5.844181e-07,-0.000317,1.69897,4.805774,2.0,1.0,0.5,48.0,0.0,0.0,0.9988,0.0
3,4739047.5,310477.5,837.210022,22.201574,91.405396,3.598404e-09,2e-06,1.875061,5.213672,2.0,1.0,0.5,48.0,0.0,0.0,0.9988,0.0
4,4739047.5,310482.5,835.169983,23.661343,91.56929,2.053477e-05,0.009212,2.0,5.430331,2.0,1.0,0.5,48.0,0.0,0.0,0.9946,0.0


In [6]:
#Number of cells whose response equals 1
int(df_nan["response"].eq(1).sum())

391

In [7]:
#Split the cleaned cells by the value of the Response attribute
response = df_nan.response
true_cells = df_nan[response == True]
false_cells = df_nan[response == False]
n_true = len(true_cells)
print("True cells: ", n_true, "\nFalse cells: ", len(false_cells))

#Draw as many False cells as there are True cells; the fixed random_state
#makes the draw repeatable.
false_sample = false_cells.sample(n=n_true, random_state=0)
#false_sample = false_cells.sample(n = 5000, random_state = 0)
print(false_sample.shape[0], " have been sampled from the false cells.")

#Data set for the data-driven model: all True cells followed by the sampled
#False cells (original row labels are kept).
df_dd = pd.concat([true_cells, false_sample])
print("Therefore, the final DataFrame for the data-driven model has:", df_dd.shape[0], "observations.")
df_dd.head()

True cells:  391 
False cells:  12987031
391  have been sampled from the false cells.
Therefore, the final DataFrame for the data-driven model has: 782 observations.


Unnamed: 0,lat,long,elevation,slope,aspect,curv_plan,curv_prof,facc,twi,soil,lulc,rain_ant,rain_event,pof_event,pof_dry,pof_sat,response
144779,4738872.5,311597.5,790.799988,13.145604,327.057495,0.025769,-0.010359,1.39794,4.673251,2.0,3.0,0.5,49.0,0.0,0.0,1.0,1.0
152975,4738862.5,311617.5,794.77002,18.441339,307.935059,-0.117006,-0.044545,2.352183,6.514342,2.0,3.0,0.5,49.0,0.0,0.0,1.0,1.0
289083,4738697.5,319872.5,1709.359985,43.968288,173.810425,0.000644,0.018706,2.889302,6.688886,8.0,1.0,0.5,70.0,0.8803,0.3543,0.1197,1.0
993416,4737832.5,318912.5,1285.219971,47.644943,194.897537,0.076285,0.069331,0.0,-11.605381,8.0,1.0,0.5,69.0,0.6894,0.5387,0.1381,1.0
1362535,4737377.5,317017.5,1734.319946,39.489166,182.852234,-0.000287,-0.003942,3.122216,7.382731,8.0,1.0,0.5,67.0,0.6695,0.0711,0.3305,1.0


In [8]:
#Export the data set as CSV (with header row, without the index column)
sample_csv = r'C:\Users\malva\Thesis\Files\Samples\ValdAran_Sample.csv'
#sample_csv = r'C:\Users\malva\Thesis\Files\Samples\ValdAran_Sample_5391.csv'
df_dd.to_csv(sample_csv, index=False, header=True)