# 01 - Compile Landslide Inventories

This notebook: 

- Reads original landslide inventory data from 10 publicly available landslide inventories
- Converts all inventories to a common format, assigning a trigger, type, material, and spatial uncertainty where possible
- Binds formatted inventories into a global compilation 
- Identifies which landslides occurred in urban areas according to the GHS-UCDB
- Removes duplicates between inventories
- Assigns time stamps in UTC
- Subsets the global compilation to landslides that have a documented rainfall trigger and date and that occurred in urban areas, for further analysis

The outputs of this notebook are: 
- ls_urban_ts_rf_u.pkl, a global compilation of rainfall-triggered urban landslides with daily timestamps, which is read into 02_IdentifyGauges
- nls_rf_per_city_plot.csv, the number of landslides in each city for making Figure 1.

**Original data required:**

**Landslide inventories**

*see code below for citations, access, version, and licensing information*

- NASA Global Landslide Catalog
- Global Fatal Landslide Database V2
- Geoscience Australia Landslide Search
- Landslide and Torrential Colombia Database
- FraneItalia V3
- GNS New Zealand Landslide Database
- Landslide Inventory Rwanda
- Kentucky Geological Survey Landslide Inventory
- Digital Data Series DGS06-3 Landslides in New Jersey
- Seattle Historic Landslide Locations ECA

**Urban areas**

- Global Human Settlement Layer Urban Centre Database





In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import pyproj
import shapely
from shapely.ops import transform
from timezonefinder import TimezoneFinder

In [None]:
#world map baselayer (the 'naturalearth_lowres' dataset located through geopandas)
#NOTE(review): gpd.datasets is deprecated in GeoPandas 0.14 and removed in 1.0 - confirm the pinned geopandas version
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

In [None]:
def plot_lsinv(lsinv, title):
    
    """
    Plot a landslide inventory on a world map along with an annual time series of landslide counts.
    
    lsinv = geopandas dataframe with the landslide inventory.  Should have a daily date time index;
            rows whose index is missing (NaT) are drawn in grey on the map and left out of the time series.
    title = title for the plot    
    
    Returns the matplotlib figure and the array holding the two axes (map on top, bar chart below).
    """
    
    fig, ax = plt.subplots(2, 1, figsize = (11, 9), gridspec_kw = {'height_ratios': [3,1]})
    
    #one colour of the tab20 palette is used for both panels (randint's upper bound is exclusive)
    colors = plt.get_cmap('tab20')
    cidx = np.random.randint(0, 19)

    #reuse the notebook-level basemap when it exists, otherwise read it here.
    #The fallback is bound to a different name on purpose: assigning to `world` inside this 
    #function would make `world` a local variable, so the global would never be found and 
    #the file would be re-read on every call.
    try:
        basemap = world
    except NameError:
        basemap = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

    basemap.to_crs("ESRI:54009").plot(facecolor = '#F5F5F5',
                                      edgecolor = 'darkgrey', 
                                      ax = ax[0])
    
    #plot all the landslides in the inventory
    lsinv.to_crs("ESRI:54009").plot(ax = ax[0], 
                                    alpha = 0.3, 
                                    markersize = 5, 
                                    color = 'darkgrey')

    #only plot the landslides with daily time stamps
    dated = lsinv[lsinv.index.notna()]
    
    dated.to_crs("ESRI:54009").plot(ax = ax[0], 
                                    alpha = 0.5, 
                                    markersize = 5, 
                                    color = colors(cidx))

    ax[0].set_title(title, fontsize = 16, fontweight = 'bold')
    
    ax[0].tick_params(left = False,
                      bottom = False,
                      labelleft = False,
                      labelbottom = False)
    
    #also add an annual time series of the daily time stamped landslides.
    #Rows are counted per calendar year of the index; this avoids the 'Y' resample alias, 
    #which newer pandas versions deprecate.
    yts = dated.groupby(dated.index.year).size()
    
    ax[1].bar(x = yts.index,
              height = yts.values,
              width = 0.8, 
              color = colors(cidx))
    
    ax[1].set_ylabel('Annual landslides')
    
    fig.tight_layout()
    
    return fig, ax
    

In [None]:
def dict_othercols(row, diff):
    """
    Collect the original-inventory values belonging to one landslide into a dictionary, 
    so they can be stored in a single column of the simplified inventory.
    
    row = one row of the simplified dataframe; its label (row.name) must exist in diff's index
    diff = dataframe holding the extra original columns, indexed like the simplified dataframe

    Returns a dict mapping each column name of diff to this landslide's value.
    """
    
    #look up the matching record in the original inventory by the row label
    original_record = diff.loc[row.name]
    
    return dict(original_record)
    



In [None]:
def simplify_inventory(inventorydf, 
                      inventoryname, 
                      inventory_id, 
                      trigger, 
                      lstype,
                      material,
                      spat_unc, 
                      src_url, 
                      date_accessed, 
                      citation, 
                      license):
    
    """
    Takes a landslide inventory dataframe and makes a simplified, standard format landslide inventory dataframe
    out of it that can be combined with other inventories
    
    inventorydf = Geopandas dataframe containing at the least "geometry", "DATE" and "src_index" columns.
                  NOTE: the standard columns are added to this dataframe in place.
    inventoryname = string with a descriptive name (e.g. "NASA GLC")
    inventory_id = name of column in original dataframe with a unique ID, or None
    trigger = name of column in original dataframe with the trigger, or None
    lstype = name of column in original dataframe with the type of landslide, or None
    material = name of column in original dataframe with the landslide material, or None
    spat_unc = name of column in original dataframe with the spatial uncertainty, or None
    src_url = string with url where data was downloaded from 
    date_accessed = string with date when data was downloaded in "YYYYmmdd" format
    citation = string with citation 
    license = string with type of license for the data   
    
    Returns a new dataframe with the standard columns plus an "other" column that holds, 
    for every row, a dictionary of all remaining original columns.
    """
    
    
    #uniform naming
    inventorydf['inventory'] = inventoryname
    
    #test the *argument* for None: indexing the dataframe with None raises a KeyError
    if inventory_id is None:
        inventorydf['inventory_id'] = None
    else: 
        inventorydf['inventory_id'] = inventorydf[inventory_id]
    
    inventorydf['inventory_id_name'] = inventory_id
    
    #optional attributes: copy the named source column, or fill with None when there is none
    optional_columns = (('trigger', trigger), 
                        ('type', lstype), 
                        ('material', material), 
                        ('spat_unc', spat_unc))
    
    for standard_name, source_name in optional_columns:
        if source_name is None:
            inventorydf[standard_name] = None
        else:
            inventorydf[standard_name] = inventorydf[source_name]
        
    inventorydf['src_url'] = src_url
    inventorydf['date_accessed'] = pd.to_datetime(date_accessed, format = '%Y%m%d')
    inventorydf['citation'] = citation
    inventorydf['license'] = license


    #simplify (copy, so that adding the 'other' column below never writes into a view)

    standard_columns = ['inventory', 'src_index', 'inventory_id', 'inventory_id_name', 
                        'trigger', 'type', 'material', 'spat_unc', 'DATE', 'geometry', 
                        'src_url', 'date_accessed', 'citation', 'license']

    inventorydf_s = inventorydf.loc[:, standard_columns].copy()

    #get all columns that aren't in the simplified version

    diff = inventorydf.loc[:, inventorydf.columns.difference(standard_columns)]

    #and put them in a dictionary in the simple dataframe, one dictionary per row label

    inventorydf_s['other'] = [dict(diff.loc[label]) for label in inventorydf_s.index]
    
    return inventorydf_s
 
    
    

In [None]:
#common nomenclature shared by every inventory: each list holds the allowed values 
#for one standard column (trigger, type, material, spatial uncertainty)

triggers = ['Precipitation', 'Earthquake', 'Volcano', 
            'Human Activity', 'Other', 'Unknown']

types = ['Fall', 'Topple', 'Slide', 'Spread', 
         'Flow', 'Complex', 'Other', 'Unknown']

material = ['Rock', 'Debris', 'Earth', 'Other', 'Unknown']

#ordered from the most precise to the least precise location
spatial_uncertainty = ['Exact', 
                       '<1km', '<5km', '<10km', '<25km', '<50km', 
                       '<100km', '<250km', '>250km', 
                       'Unknown']

# Global inventories

### NASA Global Landslide Catalog

In [None]:
#read the point layer of the NASA Global Landslide Catalog geodatabase
nasaglc = gpd.read_file('../nasa_global_landslide_catalog.gdb', 
                       layer = 'nasa_global_landslide_catalog_point')


In [None]:
#convert landslide time to datetime 
nasaglc['DATE'] = pd.to_datetime(nasaglc['event_date'])

#preserve original index (must happen before the index is reset below)
nasaglc['src_index'] = nasaglc.index

#reset index to  make sure we have unique and monotonic values
nasaglc.reset_index(inplace = True, drop = True)

#check if the crs is WGS84, and if not, reproject it in place

if nasaglc.crs != 'EPSG:4326':
    nasaglc.to_crs('EPSG:4326', inplace = True)
    

In [None]:
#translate the NASA GLC trigger labels into the common trigger nomenclature;
#a label missing from the map raises a KeyError so that new labels are noticed

nasa_trigger_map = {
    'downpour': 'Precipitation',
    'earthquake': 'Earthquake',
    'unknown': 'Unknown',
    'tropical_cyclone': 'Precipitation',
    'rain': 'Precipitation',
    'snowfall_snowmelt': 'Other',
    'continuous_rain': 'Precipitation',
    'monsoon': 'Precipitation',
    'freeze_thaw': 'Other',
    'mining': 'Human Activity',
    'no_apparent_trigger': 'Other',
    'flooding': 'Other',
    'construction': 'Human Activity',
    'leaking_pipe': 'Human Activity',
    'Unknown': 'Unknown',
    'Downpour': 'Precipitation',
    'other': 'Other',
    'dam_embankment_collapse': 'Other',
    'volcano': 'Volcano',
    'Other': 'Other',
    'vibration': 'Human Activity',
    None: 'Unknown',
    'Flooding': 'Other',
}

nasaglc['trigger'] = [nasa_trigger_map[raw] for raw in nasaglc['landslide_trigger']]


In [None]:
#translate the NASA GLC landslide categories into the common type nomenclature;
#a category missing from the map raises a KeyError

nasa_type_map = {
    'landslide': 'Slide',
    'rock_fall': 'Fall',
    'mudslide': 'Slide',
    'unknown': 'Unknown',
    'debris_flow': 'Flow',
    'riverbank_collapse': 'Slide',
    'complex': 'Complex',
    'other': 'Other',
    'earth_flow': 'Flow',
    'rotational_slide': 'Slide',
    'Riverbank collapse': 'Slide',
    'Rock fall': 'Fall',
    'Unknown': 'Unknown',
    'Mudslide': 'Slide',
    'snow_avalanche': 'Other',
    'translational_slide': 'Slide',
    'creep': 'Other',
    'Landslide': 'Slide',
    'Rotational slide': 'Slide',
    'lahar': 'Flow',
    None: 'Unknown',
    'Other': 'Other',
    'Complex': 'Complex',
    'topple': 'Topple',
    'Rock slide': 'Slide',
    'Unkown': 'Unknown',
    'Creep': 'Other',
    'Rockfall': 'Fall',
}

nasaglc['type'] = [nasa_type_map[raw] for raw in nasaglc['landslide_category']]



In [None]:
#assign material to uniform naming scheme
#The NASA GLC has no material column, so the material is derived from the landslide category.
#Every value must be one of the common material names (Rock, Debris, Earth, Other, Unknown).

nasa_material_map = {'landslide':'Unknown', 
                 'rock_fall':'Rock', 
                 'mudslide':'Earth', 
                 'unknown':'Unknown', 
                 'debris_flow':'Debris',
                 'riverbank_collapse':'Unknown',
                 'complex':'Unknown',
                 'other':'Unknown',
                 'earth_flow':'Earth', #was 'Flow', which is a landslide type and not a valid material
                 'rotational_slide':'Unknown',
                 'Riverbank collapse':'Unknown',
                 'Rock fall':'Rock',
                 'Unknown':'Unknown',
                 'Mudslide':'Earth',
                 'snow_avalanche':'Other',
                 'translational_slide':'Unknown',
                 'creep':'Unknown',
                 'Landslide':'Unknown',
                 'Rotational slide':'Unknown',
                 'lahar':'Other',
                 None:'Unknown',
                 'Other':'Unknown',
                 'Complex':'Unknown',
                 'topple':'Unknown',
                 'Rock slide':'Rock',
                 'Unkown':'Unknown',
                 'Creep':'Unknown',
                 'Rockfall':'Rock'}

nasaglc['material'] = nasaglc.apply(lambda row:nasa_material_map[row['landslide_category']], axis = 1)

In [None]:
#translate the NASA GLC location accuracy labels into the common spatial uncertainty classes;
#a label missing from the map raises a KeyError

nasa_uncert_map = {
    '5km': '<5km',
    'unknown': 'Unknown',
    '50km': '<50km',
    'exact': 'Exact',
    '1km': '<1km',
    '25km': '<25km',
    '10km': '<10km',
    '100km': '<100km',
    '250km': '<250km',
    'Unknown': 'Unknown',
    'Known exactly': 'Exact',
    None: 'Unknown',
}

nasaglc['spat_unc'] = [nasa_uncert_map[raw] for raw in nasaglc['location_accuracy']]

In [None]:
#build the standard-format NASA GLC inventory; the trigger/type/material/spat_unc arguments 
#name the harmonised columns created in the cells above
nasaglc_s =  simplify_inventory(inventorydf = nasaglc, 
                                inventoryname = "NASA GLC", 
                                  inventory_id = 'event_id', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://maps.nccs.nasa.gov/arcgis/apps/MapAndAppGallery/index.html?appid=574f26408683485799d02e857e5d9521', 
                                  date_accessed = '20220725', 
                                  citation = """Kirschbaum, D.B., Stanley, T., & Zhou, Y. (2015). 
                                                 Spatial and temporal analysis of a global landslide catalog. 
                                                 Geomorphology, 249, 4-15.""", 
                                  license = 'permissive, see source URL')

### Global Fatal Landslide Database Version 2

We use the layer "Landslidepoints_04to17"

In [None]:
#read the "Landslidepoints_04to17" shapefile of the Global Fatal Landslide Database
gfld = gpd.read_file("""../GFLD Version 2 Public-20220520T152800Z-001/GFLD Version 2 Public/Landslidepoints_04to17/Landslidepoints_04to17.shp""")

#convert landslide time to datetime
gfld["DATE"] = pd.to_datetime(gfld['Date'])

#preserve original index (must happen before the index is reset below)
gfld['src_index'] = gfld.index

#reset index to  make sure we have unique and monotonic values
gfld.reset_index(inplace = True, drop = True)

#check if the crs is WGS84, and if not, reproject it in place

if gfld.crs != 'EPSG:4326':
    gfld.to_crs('EPSG:4326', inplace = True)
    


In [None]:
#translate the GFLD trigger labels into the common trigger nomenclature;
#a label missing from the map (including a missing value) raises a KeyError

gfld_trigger_map = {
    'rainfall': 'Precipitation',
    'mining (unknown)': 'Human Activity',
    'construction': 'Human Activity',
    'unknown': 'Unknown',
    'illegal hillcutting': 'Human Activity',
    'illegal mining': 'Human Activity',
    'earthquake': 'Earthquake',
    'legal mining': 'Human Activity',
    'leaking pipe': 'Human Activity',
    'volcanic eruption': 'Volcano',
    'fire': 'Other',
    'recreation': 'Human Activity',
    'progressive failure': 'Other',
    'freeze-thaw': 'Other',
    'freezing': 'Other',
    'human activity (unspecified)': 'Human Activity',
    'garbage collapse': 'Human Activity',
    'natural dam or riverbank collapse': 'Other',
    'animal activity': 'Other',
    'conflict and explosion': 'Human Activity',
    'marine erosion': 'Other',
}

gfld['trigger'] = [gfld_trigger_map[raw] for raw in gfld['Trigger']]


In [None]:
#assign type to uniform naming scheme: every GFLD landslide gets the common label 'Unknown'

gfld['type'] = 'Unknown' #type is not documented in the GFLD

In [None]:
#assign material to uniform naming scheme: every GFLD landslide gets the common label 'Unknown'

gfld['material'] = 'Unknown' #material is not documented in the GFLD

In [None]:
#assign spatial uncertainty to uniform naming scheme


def gfld_to_uncert(precision):
    """
    Convert a GFLD "Precision" value into one of the common spatial uncertainty classes.
    
    precision = radius from the landslide point to the outermost corner of, for example, 
                the county. It is divided by 1000 to obtain metres.
                NOTE(review): this assumes the radius is stored in mm, as the original 
                comment states - confirm against the GFLD documentation.
    
    Returns 'Exact' below 100 m, the matching '<Nkm' class up to 250 km, '>250km' from 250 km 
    upwards, and 'Unknown' when the value is NaN.
    """
    
    rad_in_m = precision/1000
    
    #upper bound of each class in metres (exclusive), from most to least precise
    upper_bounds = ((100, 'Exact'),
                    (1000, '<1km'),
                    (5000, '<5km'),
                    (10000, '<10km'),
                    (25000, '<25km'),
                    (50000, '<50km'),
                    (100000, '<100km'),
                    (250000, '<250km'))
    
    for upper_bound, spat_unc in upper_bounds:
        if rad_in_m < upper_bound:
            return spat_unc
    
    #>= so that a radius of exactly 250 km is not reported as 'Unknown'
    if rad_in_m >= 250000:
        return '>250km'
    
    #only reached for NaN, which compares False against every bound
    return 'Unknown'



In [None]:
#classify the precision radius of every GFLD landslide into a common spatial uncertainty class
gfld['spat_unc'] = [gfld_to_uncert(precision) for precision in gfld['Precision']]

In [None]:
#build the standard-format GFLD inventory; the trigger/type/material/spat_unc arguments 
#name the harmonised columns created in the cells above
gfld_s =  simplify_inventory(inventorydf = gfld, 
                                inventoryname = 'GFLD V2', 
                                  inventory_id = 'LandslideN', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://drive.google.com/drive/folders/1kxUSXBl10OHPrGXIur6bM4PBoKX0C_LY', 
                                  date_accessed = '20220520', 
                                  citation = """Froude, M. J., & Petley, D. N. (2018). 
                                                  Global fatal landslide occurrence from 2004 to 2016. 
                                                  Natural Hazards and Earth System Sciences, 18(8), 2161–2181. 
                                                  https://doi.org/10.5194/nhess-18-2161-2018""", 
                                  license = 'https://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/')

In [None]:
#plot_lsinv expects the landslide date as the index
plot_lsinv(gfld_s.set_index('DATE'), "Global Fatal Landslide Database")

# Other inventories by country

### Australia

#### Geoscience Australia Landslide Search

In [None]:
#read the 'Landslip_details' sheet; header = 1 takes the column names from the second row
Landslip_details = pd.read_excel('../Landslip_Export_20180706_1224.xls', 
                                sheet_name = 'Landslip_details', 
                                header = 1)

In [None]:
#read the 'Landslide_details' sheet of the same workbook; header = 1 takes the column names from the second row
Landslide_details = pd.read_excel('../Landslip_Export_20180706_1224.xls', 
                                sheet_name = 'Landslide_details', 
                                header = 1)

In [None]:
#left join: keep every row of Landslip_details and attach the Landslide_details columns 
#that share the same 'Landslip\nID'
australia = pd.merge(Landslip_details, 
                    Landslide_details,
                    how = 'left', 
                    on = 'Landslip\nID')

In [None]:
def aus_todatetime(row):
    """
    Parse the 'Event\\nDate' entry of one Geoscience Australia record into a timestamp.
    
    row = one record of the merged Australian inventory (anything indexable by 'Event\\nDate')
    
    Returns a pandas Timestamp when the entry is a string holding a full day-first date 
    (recognised by its two dashes) and np.nan otherwise: missing or non-string entries, 
    entries without a daily time stamp, and strings that cannot be parsed.
    """
    
    event_date = row['Event\nDate']
    
    #missing values are not strings and have no .count method
    if not isinstance(event_date, str):
        return np.nan
    
    #dates with two dashes contain a daily time stamp
    if event_date.count('-') != 2:
        return np.nan

    try:
        return pd.to_datetime(event_date, dayfirst = True)
    except (ValueError, TypeError, OverflowError):
        #unparseable or out-of-range date: treat as undated
        return np.nan

In [None]:
#convert landslide time to datetime (entries without a daily date become missing values)
australia['DATE'] = australia.apply(lambda row:aus_todatetime(row), axis = 1)

#preserve original index (must happen before the index is reset below)
australia['src_index'] = australia.index.values

#reset index to  make sure we have unique and monotonic values
australia.reset_index(inplace = True, drop = True)

#only take the landslides (we don't want erosion)
#note: filtering after the reset keeps the index unique and increasing, but leaves gaps in it

australia = australia[australia['Hazard\nType']=='Landslide'].copy()


In [None]:
#create a geodataframe with one point per record, built from the Longitude/Latitude columns 
#and declared as WGS84 (EPSG:4326)

australia = gpd.GeoDataFrame(data = australia, 
                             geometry = gpd.points_from_xy(australia.Longitude, 
                                                          australia.Latitude), 
                            crs = 'EPSG:4326')

In [None]:
#translate the Australian natural triggering factors into the common trigger nomenclature;
#missing factors (NaN) map to 'Unknown', a factor missing from the map raises a KeyError

australia_trigger_map = {
    np.nan: 'Unknown',
    'Intense, short period rainfall': 'Precipitation',
    'Prolonged high precipitation, Intense, short period rainfall': 'Precipitation',
    'Prolonged high precipitation': 'Precipitation',
    'Flash flooding': 'Other',
    'Intense, short period rainfall, Intense, short period rainfall': 'Precipitation',
    'Wave erosion': 'Other',
    'Earthquake': 'Earthquake',
    'Fluvial erosion': 'Other',
    'Severe storm, Intense, short period rainfall': 'Precipitation',
    'Flash flooding, Prolonged high precipitation': 'Precipitation',
    'Natural unknown': 'Unknown',
    'Flash flooding, Fluvial erosion, Prolonged high precipitation': 'Precipitation',
    'Flash flooding, Severe storm, Intense, short period rainfall': 'Precipitation',
    'Intense, short period rainfall, Severe storm': 'Precipitation',
    'Severe storm, Prolonged high precipitation': 'Precipitation',
    'Prolonged high precipitation, Severe storm': 'Precipitation',
    'Flash flooding, Fluvial erosion': 'Other',
    'Intense, short period rainfall, Prolonged high precipitation': 'Precipitation',
    'Subsurface erosion/weathering': 'Other',
    'Intense, short period rainfall, Flash flooding, Severe storm': 'Precipitation',
    'Severe storm': 'Precipitation',
    'Intense, short period rainfall, Flash flooding': 'Precipitation',
    'Wave erosion, Subsurface erosion/weathering': 'Other',
    'Wave erosion, Prolonged high precipitation': 'Precipitation',
}


def australia_trigger(row):
    """
    Return the common trigger name for one Australian record.
    
    A record whose human triggering factor is a string is classed as 'Human Activity', 
    whatever its natural factor; otherwise the natural factor is looked up in 
    australia_trigger_map.
    """
    
    #any documented human factor takes precedence over the natural factor
    if type(row['Human Triggering\nFactor']) is str:
        return 'Human Activity'
    
    return australia_trigger_map[row['Natural Triggering\nFactor']]


australia['trigger'] = australia.apply(australia_trigger, axis = 1)

In [None]:
#translate the Australian movement types into the common type nomenclature;
#missing types (NaN) map to 'Unknown', a type missing from the map raises a KeyError
australia_type_map = {
    np.nan: 'Unknown',
    'Slide': 'Slide',
    'Flow': 'Flow',
    'Fall': 'Fall',
    'Topple': 'Topple',
    'Slump': 'Other',
    'Unknown': 'Unknown',
}

australia['type'] = [australia_type_map[raw] for raw in australia['Movement\nType']]

In [None]:
#translate the Australian material types into the common material nomenclature;
#missing materials (NaN) map to 'Unknown', a material missing from the map raises a KeyError

australia_material_map = {
    np.nan: 'Unknown',
    'Rock': 'Rock',
    'Debris': 'Debris',
    'Earth': 'Earth',
}

australia['material'] = [australia_material_map[raw] for raw in australia['Material\nType']]

In [None]:
#translate the location capture method into a common spatial uncertainty class.
#'Capture\nMethod_y' is the capture method column that received the '_y' suffix in the merge 
#(i.e. the one from the right-hand table); a method missing from the map raises a KeyError

australia_uncertainty_map = {
    np.nan: 'Unknown',
    'Place name search': '<50km',
    'Other': 'Unknown',
    'Location edited by gis methods': 'Exact',
    'Reported location': '<1km',
    'Map located': '<1km',
    'Gps measurement': 'Exact',
    'Satellite imagery': '<1km',
}

australia['spat_unc'] = [australia_uncertainty_map[raw] for raw in australia['Capture\nMethod_y']]

In [None]:
#build the standard-format Australian inventory; the trigger/type/material/spat_unc arguments 
#name the harmonised columns created in the cells above
australia_s =  simplify_inventory(inventorydf = australia, 
                                inventoryname = 'Geoscience Australia Landslide Search', 
                                  inventory_id = 'Landslip\nID', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://researchdata.edu.au/landslide-search/1261711', 
                                  date_accessed = '20220803', 
                                  citation = """Geoscience Australia (2012): Landslide Search. software. http://pid.geoscience.gov.au/dataset/ga/74273""", 
                                  license = 'Creative Commons Attribution 4.0 International Licence http://creativecommons.org/licenses/by/4.0')

In [None]:
#plot_lsinv expects the landslide date as the index
plot_lsinv(australia_s.set_index('DATE'), 'Geoscience Australia Landslide Search')

In [None]:
#drop the Australian intermediates that are not needed any more: 
#first the raw sheets, then the helper functions, then the lookup tables
del Landslide_details, Landslip_details
del aus_todatetime, australia_trigger
del australia_material_map, australia_trigger_map, australia_type_map, australia_uncertainty_map

### Landslide & Torrential Colombia Database

In [None]:
#read the Colombian inventory export (default sheet, header in the first row)
colombia = pd.read_excel('../df_AVT_08_13_22.xlsx')

In [None]:
#convert landslide time to datetime 
colombia['DATE'] = pd.to_datetime(colombia['date'])

#preserve original index (must happen before the index is reset below)
colombia['src_index'] = colombia.index.values

#reset index to  make sure we have unique and monotonic values
colombia.reset_index(inplace = True, drop = True)

In [None]:
#parse location into longitude and latitude

def col_to_lat(row):
    """
    Extract the latitude from the 'location' string of one Colombian record.
    
    The first and last characters of the string are dropped (presumably enclosing brackets - 
    verify against the data), the rest is split on commas and the second element is returned 
    as a float.
    """
    
    coordinates = row['location'][1:-1].split(',')

    return float(coordinates[1].strip())

def col_to_long(row):
    """
    Extract the longitude from the 'location' string of one Colombian record.
    
    The first and last characters of the string are dropped (presumably enclosing brackets - 
    verify against the data), the rest is split on commas and the first element is returned 
    as a float.
    """
    
    coordinates = row['location'][1:-1].split(',')
    
    return float(coordinates[0].strip())


In [None]:
#parse the 'location' string of every record into numeric longitude and latitude columns
colombia['longitude'] = colombia.apply(col_to_long, axis = 1)
colombia['latitude'] = colombia.apply(col_to_lat, axis = 1)



In [None]:
#create a geodataframe with one point per record, built from the parsed longitude/latitude 
#columns and declared as WGS84 (EPSG:4326)

colombia = gpd.GeoDataFrame(data = colombia, 
                              geometry = gpd.points_from_xy(colombia.longitude, 
                                                          colombia.latitude), 
                                crs = 'EPSG:4326')

In [None]:
#translate the Colombian triggering labels (English and Spanish) into the common trigger 
#nomenclature; a label missing from the map raises a KeyError

colombia_trigger_map = {
    'Anthropic': 'Human Activity',
    'Rainfall': 'Precipitation',
    'Unknown': 'Unknown',
    'Earthquake': 'Earthquake',
    'Erosion': 'Other',
    'Construcción': 'Human Activity',
    'Temporada seca': 'Other',
    'Lluvias y obstrucción de la quebrada Cañada Negra por basura': 'Precipitation',
    'Unkown': 'Unknown',
    'Negligencia': 'Human Activity',
    'Actividad Tectónica': 'Earthquake',
    'Condiciones Atmosfericas': 'Other',
    'Desconocido': 'Unknown',
}

colombia['trigger'] = [colombia_trigger_map[raw] for raw in colombia['triggering']]

In [None]:
#keep the original event type, because the 'type' column is overwritten below.
#NOTE: running this cell twice copies the already translated values into 'type_orig', 
#and the lookup then fails for values that are not keys of the map
colombia['type_orig'] = colombia['type']

#translate the original event types into the common type nomenclature;
#a type missing from the map raises a KeyError
colombia_type_map = {
    'Landslide': 'Slide',
    'Flood': 'Unknown',
    'Torrential': 'Unknown',
    'Rockfall': 'Fall',
    'Earthquake': 'Unknown',
    'Debris flow': 'Flow',
    'Unknown': 'Unknown',
    'Forest fire': 'Unknown',
    'Volcanic eruption': 'Other',
    'Fall': 'Fall',
    'Debris': 'Flow',
    'Fire': 'Unknown',
}

colombia['type'] = [colombia_type_map[raw] for raw in colombia['type_orig']]

In [None]:
#derive the material from the original event type ('type_orig') and express it in the 
#common material nomenclature; a type missing from the map raises a KeyError

colombia_material_map = {
    'Landslide': 'Earth',
    'Flood': 'Unknown',
    'Torrential': 'Unknown',
    'Rockfall': 'Rock',
    'Earthquake': 'Unknown',
    'Debris flow': 'Debris',
    'Unknown': 'Unknown',
    'Forest fire': 'Unknown',
    'Volcanic eruption': 'Other',
    'Fall': 'Unknown',
    'Debris': 'Debris',
    'Fire': 'Unknown',
}

colombia['material'] = [colombia_material_map[raw] for raw in colombia['type_orig']]

The uncertainty classes in the Colombian database are: low (< 100 m), mid (100 m to 10 km), and high (> 10 km) (Daissy Herrerra Posada, personal communication).

In [None]:
#translate the Colombian uncertainty classes (either capitalisation) into the common spatial 
#uncertainty classes; a class missing from the map raises a KeyError

colombia_spatial_map = {
    'Mid': '<10km',
    'High': 'Unknown',
    'Low': 'Exact',
    'mid': '<10km',
    'low': 'Exact',
    'high': 'Unknown',
}

colombia['spat_unc'] = [colombia_spatial_map[raw] for raw in colombia['uncertainty']]

In [None]:
#build the standard-format Colombian inventory; the trigger/type/material/spat_unc arguments 
#name the harmonised columns created in the cells above
colombia_s =  simplify_inventory(inventorydf = colombia, 
                                inventoryname = 'Landslide & Torrential Colombia Geodatabase', 
                                  inventory_id = '_id', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://landslides-colombia.herokuapp.com/#/find', 
                                  date_accessed = '20220814', 
                                  citation = """Geohazards Semillero de Investigacion, Universidad Nacional de Colombia.
                                                2022. Landslide & Torrential Colombia Geodatabase.""", 
                                  license = 'Unknown')


In [None]:
#plot_lsinv expects the landslide date as the index; the figure and axes are kept for later use
fig, ax = plot_lsinv(colombia_s.set_index('DATE'), "colombia")

### Italy

#### FraneItalia V3

In [None]:
#read the 'ALL' sheet of the FraneItalia workbook; header = 2 takes the column names from the third row
franeitalia = pd.read_excel('../FraneItalia_2010-2020.xlsx', 
                            sheet_name = 'ALL', 
                           header = 2)

In [None]:
#convert landslide time to datetime 
franeitalia['DATE'] = pd.to_datetime(franeitalia['Initial day'])

#remove the date from those which are estimated - we only want certain days
#(rows whose 'Td' column equals 'estimation' lose their date)

franeitalia.loc[franeitalia['Td'] == 'estimation', 'DATE'] = None


#preserve original index (must happen before the index is reset below)
franeitalia['src_index'] = franeitalia.index

#reset index to  make sure we have unique and monotonic values
franeitalia.reset_index(inplace = True, drop = True)

#create a geodataframe with one point per record, built from the LONG/LAT columns 
#and declared as WGS84 (EPSG:4326)

franeitalia = gpd.GeoDataFrame(data = franeitalia, 
                              geometry = gpd.points_from_xy(franeitalia.LONG, 
                                                          franeitalia.LAT), 
                                crs = 'EPSG:4326')



In [None]:
#translate the FraneItalia trigger labels into the common trigger nomenclature;
#a label missing from the map (including a missing value) raises a KeyError

franeitalia_trigger_map = {
    'rainfall': 'Precipitation',
    'rainfall + other': 'Precipitation',
    'n.a.': 'Unknown',
    'earthquake': 'Earthquake',
    'snowmelt': 'Other',
    'other': 'Other',
    'human activity': 'Human Activity',
}

franeitalia['trigger'] = [franeitalia_trigger_map[raw] for raw in franeitalia['Trigger']]

In [None]:
#assign type to uniform naming scheme

#keys: values of the source 'Typology' column (np.nan and ' ' are intended to catch missing entries);
#values: movement types of the compilation
franeitalia_type_map = {np.nan:'Unknown',
                        'fall':'Fall',
                        'flow':'Flow',
                        'slide':'Slide',
                        'more than one':'Complex',
                        ' ':'Unknown',
                        ' flow':'Flow'}


#row-wise dictionary lookup; a 'Typology' value that is not a key of the map raises a KeyError
franeitalia['type'] = franeitalia.apply(lambda row:franeitalia_type_map[row['Typology']], axis = 1)

In [None]:
#assign material to uniform naming scheme

#keys: values of the source 'Material' column (np.nan and ' ' are intended to catch missing entries);
#values: material categories of the compilation
franeitalia_material_map = {'more than one':'Other', 
                            'rock':'Rock',
                            'soil':'Earth',
                            np.nan:'Unknown',
                            'debris':'Debris',
                            ' ':'Unknown'}


#row-wise dictionary lookup; a 'Material' value that is not a key of the map raises a KeyError
franeitalia['material'] = franeitalia.apply(lambda row:franeitalia_material_map[row['Material']], axis = 1)

In [None]:
#assign spatial uncertainty from the source 'Sd' code

#C = Certain -> Exact
#A = Approximated -> approximating as <10 km
#M = Municipality -> approximating as <50 km
#AE = Geographical Region -> approximating as <250km

#both 'C' and lower-case 'c' are mapped to 'Exact'
franeitalia_spatial_map = {'M':'<50km', 
                           'AE':'<250km',
                           'A':'<10km',
                           'C':'Exact',
                           'c':'Exact'}


#row-wise dictionary lookup; an 'Sd' value that is not a key of the map raises a KeyError
franeitalia['spat_unc'] = franeitalia.apply(lambda row:franeitalia_spatial_map[row['Sd']], axis = 1)

In [None]:
#pass the harmonised columns plus source metadata to simplify_inventory (defined earlier in this notebook)
#NOTE(review): date_accessed '20220101' is also used for New Zealand - confirm it is the real access date
franeitalia_s =  simplify_inventory(inventorydf = franeitalia, 
                                inventoryname = 'FraneItalia version 3', 
                                  inventory_id = 'ID', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://franeitalia.wordpress.com/database/', 
                                  date_accessed = '20220101', 
                                  citation = """Calvello, M., Pecoraro, G. FraneItalia: a catalog of recent Italian landslides.
                                  Geoenviron Disasters 5, 13 (2018). https://doi.org/10.1186/s40677-018-0105-5""", 
                                  license = 'Creative Commons Attribution 4.0 International https://creativecommons.org/licenses/by/4.0/')

In [None]:
#plot the FraneItalia inventory, indexed by date, on the world map with its annual landslide counts
plot_lsinv(franeitalia_s.set_index('DATE'), "FraneItalia")

In [None]:
#the lookup dictionaries are no longer needed once the harmonised columns exist
del franeitalia_material_map, franeitalia_spatial_map, franeitalia_trigger_map, franeitalia_type_map

### New Zealand

**GNS New Zealand Landslide Database**

In [None]:
#read the three exports of the GNS New Zealand Landslide Database (small, moderate and large landslides)
nz_small = pd.read_csv('/home/luna/Documents/04_Data/20220807_NewZealand/nz_small.csv')
nz_moderate = pd.read_csv('/home/luna/Documents/04_Data/20220807_NewZealand/nz_moderate.csv')
#the large export previously re-read nz_moderate.csv, which duplicated the moderate records and left out the
#large landslides
#NOTE(review): assumes the large export is saved as nz_large.csv next to the other two files - confirm
nz_large = pd.read_csv('/home/luna/Documents/04_Data/20220807_NewZealand/nz_large.csv')


In [None]:
#stack the three exports; each file's own row labels are kept here and are replaced when the index is reset
newzealand = pd.concat([nz_small, nz_moderate, nz_large])

In [None]:
#parse the occurrence date; the source writes the day before the month
newzealand['DATE'] = pd.to_datetime(newzealand['Date of Occurrence'], dayfirst = True)

#keep the original row labels so records can be traced back to the source files
newzealand['src_index'] = newzealand.index

#replace the index with a fresh, unique and monotonic range
newzealand = newzealand.reset_index(drop = True)

#build point geometries from the coordinate columns and wrap everything in a geodataframe
#NOTE(review): the X/Y coordinates are declared as WGS84 longitude/latitude here - confirm against the export
newzealand_points = gpd.points_from_xy(newzealand['X Co-ordinate'], newzealand['Y Co-ordinate'])
newzealand = gpd.GeoDataFrame(data = newzealand, 
                              geometry = newzealand_points, 
                              crs = 'EPSG:4326')

In [None]:
#assign triggers to uniform naming scheme

#keys: values of the source 'Trigger' column; values: trigger categories of the compilation
newzealand_trigger_map = {'unknown':'Unknown', 
                          'Earthquake':'Earthquake',
                          'Rainfall':'Precipitation',
                          'other':'Other'}

#row-wise dictionary lookup; a 'Trigger' value that is not a key of the map raises a KeyError
newzealand['trigger'] = newzealand.apply(lambda row:newzealand_trigger_map[row['Trigger']], axis = 1)

In [None]:
#assign type to uniform naming scheme

#keys: values of the source 'Movement Type' column; values: movement types of the compilation
newzealand_type_map = {'Fall':'Fall', 
                       'Rotational slide (slump)':'Slide',
                       'Translational slide':'Slide',
                       'not collected':'Unknown',
                       'Topple':'Topple',
                       'unknown':'Unknown',
                       'Flow':'Flow',
                       'Subsidence':'Other',
                       'Creep':'Other',
                       'other':'Other',
                       'Lateral spread':'Spread',
                       'Avalanche':'Other',
                       'Rockfall':'Fall',
                       'Rock and Debris slide':'Slide',
                       'Debris avalanche':'Flow',
                       'Soil and Debris slide':'Slide',
                       'Complex':'Complex'}


#row-wise dictionary lookup; a 'Movement Type' value that is not a key of the map raises a KeyError
newzealand['type'] = newzealand.apply(lambda row:newzealand_type_map[row['Movement Type']], axis = 1)

In [None]:
#assign material to uniform naming scheme

#keys: values of the source 'Material Type' column; values: material categories of the compilation
newzealand_material_map = {'Rock':'Rock', 
                           'Soil':'Earth', 
                           'unknown':'Unknown', 
                           'not collected':'Unknown', 
                           'other':'Other', 
                           'Mixture':'Other'}


#row-wise dictionary lookup; a 'Material Type' value that is not a key of the map raises a KeyError
newzealand['material'] = newzealand.apply(lambda row:newzealand_material_map[row['Material Type']], axis = 1)


In [None]:
#every record gets the same spatial uncertainty: the data source given is a landslide GIS, so the points
#should be the actual locations of occurrence
newzealand['spat_unc'] = 'Exact' #data source given is landslide GIS, so these should be the actual points of 
#occurrence

In [None]:
#pass the harmonised columns plus source metadata to simplify_inventory (defined earlier in this notebook)
#NOTE(review): the data folder is dated 20220807 but date_accessed is '20220101' - confirm the access date
newzealand_s =  simplify_inventory(inventorydf = newzealand, 
                                inventoryname = 'GNS New Zealand Landslide Database', 
                                  inventory_id = 'Landslide ID', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://data.gns.cri.nz/landslides/index.html', 
                                  date_accessed = '20220101', 
                                  citation = """Rosser, B., Dellow, S., Haubrock, S., & Glassey, P. (2017). 
                                  New Zealand’s National Landslide Database. Landslides, 
                                  14(6), 1949–1959. https://doi.org/10.1007/s10346-017-0843-6""", 
                                  license = 'Creative Commons Attribution 4.0 International https://creativecommons.org/licenses/by/4.0/')

In [None]:
#plot the New Zealand inventory, indexed by date, on the world map with its annual landslide counts
plot_lsinv(newzealand_s.set_index('DATE'), 'GNS New Zealand Landslide Database')

### Rwanda

**Landslide Inventory Rwanda**

In [None]:
#read the Rwanda inventory from the first sheet of the workbook
rwanda = pd.read_excel('../Landslide inventory Rwanda up to 19May 2021.xlsx')

In [None]:
#remove time info for one date range
rwanda.loc[rwanda['event_date'] == '3-5/5/2013', 'event_date'] = None

In [None]:
#parse the event date
#NOTE(review): no dayfirst is given here although the removed range '3-5/5/2013' looks day-first - confirm
#that any text dates in 'event_date' are parsed as intended
rwanda['DATE'] = pd.to_datetime(rwanda['event_date'])

#keep the original row labels so records can be traced back to the source file
rwanda['src_index'] = rwanda.index

#replace the index with a fresh, unique and monotonic range
rwanda = rwanda.reset_index(drop = True)

#build point geometries from the longitude/latitude columns and wrap everything in a geodataframe (WGS84)
rwanda_points = gpd.points_from_xy(rwanda['longitude'], rwanda['latitude'])
rwanda = gpd.GeoDataFrame(data = rwanda, 
                          geometry = rwanda_points, 
                          crs = 'EPSG:4326')

In [None]:
#assign triggers to uniform naming scheme

#keys: values of the source 'landslide_trigger' column (np.nan is intended to catch missing entries);
#values: trigger categories of the compilation
rwanda_trigger_map = {'Rain':'Precipitation', 
                      'Unknown':'Unknown', 
                      'downpour':'Precipitation', 
                      'Mine':'Human Activity', 
                      'Flooding':'Other', 
                      'mining':'Human Activity', 
                      np.nan:'Unknown',
                      'rain':'Precipitation',
                      'continuous_rain':'Precipitation',
                      'Rain and groundwater':'Precipitation'}


#row-wise dictionary lookup; a 'landslide_trigger' value that is not a key of the map raises a KeyError
rwanda['trigger'] = rwanda.apply(lambda row:rwanda_trigger_map[row['landslide_trigger']], axis = 1)

In [None]:
#assign type to uniform naming scheme

#keys: values of the source 'landslide_category' column, including spelling variants with trailing blanks
#(np.nan is intended to catch missing entries); values: movement types of the compilation
rwanda_type_map = {'Debris flow':'Flow', 
                   'Landslide':'Slide',
                   'Mudslide':'Slide',
                   'landslide':'Slide',
                   'mudslide':'Slide',
                   'Rockslide':'Slide',
                   'Other':'Other',
                   'Mudslides ':'Slide',
                   'Mudslide ':'Slide',
                   np.nan:'Unknown',
                   'Debris flow/Rock fall':'Complex',
                   'debris_flow':'Flow',
                   'Mud/earth flow':'Flow',
                   'Mud flow':'Flow'}

#row-wise dictionary lookup; a 'landslide_category' value that is not a key of the map raises a KeyError
rwanda['type'] = rwanda.apply(lambda row:rwanda_type_map[row['landslide_category']], axis = 1)

In [None]:
#assign material to uniform naming scheme

#the material is derived from the same 'landslide_category' column that is used for the type
#(np.nan is intended to catch missing entries)
rwanda_material_map = {'Debris flow':'Debris', 
                   'Landslide':'Earth',
                   'Mudslide':'Earth',
                   'landslide':'Earth',
                   'mudslide':'Earth',
                   'Rockslide':'Rock',
                   'Other':'Other',
                   'Mudslides ':'Earth',
                   'Mudslide ':'Earth',
                   np.nan:'Unknown',
                   'Debris flow/Rock fall':'Other',
                   'debris_flow':'Debris',
                   'Mud/earth flow':'Earth',
                   'Mud flow':'Earth'}

#row-wise dictionary lookup; a 'landslide_category' value that is not a key of the map raises a KeyError
rwanda['material'] = rwanda.apply(lambda row:rwanda_material_map[row['landslide_category']], axis = 1)

In [None]:
#assign spatial uncertainty to uniform naming scheme

#keys: values of the source 'location_accuracy' column (np.nan is intended to catch missing entries);
#values: uncertainty classes of the compilation. '15km' is placed in the next larger class, '<25km'
rwanda_uncert_map = {'5km':'<5km',
                     '5Km':'<5km',
                   'unknown':'Unknown',
                   '50km':'<50km',
                   'exact':'Exact',
                   '1km':'<1km',
                     '15km':'<25km',
                   '25km':'<25km',
                   '10km':'<10km',
                   '100km':'<100km',
                   '250km':'<250km',
                   'Unknown':'Unknown',
                   'Known exactly':'Exact',
                   np.nan:'Unknown'}

#row-wise dictionary lookup; a 'location_accuracy' value that is not a key of the map raises a KeyError
rwanda['spat_unc'] = rwanda.apply(lambda row:rwanda_uncert_map[row['location_accuracy']], axis = 1)

In [None]:
#pass the harmonised columns plus source metadata to simplify_inventory (defined earlier in this notebook)
rwanda_s =  simplify_inventory(inventorydf = rwanda, 
                                inventoryname = "Landslide Inventory Rwanda", 
                                  inventory_id = 'event_id', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://doi.org/10.4121/15040446.v1', 
                                  date_accessed = '20220805', 
                                  citation = """Uwihirwe, Judith (2021): Data underlying the research of Integration of observed and model derived groundwater levels in landslide threshold models in Rwanda. 
                                              4TU.ResearchData. Dataset. https://doi.org/10.4121/15040446.v1""", 
                                  license = 'CC0 1.0 Universal (CC0 1.0) https://creativecommons.org/publicdomain/zero/1.0/')

In [None]:
#plot the Rwanda inventory, indexed by date, on the world map with its annual landslide counts
plot_lsinv(rwanda_s.set_index('DATE'), "Landslide Inventory Rwanda")

In [None]:
#the lookup dictionaries are no longer needed once the harmonised columns exist
del rwanda_material_map, rwanda_uncert_map, rwanda_trigger_map, rwanda_type_map

### United States

#### Seattle Historic Landslide Locations ECA

In [None]:
#read the Seattle Historic Landslide Locations ECA shapefile
seattle = gpd.read_file('../Historic_Landslide_Locations_ECA.shp')


In [None]:
def seattle_todatetime(row):
    
    """
    Build a daily timestamp for one record of the Seattle inventory.
    
    row = row (pandas Series) of the Seattle inventory with the fields 'YEAR_', 'MONTH_', 'DAY_' and 
    'DATECONFID'.
    
    Returns a pandas Timestamp when a day is given, the date is flagged as confident (DATECONFID == 'True') 
    and year, month and day form a valid calendar date.  Returns np.nan otherwise.
    """
    
    #take only rows where we're confident that the date is correct
    if (row['DAY_'] is None) or (row['DATECONFID'] != 'True'):
        return np.nan
    
    try:
        year, month, day = row[['YEAR_', 'MONTH_', 'DAY_']].values.astype(int)
        
        return pd.to_datetime(datetime.datetime(year, month, day))
    
    except (ValueError, TypeError, OverflowError):
        #missing, non-numeric or impossible dates (e.g. month 13) count as unknown; any other error is a 
        #real problem and is no longer hidden by a bare except
        return np.nan

#derive the landslide date row by row
seattle["DATE"] = seattle.apply(seattle_todatetime, axis = 1)


In [None]:
#keep the original row labels so records can be traced back to the source file
seattle['src_index'] = seattle.index

#replace the index with a fresh, unique and monotonic range
seattle = seattle.reset_index(drop = True)

#the other inventories are in WGS84 (EPSG:4326), so reproject if the shapefile uses another crs
if seattle.crs != 'EPSG:4326':
    seattle = seattle.to_crs('EPSG:4326')
    

In [None]:
#trigger 

#NATURAL_ = whether the trigger mechanism was natural (Y) or human (N).  Precipitation is considered a natural 
#trigger (NATURAL_ = Y), whereas pipe breaks and excessive lawn watering are not (NATURAL_ = N)

#groundwater (GRNDWTR) / surface water (SURFACEWAT) = Y

#NATURAL_ = N is 'Human Activity'; everything else, including null, is "Unknown"

def seattle_trigger(row):
    
    """
    Assign a trigger category to one record of the Seattle inventory.
    
    row = row of the Seattle inventory with the fields 'NATURAL_', 'SURFACEWAT' and 'GRNDWTR'.
    
    Returns 'Precipitation' where NATURAL_ is Y and surface water or groundwater is Y, 'Human Activity' 
    where NATURAL_ is N, and "Unknown" otherwise.
    """
    
    natural = row['NATURAL_']
    surface_water = row['SURFACEWAT'] == "Y"
    groundwater = row['GRNDWTR'] == 'Y'
    
    #conservative choice: requiring water involvement should remove any cases of human activity or earthquakes
    if natural == 'Y' and (surface_water or groundwater):
        return 'Precipitation'
    
    if natural == 'N':
        return 'Human Activity'
    
    return "Unknown"


In [None]:
#assign the trigger category row by row
seattle['trigger'] = seattle.apply(seattle_trigger, axis = 1)

In [None]:
def seattle_type(row):
    
    """
    Assign a movement type to one record of the Seattle inventory.
    
    row = row of the Seattle inventory with the fields 'DEBRISFLOW' and 'SLIDETYPE'.
    
    Returns "Flow" for debris flows, "Slide" for the known slide type codes and 'Unknown' otherwise.
    """
    
    #a debris flow flag takes precedence over the slide type code
    if row['DEBRISFLOW'] == 'Y':
        return "Flow"
    
    #SLIDETYPE codes that are all treated as slides:
        #HBP = high bluff peeloff
        #SC = shallow colluvial
        #DS = deep-seated
        #BO = groundwater blowout
    slide_codes = ('HBP', 'SC', 'BO', 'DS')
    
    return "Slide" if row['SLIDETYPE'] in slide_codes else 'Unknown'



In [None]:
#assign the movement type row by row
seattle['type'] = seattle.apply(seattle_type, axis = 1)

In [None]:
def seattle_material(row):
    
    """
    Assign a material to one record of the Seattle inventory.
    
    row = row of the Seattle inventory with the fields 'DEBRISFLOW' and 'SLIDETYPE'.
    
    Returns "Debris" for debris flows, 'Earth' for the known slide type codes and 'Unknown' otherwise.
    """
    
    #a debris flow flag takes precedence over the slide type code
    if row['DEBRISFLOW'] == 'Y':
        return "Debris"
    
    #SLIDETYPE codes that are all treated as earth:
        #HBP = high bluff peeloff
        #SC = shallow colluvial
        #DS = deep-seated
        #BO = groundwater blowout
    earth_codes = ('HBP', 'SC', 'BO', 'DS')
    
    return 'Earth' if row['SLIDETYPE'] in earth_codes else 'Unknown'


In [None]:
#assign the material row by row
seattle['material'] = seattle.apply(seattle_material, axis = 1)

In [None]:
def seattle_spat(row):
    
    """
    Assign a spatial uncertainty class to one record of the Seattle inventory.
    
    row = row of the Seattle inventory with the field 'FIELDCHECK'.
    
    Returns 'Exact' where FIELDCHECK is the string 'True' and '<1km' otherwise.
    """
    
    #field checked locations should be exact; all others come from reports and should still be within a km
    return 'Exact' if row['FIELDCHECK'] == 'True' else '<1km'
    

In [None]:
#assign the spatial uncertainty row by row
seattle['spat_unc'] = seattle.apply(seattle_spat, axis = 1)

In [None]:
#pass the harmonised columns plus source metadata to simplify_inventory (defined earlier in this notebook)
seattle_s =  simplify_inventory(inventorydf = seattle, 
                                inventoryname = 'Seattle Historic Landslide Locations ECA', 
                                  inventory_id = 'OBJECTID', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://data-seattlecitygis.opendata.arcgis.com/datasets/6ac72973a5784d90bda0a5f8a001d9f3_22/explore?location=47.616250%2C-122.328600%2C11.91', 
                                  date_accessed = '20220725', 
                                  citation = """City of Seattle. (2022). Historical Landslide Locations ECA. City of Seattle.""", 
                                  license = 'PDDL License')


In [None]:
#plot the simplified inventory (seattle_s) like every other inventory, instead of the raw source table
plot_lsinv(seattle_s.set_index("DATE"), 'Seattle')

#### Digital Geodata Series DGS06-3 Landslides in New Jersey

In [None]:
#read the New Jersey landslide inventory from the file geodatabase
newjersey = gpd.read_file('/home/luna/Documents/04_Data/20220806_NewJersey/dgs06_3_njlandslides.gdb')

In [None]:
#values of 'DAY' that are not used as a calendar date
#NOTE(review): '42479' and '42494' are presumably spreadsheet serial day numbers - confirm before converting
unusable_days = ['42479', '42494', 'Unknown']

#start from the source 'DAY' column and blank out the strange dates before parsing
newjersey['DATE'] = newjersey['DAY']
newjersey.loc[newjersey['DAY'].isin(unusable_days), 'DATE'] = None
newjersey['DATE'] = pd.to_datetime(newjersey['DATE'])

#keep the original row labels so records can be traced back to the source file
newjersey['src_index'] = newjersey.index

#replace the index with a fresh, unique and monotonic range
newjersey = newjersey.reset_index(drop = True)

#the other inventories are in WGS84 (EPSG:4326), so reproject if the geodatabase uses another crs
if newjersey.crs != 'EPSG:4326':
    newjersey = newjersey.to_crs('EPSG:4326')


In [None]:
#assign triggers to uniform naming scheme

#keys: values of the source 'TRIGGER_' column; values: trigger categories of the compilation
#every entry that mentions heavy rain is classed as 'Precipitation', also when a second factor is named
newjersey_trigger_map = {'Heavy rain':'Precipitation', 
                         'Fossil digging':'Human Activity',
                         'Weathering':'Other',
                         'Road construction removed toe':'Human Activity',
                         'Heavy rain/weathering':'Precipitation',
                         'Construction':'Human Activity',
                         'Clay digging':'Human Activity',
                         'Heavy rain/poor drainage':'Precipitation',
                         'Heavy rain/broken sewer pipe':'Precipitation',
                         'Fill material failure':'Human Activity',
                         'Quarrying':'Human Activity',
                         'Heavy rain/snowmelt':'Precipitation',
                         'Atlantic Ocean wave action':'Other',
                         'Vibration from railroad':'Human Activity',
                         'River erosion':'Other',
                         'Tree removal from slope':'Human Activity',
                         'Mining':'Human Activity',
                         'Unknown':'Unknown',
                         'Heavy rain/broken storm drain':'Precipitation',
                         'Water main break':'Human Activity'}


#row-wise dictionary lookup; a 'TRIGGER_' value that is not a key of the map raises a KeyError
newjersey['trigger'] = newjersey.apply(lambda row:newjersey_trigger_map[row['TRIGGER_']], axis = 1)


In [None]:
#assign type to uniform naming scheme

#keys: values of the source 'TYPE' column; values: movement types of the compilation
newjersey_type_map = {'Rockslide':'Slide', 
                      'Debris flow':'Flow',
                      'Rockfall':'Fall',
                      'Slump':'Other'}


#row-wise dictionary lookup; a 'TYPE' value that is not a key of the map raises a KeyError
newjersey['type'] = newjersey.apply(lambda row:newjersey_type_map[row['TYPE']], axis = 1)

In [None]:

#assign material to uniform naming scheme

#the material is derived from the same 'TYPE' column that is used for the movement type
newjersey_material_map = {'Rockslide':'Rock', 
                          'Debris flow':'Debris',
                          'Rockfall':'Rock',
                          'Slump':'Earth'}


#row-wise dictionary lookup; a 'TYPE' value that is not a key of the map raises a KeyError
newjersey['material'] = newjersey.apply(lambda row:newjersey_material_map[row['TYPE']], axis = 1)

In [None]:
#assign spatial uncertainty to uniform naming scheme

#keys: values of the source 'LOCATION' column, including variants with leading/trailing blanks;
#values: uncertainty classes of the compilation
newjersey_uncert_map = {'Aerial photo':'<1km', 
                        'GPS':'Exact', 
                        'Air Photo':'<1km', 
                        ' ':'Unknown', 
                        ' Air Photo':'<1km',
                        'Air Photo ':'<1km'}

#row-wise dictionary lookup; a 'LOCATION' value that is not a key of the map raises a KeyError
newjersey['spat_unc'] = newjersey.apply(lambda row:newjersey_uncert_map[row['LOCATION']], axis = 1)

In [None]:
#pass the harmonised columns plus source metadata to simplify_inventory (defined earlier in this notebook)
#NOTE(review): the license text starts mid-list (item "3.") and its closing quotation mark is missing - 
#confirm against the NJDEP terms of use
newjersey_s =  simplify_inventory(inventorydf = newjersey, 
                                inventoryname = 'Digital Geodata Series DGS06-3 Landslides In New Jersey', 
                                  inventory_id = 'ID', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://www.state.nj.us/dep/njgs/geodata/dgs06-3.htm', 
                                  date_accessed = '20220806', 
                                  citation = """New Jersey Department of Environmental Protection (NJDEP), New Jersey Geological Survey (NJGS) (2018). Landslides In New Jersey, Series DGS06-3, Edition 20180711 (Geol_landslide)""", 
                                  license = """Digital data received from the NJDEP may not be reproduced or redistributed without all the metadata provided.
3. Any maps, publications, reports, or other documents produced as a result of this project that utilize this digital data will credit the NJDEP's Geographic Information System (GIS) as the source of the data with the following credit/disclaimer: "This (map/publication/report) was developed using New Jersey Department of Environmental Protection Geographic Information System digital data, but this secondary product has not been verified by NJDEP and is not state-authorized or endorsed.""")


In [None]:

#plot the New Jersey inventory, indexed by date, on the world map with its annual landslide counts
plot_lsinv(newjersey_s.set_index('DATE'), "Digital Geodata Series DGS06-3 Landslides In New Jersey")

#### Kentucky Geological Survey Landslide Inventory

In [None]:
#read the landslide inventory layer from the Kentucky Geological Survey file geodatabase
kentucky = gpd.read_file('/home/luna/Documents/04_Data/20220906_Kentucky/KGS_Landslide_Inventory_exp.gdb', 
                        layer = 'KGS_landslide_inventory_data')

In [None]:
#inspect the first rows to see which columns are available
kentucky.head()

In [None]:
#parse the failure date
kentucky['DATE'] = pd.to_datetime(kentucky['FailureDate'])

#keep the original row labels so records can be traced back to the source file
kentucky['src_index'] = kentucky.index

#replace the index with a fresh, unique and monotonic range
kentucky = kentucky.reset_index(drop = True)

#the other inventories are in WGS84 (EPSG:4326), so reproject if the geodatabase uses another crs
if kentucky.crs != 'EPSG:4326':
    kentucky = kentucky.to_crs('EPSG:4326')


In [None]:
#assign triggers to uniform naming scheme

#keys: values of the source 'Contributing_Factor' column, spelled exactly as in the source including its 
#typos (None catches missing entries); values: trigger categories of the compilation
kentucky_trigger_map = {None:'Unknown', 
                        'heavy rainfall':'Precipitation',
                        'removal of toe (erosion)':'Other',
                        'renoval of toe (erosion)':'Other',
                        'saturated ground':'Other',
                        'abandoned mine filled up with water':'Human Activity',
                        'excessive water, dipping bedrock':'Other',
                        'oversteepening of slope':'Human Activity',
                        'multiple factors':"Other",
                        'saturated ground, overloading with fill':'Human Activity',
                        'removal of toe (excavation)':'Human Activity',
                        'excavation of ditchline, talus colluvium at base of mountain':'Human Activity',
                        'improper compaction and grading':'Human Activity',
                        'high infiltration rates-seepage':'Precipitation',
                        'flooding (rapid drawdown)':'Other',
                        'deterioration of fill in construction':'Human Activity',
                        'movement along saturaed soil-dipping bedrock interface':'Other',
                        'large amounts of groundwater, jointing, dip of rocks, sliding on shale':'Other',
                        'erosion and rapid drawdown':'Other',
                        'excavation of material from hill during construction':'Human Activity',
                        'excavation of toe of existing slope':'Human Activity',
                        'rapid drawdown of river after heavy rains, erosion of toe':'Other',
                        'surface drainage':'Other', 
                        'drawdown conditions of Rough River Lake':'Other',
                        'blocked drainage structures':'Other',
                        'reduction in shear strength of embankment material, saturation at base':'Other',
                        'freeze thaw, cutbank':'Other',
                        'subsurface drainage':'Other',
                        'weathering of clay-like shale':'Other',
                        'rain and snowfall':'Precipitation',
                        'saturated fill, settlement, washing out of fines':'Other',
                        'heavy rainfall, snowmelt':'Precipitation', 
                        'heavy rainfall, slope modification':'Precipitation',
                        'weathered shale at base of slope, resistant sandstone above':'Other',
                        'erosion of culvert':'Other', 
                        'dip slope':'Other',
                        'excess water and rapid drawdown':'Other',
                        'cutbank':'Other'}


#row-wise dictionary lookup; a 'Contributing_Factor' value that is not a key of the map raises a KeyError
kentucky['trigger'] = kentucky.apply(lambda row:kentucky_trigger_map[row['Contributing_Factor']], axis = 1)



In [None]:
#assign type to uniform naming scheme

#keys: values of the source 'General_Type' column (None catches missing entries);
#values: movement types of the compilation
kentucky_type_map = {'landslide':'Slide', 
                     'fall':'Fall',
                     'slide':'Slide',
                     None:'Unknown',
                     'flow':'Flow',
                     'rockslide':'Slide'}


#row-wise dictionary lookup; a 'General_Type' value that is not a key of the map raises a KeyError
kentucky['type'] = kentucky.apply(lambda row:kentucky_type_map[row['General_Type']], axis = 1)

In [None]:
#assign material to uniform naming scheme

#keys: values of the source 'Material' column (None catches missing entries);
#values: material categories of the compilation
kentucky_material_map = {None:'Unknown', 
                         'soil/mud':'Earth',
                         'rock':'Rock',
                         'weathered rock':'Rock',
                         'fill':'Earth',
                         'soil and rock':'Earth',
                         'earth':'Earth',
                         'mine spoil':'Other'}


#row-wise dictionary lookup; a 'Material' value that is not a key of the map raises a KeyError
kentucky['material'] = kentucky.apply(lambda row:kentucky_material_map[row['Material']], axis = 1)


In [None]:
#list the confidence levels that occur in the inventory; their meaning is described in the text below
kentucky['Confidence'].unique()

8 – High confidence that the nature and/or spatial extent of the landslide is well characterized
This highest confidence level is typically based on detailed field observations and/or expert analysis of high-resolution topographic data or aerial imagery to characterize the landslide.

5 – Confident that a consequential landslide took place at the specified location
This level of characterization still involves high confidence that a landslide took place at the specified location as evidenced by fatalities and/or damage to infrastructure, but detailed observations of landslide features are not described in the geodatabase.

3 - Landslide likely at or near the specified location
This middle confidence level reflects a known landslide occurred with lower certainty on the exact position or nature of the slope failure. These typically include verified landslides on lower resolution topographic maps or aerial imagery and landslide data that predates digital topography and precise global positioning systems 

In [None]:
# 8 = 'Exact'
# 5 = 'Exact'
# 3 = '<1km'


#assign spatial uncertainty to uniform naming scheme

#keys: numeric levels of the source 'Confidence' column; values: uncertainty classes of the compilation
kentucky_uncert_map = {8:'Exact', 
                      5:'Exact', 
                      3:'<1km'}

#row-wise dictionary lookup; a 'Confidence' value other than 8, 5 or 3 raises a KeyError
kentucky['spat_unc'] = kentucky.apply(lambda row:kentucky_uncert_map[row['Confidence']], axis = 1)

In [None]:
#pass the harmonised columns plus source metadata to simplify_inventory (defined earlier in this notebook)
kentucky_s =  simplify_inventory(inventorydf = kentucky, 
                                inventoryname = 'Kentucky Geological Survey Landslide Inventory', 
                                  inventory_id = 'ID', 
                                  trigger = 'trigger', 
                                  lstype = 'type',
                                  material = 'material',                                
                                  spat_unc = 'spat_unc', 
                                  src_url = 'https://doi.org/10.13023/kgs.data.2022.01', 
                                  date_accessed = '20220906', 
                                  citation = """Crawford, M.M., 2022. Kentucky Geological Survey landslide inventory [2022-01]: Kentucky Geological Survey Research Data, https://doi.org/10.13023/kgs.data.2022.01.""", 
                                  license = """Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/)""")


In [None]:
#plot the Kentucky inventory, indexed by date, on the world map with its annual landslide counts
plot_lsinv(kentucky_s.set_index('DATE'), "Kentucky Geological Survey Landslide Inventory")

### Combine

In [None]:
#all simplified inventories, in the order in which they are stacked
simplified_inventories = [nasaglc_s, 
                          gfld_s,
                          australia_s,
                          colombia_s,
                          franeitalia_s,
                          kentucky_s,
                          newjersey_s,
                          newzealand_s,
                          rwanda_s, 
                          seattle_s]

#stack them row-wise and give the result a fresh 0..n-1 index
compilation = pd.concat(simplified_inventories, axis = 0).reset_index(drop = True)


In [None]:
def remove_tzaware(row):
    
    """
    Return the 'DATE' of a row without time zone information.
    
    row = row of the compilation with a 'DATE' time stamp.
    
    A time stamp without time zone is returned unchanged; a time zone aware time stamp is passed through 
    tz_convert(None), which returns it as a naive time stamp in UTC.
    """
    
    stamp = row['DATE']
    
    return stamp if stamp.tzinfo is None else stamp.tz_convert(None)
                    

In [None]:
#strip the time zone info from the time stamps (none of the inventories is actually tz aware), so that the 
#whole DATE column is time zone naive
compilation['DATE'] = compilation.apply(remove_tzaware, axis = 1)

In [None]:
#plot the complete compilation, indexed by date, on the world map with its annual landslide counts
plot_lsinv(compilation.set_index('DATE'), 'Compilation')

In [None]:
#save the compilation so the following steps can be rerun without rebuilding every inventory
compilation.to_pickle('../01_Data/compilation.pkl')

In [None]:
#reload the compilation written by the previous cell
#NOTE: unpickling runs code stored in the file - only load pickles that were written by this notebook
compilation = pd.read_pickle('../01_Data/compilation.pkl')

### Subset to urban areas, rainfall triggered landslides, and daily time stamp

**Global Human Settlement Layer Urban Centre Database**

Available at http://data.europa.eu/89h/53473144-b88c-44bc-b4a3-4583ed1f547e

Florczyk, A. et al. GHS Urban Centre Database 2015, multitemporal and multidimensional attributes, R2019A, v1.2. https://data.jrc.ec.europa.eu/dataset/53473144-b88c-44bc-b4a3-4583ed1f547e (2019).

In [None]:
#Read GHS UCDB (urban centre polygons) from the geopackage layer
urban = gpd.read_file('../GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg', 
                       layer = 'GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2')

In [None]:
### Subset to urban areas

#simplify GHS UCDB - keep the identifier, area, naming information and geometry

urban = urban.loc[:, ['ID_HDC_G0', 'AREA', 'CTR_MN_ISO', 'GRGN_L1', 'GRGN_L2', 'UC_NM_MN', 'UC_NM_LST', 
                      'CTR_MN_NM', 'geometry']]

#the spatial join is only meaningful when both layers share a crs.  Stop here if they differ: printing a 
#message and carrying on left ls_urban undefined, or stale from an earlier run of this cell
if compilation.crs != urban.crs:
    raise ValueError(f"CRS do not match: compilation is {compilation.crs}, urban is {urban.crs}")

#join the attributes from the urban areas polygons to the landslide points; the left join keeps every 
#landslide and leaves the urban attributes empty where no polygon matches

join_comp_urban = gpd.sjoin(compilation, urban, how = "left")

#get only the landslides that occurred in urban areas, i.e. those that received an urban centre ID
ls_urban = join_comp_urban[join_comp_urban['ID_HDC_G0'].notna()].copy()

In [None]:
#subset to rainfall triggered with time stamps

In [None]:
#keep the urban landslides that are rainfall triggered and have a date
is_rainfall = ls_urban['trigger'] == 'Precipitation'
has_date = ls_urban['DATE'].notna()

ls_urban_ts_rf = ls_urban[is_rainfall & has_date].copy()

### Remove duplicates

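Remove duplicates: a landslide counts as a duplicate when it occurred on the same day as another landslide in the urban, rainfall-triggered subset and their coordinates agree after rounding to 5 decimal places (0.00001 degree). Of each set of duplicates, the last record is kept.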

In [None]:
#helper columns for the duplicate search: coordinates rounded to a fixed number of decimal places
coord_decimals = 5

ls_urban_ts_rf['lat_round'] = ls_urban_ts_rf.geometry.y.round(coord_decimals)
ls_urban_ts_rf['long_round'] = ls_urban_ts_rf.geometry.x.round(coord_decimals)

In [None]:
#flag every record that shares date and rounded coordinates with a later record; the last one is kept
is_duplicate = ls_urban_ts_rf.duplicated(subset = ['DATE', 'lat_round', 'long_round'], keep = 'last')

ls_urban_ts_rf_u = ls_urban_ts_rf.loc[~is_duplicate].copy()

In [None]:
#the rounded coordinates were only needed for the duplicate search
ls_urban_ts_rf_u = ls_urban_ts_rf_u.drop(columns = ['lat_round', 'long_round'])

### Assign a timestamp at local midnight and convert to UTC

In [None]:
#assign a timestamp at midnight on the day of occurrence local time and convert it to UTC

#set the time of landslide occurrence to 12:00 am local time on the date of occurrence 

ls_urban_ts_rf_u['date_local_midnight'] = pd.to_datetime(ls_urban_ts_rf_u['DATE'].dt.strftime('%Y-%m-%d'))

#get the landslide's timezone from its latitude and longitude

#initialize a timezone finder object
tf = TimezoneFinder()

#get timezone for all landslide points from their latitude and longitude
ls_urban_ts_rf_u['timezone'] = ls_urban_ts_rf_u.apply(lambda row: tf.timezone_at(lng=row.geometry.x, lat=row.geometry.y), 
                                            axis = 1)

#a point without a time zone cannot be converted to UTC; fail with a clear message instead of the 
#"cannot convert tz-naive Timestamp" error that the conversion below would raise
no_timezone = ls_urban_ts_rf_u['timezone'].isna()

if no_timezone.any():
    raise ValueError(f"No time zone found for {no_timezone.sum()} landslide(s); cannot convert their dates to UTC")

#localize the local midnight date

def localize_midnight(row):
    
    """
    Attach the row's time zone to its local midnight time stamp.
    
    Some time zones switch to or from daylight saving time at midnight.  Where local midnight does not exist, 
    the first existing time of that day is used (nonexistent = 'shift_forward'); where it occurs twice, the 
    first occurrence, i.e. the one in daylight saving time, is used (ambiguous = True).
    """
    
    return row['date_local_midnight'].tz_localize(row['timezone'], 
                                                  ambiguous = True, 
                                                  nonexistent = 'shift_forward')

ls_urban_ts_rf_u['date_local_midnight'] = ls_urban_ts_rf_u.apply(localize_midnight, axis = 1)


#convert the local midnight date to UTC

ls_urban_ts_rf_u['date_local_midnight_utc'] = ls_urban_ts_rf_u.apply(lambda row: row['date_local_midnight'].tz_convert('UTC'), axis = 1)


### Save the subset inventory for further analysis

In [None]:
#write the urban, rainfall triggered, dated and de-duplicated landslides to the working directory
ls_urban_ts_rf_u.to_pickle('ls_urban_ts_rf_u.pkl')

### Save the number of landslides in each city to make Figure 1

In [None]:
#read the full GHS UCDB again - the attributes needed for the figure were dropped from urban above
urban_gpkg = '../GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg'

urban = gpd.read_file(urban_gpkg, layer = 'GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2')

#urban centre attributes that are saved for the figure 1 bubble plot
city_attributes = ['ID_HDC_G0','GCPNT_LAT', 'GCPNT_LON', 
                   'E_WR_P_14', 'EL_AV_ALS', 'E_KG_NM_LST',
                   'B15', 
                   'P15', 'INCM_CMI']

urbancoords = urban.loc[:, city_attributes]

#count the urban landslides in each city (non-missing src_index per city), largest count first
city_keys = ['ID_HDC_G0', 'UC_NM_MN', 'CTR_MN_NM']

nls_rf_per_city = ls_urban_ts_rf_u.groupby(city_keys).count()['src_index'].sort_values(ascending = False)

#turn the counts into a table with one row per city and the count in the column 'nls'
nls_rf_per_city_plot = pd.DataFrame(nls_rf_per_city).reset_index()
nls_rf_per_city_plot = nls_rf_per_city_plot.rename(columns = {'src_index':'nls'})

#attach the urban centre attributes to each city
nls_rf_per_city_plot = nls_rf_per_city_plot.merge(urbancoords, 
                                                  how = 'left', 
                                                  on = 'ID_HDC_G0')

#city name reduced to its letters and digits (blanks, punctuation etc. removed)
def alnum_city_name(row):
    return ''.join(filter(str.isalnum, row['UC_NM_MN']))

nls_rf_per_city_plot['city'] = nls_rf_per_city_plot.apply(alnum_city_name, axis = 1)

#save the table for making the bubble plot of figure 1
nls_rf_per_city_plot.to_csv('nls_rf_per_city_plot.csv')

