<a href="https://colab.research.google.com/github/mohitsharmas97/rockfall/blob/main/Untitled39.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the library
!pip install earthaccess -q

import earthaccess
import os

# Start an interactive NASA Earthdata login; earthaccess prompts for the
# account credentials. persist=True asks earthaccess to keep them for later
# use (per the earthaccess docs this is a .netrc file), so the prompt should
# only be needed once.
auth = earthaccess.login(
    strategy="interactive",
    persist=True,
)

# Nothing below can download data without a valid session, so say so loudly.
if not auth.authenticated:
    print("❌ Authentication failed. Please make sure you have a .netrc file or log in.")

In [None]:
import os
import zipfile
import glob
from tqdm.notebook import tqdm

# Directory where you downloaded the elevation data
data_dir = "nasadem_hotspot_data"

# Find all .zip files in the directory. glob returns an empty list (it does not
# raise) when the directory is missing or holds no archives.
zip_files = glob.glob(os.path.join(data_dir, '*.zip'))

print(f"Found {len(zip_files)} .zip files to extract...")

# Track the outcome per archive so cleanup only touches archives whose
# contents were actually extracted.
extracted_archives = []
corrupted_archives = []

# Loop through each zip file and extract it
for file_path in tqdm(zip_files, desc="Unzipping files"):
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    except zipfile.BadZipFile:
        print(f"Warning: Skipping corrupted file: {os.path.basename(file_path)}")
        corrupted_archives.append(file_path)
    else:
        extracted_archives.append(file_path)

print("\nExtraction complete.")

# Optional but recommended: Clean up the .zip files to save space.
# Corrupted archives are deliberately left on disk: deleting them would remove
# the only record of which tiles still need to be downloaded again.
print("Cleaning up .zip archives...")
for file_path in extracted_archives:
    os.remove(file_path)

if corrupted_archives:
    print(
        f"Warning: {len(corrupted_archives)} corrupted archive(s) were kept in "
        f"'{data_dir}' and need to be downloaded again."
    )

if extracted_archives:
    print("✅ Cleanup complete. Your directory now contains the .hgt elevation files.")
else:
    # Do not claim success when nothing was unpacked (e.g. wrong data_dir).
    print(f"Warning: No archives were extracted from '{data_dir}'. Check that the directory exists and contains .zip files.")

Found 0 .zip files to extract...


Unzipping files: 0it [00:00, ?it/s]


Extraction complete.
Cleaning up .zip archives...
✅ Cleanup complete. Your directory now contains the .hgt elevation files.


In [None]:
# ===================================================================
# SETUP: Install and import all necessary libraries
# ===================================================================
!pip install earthaccess rasterio tqdm -q

import pandas as pd
import earthaccess
import os
import zipfile
import glob
import math
import rasterio
from tqdm.notebook import tqdm

print("✅ Setup complete.")

# ===================================================================
# PART 1: LOAD AND FILTER THE LANDSLIDE CATALOG
# ===================================================================
print("\n--- Part 1: Loading and Filtering Landslide Data ---")

# Bounding box for the primary hotspot (Himalayas); the box spans
# longitude 60 to 98 and latitude -2 to 39.
# Format: (Lon Min, Lat Min, Lon Max, Lat Max)
# Defined outside the try block so it exists no matter how loading goes.
optimal_bbox = (60.0, -2.0, 98.0, 39.0)

try:
    # Only the read can raise FileNotFoundError, so only the read is guarded.
    df = pd.read_csv('/content/Global_Landslide_Catalog_Export_rows.csv')
except FileNotFoundError:
    print("❌ ERROR: 'Global_Landslide_Catalog_Export_rows.csv' not found. Please upload the file.")
    df_hotspot = pd.DataFrame() # Create empty dataframe to prevent further errors
else:
    # Keep only the landslides inside the box. Series.between is inclusive on
    # both ends by default, matching the original >= / <= comparisons; rows
    # with a missing coordinate compare False and are dropped.
    in_longitude_range = df['longitude'].between(optimal_bbox[0], optimal_bbox[2])
    in_latitude_range = df['latitude'].between(optimal_bbox[1], optimal_bbox[3])
    df_hotspot = df[in_longitude_range & in_latitude_range].copy()

    # A header-only CSV has zero rows; avoid dividing by zero in that case.
    percentage = (len(df_hotspot) / len(df)) * 100 if len(df) else 0.0
    print(f"Loaded {len(df)} total landslides.")
    print(f"Filtered to {len(df_hotspot)} landslides in the optimal boundary ({percentage:.2f}% of total).")

# ===================================================================
# PART 2: DOWNLOAD ELEVATION DATA FOR THE BOUNDARY
# ===================================================================
# output_dir doubles as the "did we get data?" flag for the later parts: it
# stays None unless tiles were found and a download was started.
output_dir = None

if not df_hotspot.empty:
    print("\n--- Part 2: Downloading Elevation Data ---")
    auth = earthaccess.login(strategy="interactive", persist=True)

    if not auth.authenticated:
        # Report the failure here instead of silently falling through to the
        # generic "no data was downloaded" message printed later.
        print("❌ Authentication failed. Please make sure you have a .netrc file or log in.")
    else:
        print("Searching for elevation data for the hotspot...")
        granules = earthaccess.search_data(
            short_name="NASADEM_HGT",
            version="001",
            bounding_box=optimal_bbox,
            count=-1
        )

        if not granules:
            print("No elevation data found for this area.")
        else:
            print(f"Found {len(granules)} elevation tiles to download.")
            output_dir = "nasadem_data"
            os.makedirs(output_dir, exist_ok=True)
            # The return value is kept in `files` so it can be inspected from
            # the notebook after the cell has run.
            files = earthaccess.download(granules, local_path=output_dir)

# ===================================================================
# PART 3: UNZIP DOWNLOADED FILES
# ===================================================================
if output_dir and os.path.exists(output_dir):
    print("\n--- Part 3: Unzipping Elevation Files ---")
    zip_files = glob.glob(os.path.join(output_dir, '*.zip'))
    corrupted_archives = []

    for file_path in tqdm(zip_files, desc="Unzipping files"):
        try:
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(output_dir)
        except zipfile.BadZipFile:
            # One damaged download must not abort the whole run; skip it and
            # keep the archive on disk so it can be identified and re-fetched.
            print(f"Warning: Skipping corrupted file: {os.path.basename(file_path)}")
            corrupted_archives.append(file_path)
            continue
        os.remove(file_path) # Clean up zip to save space (only after a successful extract)

    print("Unzipping complete.")
    if corrupted_archives:
        print(
            f"Warning: {len(corrupted_archives)} corrupted archive(s) were kept in "
            f"'{output_dir}' and need to be downloaded again."
        )
else:
    print("\nSkipping Part 3: No data was downloaded.")

# ===================================================================
# PART 4: EXTRACT ELEVATION FOR EACH LANDSLIDE
# ===================================================================
if output_dir and os.path.exists(output_dir):
    print("\n--- Part 4: Extracting Elevation from Tiles ---")
    # Open raster datasets keyed by tile path, so each tile is opened once.
    raster_cache = {}
    # Tiles already reported as unreadable, to print one warning per tile.
    warned_tiles = set()

    def get_elevation_from_tile(lat, lon, data_dir):
        """Return the elevation sampled at (lat, lon) from the matching .hgt tile.

        Args:
            lat: Latitude in decimal degrees (negative = south).
            lon: Longitude in decimal degrees (negative = west).
            data_dir: Directory containing the extracted .hgt tiles.

        Returns:
            The sampled value, or None when a coordinate is missing, the tile
            file is not present, the tile cannot be read, or the sampled value
            is not greater than -1000 (presumably a void/no-data fill value;
            confirm against the dataset documentation).
        """
        if pd.isna(lat) or pd.isna(lon):
            return None

        # Tiles are named after their south-west corner, so floor the SIGNED
        # coordinate and only then take the absolute value. Flooring abs(lat)
        # selects the wrong tile for southern/western points: -1.5 lies in
        # tile s02 (covering -2..-1), not s01.
        lat_floor, lon_floor = math.floor(lat), math.floor(lon)
        lat_hemisphere = 'n' if lat_floor >= 0 else 's'
        lon_hemisphere = 'e' if lon_floor >= 0 else 'w'

        tile_filename = f"{lat_hemisphere}{abs(lat_floor):02d}{lon_hemisphere}{abs(lon_floor):03d}.hgt"
        tile_path = os.path.join(data_dir, tile_filename)

        if not os.path.exists(tile_path):
            return None

        try:
            if tile_path not in raster_cache:
                raster_cache[tile_path] = rasterio.open(tile_path)
            dem_file = raster_cache[tile_path]
            # sample() takes (x, y) pairs, i.e. (lon, lat), and yields one
            # array of band values per point; band 0 is used here.
            value = next(dem_file.sample([(lon, lat)]))[0]
        except Exception as exc:
            # Stay best-effort (a bad tile should not stop the run), but make
            # the failure visible once per tile instead of hiding it.
            if tile_path not in warned_tiles:
                warned_tiles.add(tile_path)
                print(f"Warning: Could not read {tile_filename}: {exc}")
            return None

        return value if value > -1000 else None

    # Apply the function to get elevations
    tqdm.pandas(desc="Getting Elevations")
    try:
        df_hotspot['elevation'] = df_hotspot.progress_apply(
            lambda row: get_elevation_from_tile(row['latitude'], row['longitude'], output_dir),
            axis=1
        )
    finally:
        # Close every opened tile even if the apply step raised, so file
        # handles are not leaked in a long-lived notebook kernel.
        for dem_handle in raster_cache.values():
            dem_handle.close()
        raster_cache.clear()

    print("Elevation extraction complete.")

    # ===================================================================
    # PART 5: DISPLAY AND SAVE FINAL RESULTS
    # ===================================================================
    print("\n--- Part 5: Final Results ---")
    # Display the first few rows with the new elevation data
    print("Sample of the final data:")
    print(df_hotspot[['event_title', 'latitude', 'longitude', 'elevation']].head())

    # Save your enriched data to a new CSV file
    output_csv_path = 'landslides_hotspot_with_elevation.csv'
    df_hotspot.to_csv(output_csv_path, index=False)

    print(f"\n✅ All steps complete! Final data saved to '{output_csv_path}'")
else:
    print("\nSkipping Parts 4 & 5: Process could not be completed.")

✅ Setup complete.

--- Part 1: Loading and Filtering Landslide Data ---
Loaded 11033 total landslides.
Filtered to 2529 landslides in the optimal boundary (22.92% of total).

--- Part 2: Downloading Elevation Data ---
Searching for elevation data for the hotspot...
Found 961 elevation tiles to download.


QUEUEING TASKS | :   0%|          | 0/961 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/961 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/961 [00:00<?, ?it/s]


--- Part 3: Unzipping Elevation Files ---


Unzipping files:   0%|          | 0/961 [00:00<?, ?it/s]

Unzipping complete.

--- Part 4: Extracting Elevation from Tiles ---


Getting Elevations:   0%|          | 0/2529 [00:00<?, ?it/s]

Elevation extraction complete.

--- Part 5: Final Results ---
Sample of the final data:
                          event_title  latitude  longitude  elevation
3                    Dailekh district   28.8378    81.7080       1395
32             Kerseong, West Bengal    26.8826    88.2788       1545
34            Gayabari,  West Bengal    26.8572    88.3143       1079
39  Deurali VDC-8 in Kaski district,    28.2039    83.9806        831
40    Dhapade VDC-5, Tanahun district   27.9447    84.2279        559

✅ All steps complete! Final data saved to 'landslides_hotspot_with_elevation.csv'
