In [None]:
from sedona.spark import *
from pyspark.sql import functions as F
import os

In [None]:
# Obtain the session/config object from Sedona's builder.
config = SedonaContext.builder().getOrCreate()

# Create the Sedona context from it; `sedona` is the handle later cells use
# for `sedona.read` and `sedona.sql`.
sedona = SedonaContext.create(config)

# Lightning Hotspots

I've always been fascinated by thunderstorms, often sitting outside to listen to the rumble of thunder. That fascination led me to wonder: **where are the highest-frequency hotspots for lightning strikes globally?** This notebook aims to answer that question.

## National Lightning Detection Network (NLDN) Data

The data used for this analysis was sourced from the National Lightning Detection Network (NLDN) database: [https://www1.ncdc.noaa.gov/pub/data/swdi/database-csv/v2/](https://www1.ncdc.noaa.gov/pub/data/swdi/database-csv/v2/).

### Data Preparation Note

The only preparation step performed on the raw CSV (aside from extracting it with `gunzip nldn-tiles-2024.csv.gz`) was removing the leading `#` from the column-header line, so that Spark reads that line as the header instead of skipping it as a comment and losing the column names.

In [None]:
# Fail fast with a clear KeyError if the storage prefix is not configured.
# os.getenv() would return None here and the f-string below would silently
# build the bogus path "None30-map-challenge/...".
s3_prefix = os.environ["USER_S3_PATH"]

# Read the NLDN tile CSV:
#   - header:      the first row supplies the column names
#   - inferSchema: column types are inferred from the data
#   - comment:     lines beginning with "#" are skipped
# The prefix is concatenated as-is, so it is expected to end with "/".
lightning_df = (
    sedona.read.format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .option("comment", "#")
    .load(f"{s3_prefix}30-map-challenge/day-25/nldn-tiles-2024.csv")
)

In [None]:
# Inspect the inferred column names and types before building geometries.
lightning_df.printSchema()

In [None]:
# Step 1: build a point geometry for each row from its CENTERLON / CENTERLAT columns.
lightning_df = lightning_df.withColumn(
    "geometry", F.expr("ST_Point(CENTERLON, CENTERLAT)")
)

# Step 2: tag each row with an H3 cell id. ST_H3CellIDs returns an array,
# so `[0]` picks its first element as the row's cell.
lightning_df = lightning_df.withColumn(
    "h3", F.expr("ST_H3CellIDs(geometry, 3, false)[0]")
)

# Expose the enriched frame to SQL under the name `lightning`, then preview it.
lightning_df.createOrReplaceTempView("lightning")
lightning_df.show(5)

In [None]:
# Aggregate the `lightning` temp view: one output row per H3 cell, with
# TOTAL_COUNT summed over all input rows that fall in that cell.
h3_df = sedona.sql("""

SELECT 
    SUM(TOTAL_COUNT) as TOTAL_COUNT, h3
FROM
    lightning
GROUP BY
    h3


""").cache() # caching because I am using the output multiple times

# Preview the first rows; the trailing count() is the cell's displayed value
# (the number of distinct H3 cells in the result).
h3_df.show(5)
h3_df.count()

In [None]:
# Attach a geometry for each H3 cell: ST_H3ToGeom takes an array of cell ids
# and returns an array, so the single id is wrapped in Array(...) and `[0]`
# unwraps the single result into the new `h3_geometry` column.
h3_df = h3_df.withColumn("h3_geometry", F.expr("ST_H3ToGeom(Array(h3))[0]"))

In [None]:
# Preview the aggregated cells, now including the `h3_geometry` column.
h3_df.show(5)

In [None]:
# Kepler.gl map configuration passed to SedonaKepler.create_map.
#
# What this config sets up (all read directly from the literal below):
#   - One 'geojson' layer bound to the dataset id 'lightning', reading its
#     shapes from a column named 'geometry'.
#   - Fill colour and extruded height are both driven by TOTAL_COUNT
#     (quantile colour scale, sqrt height scale) with 3D enabled.
#   - Tooltip shows TOTAL_COUNT and h3.
#   - 'dark-matter' base map with a pitched, rotated initial camera.
#
# NOTE(review): the 'dataId' here must match the dataset name given when the
# map is created. The layer reads a column called 'geometry'; this presumably
# relies on SedonaKepler exposing the frame's geometry column under that name
# -- confirm if the layer renders empty.
map_config = {'version': 'v1',
 'config': {'visState': {'filters': [],
   'layers': [{'id': 'x5ggpk',
     'type': 'geojson',
     'config': {'dataId': 'lightning',
      'label': 'lightning',
      'color': [183, 136, 94],
      'highlightColor': [252, 242, 26, 255],
      'columns': {'geojson': 'geometry'},
      'isVisible': True,
      'visConfig': {'opacity': 0.72,
       'strokeOpacity': 0.8,
       'thickness': 0.1,
       'strokeColor': [11, 95, 101],
       'colorRange': {'name': 'ColorBrewer RdYlBu-10',
        'type': 'diverging',
        'category': 'ColorBrewer',
        'colors': ['#313695',
         '#4575b4',
         '#74add1',
         '#abd9e9',
         '#e0f3f8',
         '#fee090',
         '#fdae61',
         '#f46d43',
         '#d73027',
         '#a50026'],
        'reversed': True},
       'strokeColorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'radius': 10,
       'sizeRange': [0, 10],
       'radiusRange': [0, 50],
       'heightRange': [0, 500],
       'elevationScale': 60,
       'enableElevationZoomFactor': True,
       'stroked': True,
       'filled': True,
       'enable3d': True,
       'wireframe': False},
      'hidden': False,
      'textLabel': [{'field': None,
        'color': [255, 255, 255],
        'size': 18,
        'offset': [0, 0],
        'anchor': 'start',
        'alignment': 'center',
        'outlineWidth': 0,
        'outlineColor': [255, 0, 0, 255],
        'background': False,
        'backgroundColor': [0, 0, 200, 255]}]},
     'visualChannels': {'colorField': {'name': 'TOTAL_COUNT',
       'type': 'integer'},
      'colorScale': 'quantile',
      'strokeColorField': None,
      'strokeColorScale': 'quantile',
      'sizeField': None,
      'sizeScale': 'linear',
      'heightField': {'name': 'TOTAL_COUNT', 'type': 'integer'},
      'heightScale': 'sqrt',
      'radiusField': None,
      'radiusScale': 'linear'}}],
   'effects': [],
   'interactionConfig': {'tooltip': {'fieldsToShow': {'lightning': [{'name': 'TOTAL_COUNT',
        'format': None},
       {'name': 'h3', 'format': None}]},
     'compareMode': False,
     'compareType': 'absolute',
     'enabled': True},
    'brush': {'size': 0.5, 'enabled': False},
    'geocoder': {'enabled': False},
    'coordinate': {'enabled': False}},
   'layerBlending': 'normal',
   'overlayBlending': 'normal',
   'splitMaps': [],
   'animationConfig': {'currentTime': None, 'speed': 1},
   'editor': {'features': [], 'visible': True}},
  'mapState': {'bearing': 24,
   'dragRotate': True,
   'latitude': 33.72522575694905,
   'longitude': -94.86773774238316,
   'pitch': 50,
   'zoom': 2.8506119441531075,
   'isSplit': False,
   'isViewportSynced': True,
   'isZoomLocked': False,
   'splitMapViewports': []},
  'mapStyle': {'styleType': 'dark-matter',
   'topLayerGroups': {},
   'visibleLayerGroups': {'label': True,
    'road': True,
    'border': False,
    'building': True,
    'water': True,
    'land': True,
    '3d building': False},
   'threeDBuildingColor': [15.035172933000911,
    15.035172933000911,
    15.035172933000911],
   'backgroundColor': [0, 0, 0],
   'mapStyles': {}}}}

In [None]:
# Render the aggregated H3 cells in Kepler.gl. The dataset is registered under
# the name "lightning", the same id the layer and tooltip entries in
# map_config refer to.
m = SedonaKepler.create_map(df=h3_df, name="lightning", config=map_config)

# Bare last expression so the notebook displays the interactive map.
m