In [1]:
import os
import shutil
import logging
import urllib.request
import pandas as pd
import geopandas as gpd
from osgeo import gdal, ogr
from zipfile import ZipFile

In [2]:
# Set up logging: configure the root logger at INFO level so that the
# logging.info(...) progress messages emitted by the cells below are shown.
logging.basicConfig(level=logging.INFO)

In [3]:
# ---------------------------------------------------------------------------
# Download configuration. These values are interpolated into the dataset URL
# and into the local directory / file names built by the cells below.
# ---------------------------------------------------------------------------

# Which IBTrACS subset to fetch (also used, lower-cased, in local paths)
SUBSET = 'ACTIVE'

# Format to download; the URL cell accepts 'netcdf', 'csv' or 'shapefile'
FILE_TYPE = 'shapefile'

# Geometry flavour of the shapefile archive (part of the archive file name)
GEOMETRY_TYPE = 'points'

# Dataset release identifier (appears in both the URL path and the file name)
DATASET_VERSION = 'v04r00'

# Extension of the shapefile archive, given without the leading dot
FILE_EXTENSION = 'zip'

In [4]:
# Dataset URL
# base_url keeps its trailing slash (unchanged value); it is stripped before
# joining so the resulting URL does not contain a double slash ('//').
base_url = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/'
access_url = f"{base_url.rstrip('/')}/{DATASET_VERSION}/access/{FILE_TYPE}"
if FILE_TYPE == 'netcdf':
    # NetCDF files are published with the '.nc' extension rather than
    # '.netcdf' (the directory is named 'netcdf', the files are not).
    # NOTE(review): confirm against the archive listing if the layout changes.
    url = f'{access_url}/IBTrACS.{SUBSET}.{DATASET_VERSION}.nc'
elif FILE_TYPE == 'csv':
    url = f'{access_url}/ibtracs.{SUBSET}.list.{DATASET_VERSION}.{FILE_TYPE}'
elif FILE_TYPE == 'shapefile':
    url = f'{access_url}/IBTrACS.{SUBSET}.list.{DATASET_VERSION}.{GEOMETRY_TYPE}.{FILE_EXTENSION}'
else:
    raise ValueError(f'Unknown file type: {FILE_TYPE}, must be one of: netcdf, csv, shapefile')

# Dataset Filename: last path component of the URL
filename = os.path.basename(url)

# Data Directory: <grandparent of the current working directory>/data/IBTrACS/<file type>/<subset>
data_dir = os.path.abspath(os.path.join(
    os.path.dirname(os.path.dirname(os.getcwd())),
    'data', 'IBTrACS', FILE_TYPE, SUBSET.lower()))

# Recreate data directory: any previous contents are deleted so each run
# starts from an empty directory.
logging.info(f'Recreating data directory: {data_dir}')
if os.path.exists(data_dir):
    shutil.rmtree(data_dir)
os.makedirs(data_dir)


INFO:root:Recreating data directory: /Users/mtulow/projects/geospatial_analytics/monitor-tropical-storms/data/IBTrACS/shapefile/active


In [5]:
# Download data
# The file is saved inside the data directory (not the current working
# directory) so that, for netcdf/csv, the path constructed further down
# points at the file that was actually downloaded.
download_path = os.path.join(data_dir, filename)
logging.info(f'Downloading {filename} ...')
output_file, msg = urllib.request.urlretrieve(url, download_path)

# If shapefile, extract files from archive into data directory
if FILE_TYPE == 'shapefile':
    logging.info(f'Unzipping {filename} into {data_dir} ...')
    with ZipFile(download_path, 'r') as zipObj:
        # Only regular files are renamed below; directory entries (names
        # ending in '/') have no extension to preserve and are skipped.
        members = [member for member in zipObj.namelist() if not member.endswith('/')]
        zipObj.extractall(data_dir)

    # Rename files to '<subset>_<geometry>.<original extension>' (lower-cased)
    for old in members:
        new = '{}_{}.{}'.format(SUBSET, GEOMETRY_TYPE, old.split('.')[-1]).lower()
        src = os.path.join(data_dir, old)
        dst = os.path.join(data_dir, new)
        os.rename(src, dst)

    # Delete zip file
    logging.info(f'Removing {filename} ...')
    os.remove(download_path)

# Construct path to file
# The original condition used `and`, which can never be true for a single
# value, leaving file_path undefined for netcdf/csv downloads.
if FILE_TYPE in ('netcdf', 'csv'):
    file_path = os.path.join(data_dir, filename)
elif FILE_TYPE == 'shapefile':
    file_path = os.path.join(data_dir, '{}_{}.{}'.format(SUBSET, GEOMETRY_TYPE, 'shp').lower())
else:
    raise ValueError(f'Unknown file type: {FILE_TYPE}, must be one of: netcdf, csv, shapefile')

# Assert file exists
assert os.path.exists(file_path), f'File {file_path} does not exist'

INFO:root:Downloading IBTrACS.ACTIVE.list.v04r00.points.zip ...
INFO:root:Unzipping IBTrACS.ACTIVE.list.v04r00.points.zip into /Users/mtulow/projects/geospatial_analytics/monitor-tropical-storms/data/IBTrACS/shapefile/active ...
INFO:root:Removing IBTrACS.ACTIVE.list.v04r00.points.zip ...


In [6]:
# Read file
logging.info(f'Reading {file_path} ...')
if FILE_TYPE == 'shapefile':
    shapefile = ogr.Open(file_path)
    # Unless GDAL exceptions are enabled, ogr.Open reports failure by
    # returning None; fail here with a clear message instead of letting the
    # next cell crash with an AttributeError on None.
    if shapefile is None:
        raise RuntimeError(f'Could not open {file_path} with OGR')

INFO:root:Reading /Users/mtulow/projects/geospatial_analytics/monitor-tropical-storms/data/IBTrACS/shapefile/active/active_points.shp ...


In [7]:
# Print a short report for every feature in the layer: its index and NAME
# field, the layer's spatial reference, the point coordinates, and all
# attributes as a pandas Series.
layer = shapefile.GetLayer()
spatial_ref = layer.GetSpatialRef()
feature_count = layer.GetFeatureCount()
for i in range(feature_count):
    feature = layer.GetFeature(i)
    attributes = pd.Series(feature.items())
    name = feature.GetField('NAME')
    geometry = feature.GetGeometryRef()

    # Geometry rendered as '<TYPE>(<x>, <y>)'
    point_text = f'{geometry.GetGeometryName()}({geometry.GetX()}, {geometry.GetY()})'
    report = '\n'.join([
        f'{i}: {name}',
        f'{spatial_ref}',
        point_text,
        f'\t{attributes}',
    ])

    # Blank line between consecutive feature reports
    print()
    print(report)


0: ANGGREK
GEOGCS["WGS 84",
    DATUM["WGS_1984",
        SPHEROID["WGS 84",6378137,298.257223563,
            AUTHORITY["EPSG","7030"]],
        AUTHORITY["EPSG","6326"]],
    PRIMEM["Greenwich",0,
        AUTHORITY["EPSG","8901"]],
    UNIT["degree",0.0174532925199433,
        AUTHORITY["EPSG","9122"]],
    AXIS["Latitude",NORTH],
    AXIS["Longitude",EAST],
    AUTHORITY["EPSG","4326"]]
POINT(93.9000015258789, -9.399999618530273)
	SID         2024016S09094
SEASON               2024
NUMBER                  3
BASIN                  SI
SUBBASIN               WA
                ...      
year                 2024
month                   1
day                    15
hour                   12
min                     0
Length: 168, dtype: object

1: ANGGREK
GEOGCS["WGS 84",
    DATUM["WGS_1984",
        SPHEROID["WGS 84",6378137,298.257223563,
            AUTHORITY["EPSG","7030"]],
        AUTHORITY["EPSG","6326"]],
    PRIMEM["Greenwich",0,
        AUTHORITY["EPSG","8901"]],
    UNIT["deg