In [2]:
reset -fs

In [3]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import glob
import os

In [13]:

column_names_new = ['date', 'time', 'lat','lon']

# column_names_new = ['date',
#                     'time',
#                     'ci',
#                     'mslp',
#                     'vmax',
#                     'fnl_tno',
#                     'adj_raw',
#                     'ini_raw',
#                     'limit',
#                     'weaken',
#                     'rpd_weaken',
#                     'ctr_temp',
#                     'mean_cloud_temp',
#                     'scene',
#                     'est_rmw',
#                     'mw_score',
#                     'lat',
#                     'lon',
#                     'fix_method',
#                     'sat',
#                     'vza',
#                     'comment1',
#                     'comment2',
#                     'comment3',
#                     'comment4',
#                     'comment5',
#                     'comment6']
metadata = []

adts = glob.glob("../2021-adt-hurricanes/*")

for a in adts:
    try:
        adt_new_df = pd.read_csv(a, names=column_names_new, skiprows=1, header=None, delim_whitespace=True, dtype=str)
        adt_new_df['storm_id'] = os.path.basename(a)[0:8]
        if os.path.basename(a)[7]=='L':
            adt_new_df['storm_id'] = adt_new_df['storm_id'].str.replace('_','al').str.replace('L','')
        else:
            adt_new_df['storm_id'] = adt_new_df['storm_id'].str.replace('_','ep').str.replace('E','')
        metadata.append(adt_new_df)
    except ParserError:
        raise Exception('Could not read {}'.format(a))
    
df = pd.concat(metadata, axis=0, ignore_index=True)

   
# df = df[['date','time','vmax','mslp','scene','lat','lon','storm_id']]


# change date column to match image file date format
month_dict = {'JAN': '01',
              'FEB': '02',
              'MAR': '03',
              'APR': '04',
              'MAY': '05',
              'JUN': '06',
              'JUL': '07',
              'AUG': '08',
              'SEP': '09',
              'OCT': '10',
              'NOV': '11',
              'DEC': '12'
             }
df['date'] = df.date.replace(month_dict, regex=True)
df['datedt'] = pd.to_datetime(df['date'].astype('str'), format='%Y%m%d')
df['julian'] = df.datedt.dt.strftime('%Y%j')
df['time'] = df.time.astype('str').str[:-2]
df['timecode'] = df.julian + df.time
df['timecode'] = df.timecode.str[0:10]
#df['jday'] = (x.day_of_year for x in df.timestamp)

# df['cat'] = (['TD' if x<=33
#                      else 'TS' if 34<=x<=63
#                      else 'CAT1' if 64<=x<=82
#                      else 'CAT2' if 83<=x<=95
#                      else 'CAT3' if 96<=x<=112
#                      else 'CAT4' if 113<=x<=136
#                      else 'CAT5'
#                      for x in df['vmax'].astype('float')])

# add year
df['year'] = df.date.str[:4]

# add day
df['day'] = df.timecode.str[4:7]

#add hour
df['hour'] = df.timecode.str[7:9]

#fix lon
df['lon'] = '-' + df['lon'].astype(str)

# add netcdf prefixes for M1
df['m1_combined'] = 's3://noaa-goes16/ABI-L2-MCMIPM/'+df.year+'/'+df.day+'/'+df.hour+'/OR_ABI-L2-MCMIPM1-M6_G16_s'+df.timecode
df['m1_c13'] = f's3://noaa-goes16/ABI-L1b-RadM/'+df.year+'/'+df.day+'/'+df.hour+'/OR_ABI-L1b-RadM1-M6C13_G16_s'+df.timecode

# TODO: add M2
df['m2_combined'] = df.m1_combined.str.replace('M1','M2')
df['m2_c13'] = df.m1_c13.str.replace('M1','M2')

# add datetime
df['timestamp'] = pd.to_datetime(df.date + df.time)

# add storm name
storm_dict = {'2021al01': 'Tropical Storm Ana',
              '2021al02': 'Tropical Storm Bill',
              '2021al03': 'Tropical Storm Claudette',
              '2021al04': 'Tropical Storm Danny',
              '2021al05': 'Hurricane Elsa',
              '2021al06': 'Tropical Storm Fred',
              '2021al07': 'Hurricane Grace',
              '2021al08': 'Hurricane Henri',
              '2021al09': 'Hurricane Ida',
              '2021al10': 'Tropical Storm Kate',
              '2021al11': 'Tropical Storm Julian',
              '2021al12': 'Hurricane Larry',
              '2021al13': 'Tropical Storm Mindy',
              '2021al14': 'Hurricane Nicholas',
              '2021al15': 'Tropical Storm Odette',
              '2021al16': 'Tropical Storm Peter',
              '2021al17': 'Tropical Storm Rose',
              '2021al18': 'Hurricane Sam',
              '2021al19': 'Subtropical Storm Teresa',
              '2021al20': 'Tropical Storm Victor',
              '2021al21': 'Tropical Storm Wanda'
             }
df['storm_name'] = df.storm_id
df['storm_name'] = df.storm_name.replace(storm_dict, regex=True)
df = df.sort_values('timestamp').reset_index(drop=True)

# rearrange and drop extra columns
df = df[['storm_id','storm_name','timestamp','timecode', 'lat', 'lon', 'm1_combined', 'm1_c13','m2_combined', 'm2_c13']]

hurricanes = ['2021al05','2021al07','2021al08''2021al09','2021al14','2021al18']

df_hurr = df[df['storm_id'].isin(hurricanes)]
df_hurr.to_csv('storm_data.csv', index=False)
df_hurr


Unnamed: 0,storm_id,storm_name,timestamp,timecode,lat,lon,m1_combined,m1_c13,m2_combined,m2_c13
0,2021al05,Hurricane Elsa,2021-06-30 20:00:00,2021181200,9.53,-43.40,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/20/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/20/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/20/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/20/OR_A...
1,2021al05,Hurricane Elsa,2021-06-30 20:30:00,2021181203,9.57,-43.55,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/20/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/20/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/20/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/20/OR_A...
2,2021al05,Hurricane Elsa,2021-06-30 21:00:00,2021181210,9.60,-43.70,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/21/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/21/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/21/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/21/OR_A...
3,2021al05,Hurricane Elsa,2021-06-30 21:30:00,2021181213,9.63,-43.85,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/21/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/21/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/21/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/21/OR_A...
4,2021al05,Hurricane Elsa,2021-06-30 22:00:00,2021181220,9.67,-44.01,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/22/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/22/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/181/22/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/181/22/OR_A...
...,...,...,...,...,...,...,...,...,...,...
2956,2021al18,Hurricane Sam,2021-10-05 15:30:00,2021278153,51.96,-39.18,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/15/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/15/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/15/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/15/OR_A...
2957,2021al18,Hurricane Sam,2021-10-05 16:00:00,2021278160,51.99,-39.14,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/16/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/16/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/16/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/16/OR_A...
2958,2021al18,Hurricane Sam,2021-10-05 16:30:00,2021278163,52.02,-39.09,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/16/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/16/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/16/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/16/OR_A...
2959,2021al18,Hurricane Sam,2021-10-05 17:00:00,2021278170,52.05,-39.03,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/17/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/17/OR_A...,s3://noaa-goes16/ABI-L2-MCMIPM/2021/278/17/OR_...,s3://noaa-goes16/ABI-L1b-RadM/2021/278/17/OR_A...


In [30]:
# Use boto3 to get urls

import s3fs
import requests
import netCDF4
storm_dataset = pd.read_csv('storm_data.csv', parse_dates=['timestamp'])
storm_name = 'Hurricane Sam'
df = storm_dataset[storm_dataset['storm_name'].eq(storm)].reset_index(drop=True)
print(df.m1_c13[0])
fs = s3fs.S3FileSystem(anon=True)
f = fs.glob(f'{df.m1_c13[0]}*.nc')[0]
key = f[12:]
resp = requests.get(f'https://noaa-goes16.s3.amazonaws.com/{key}')
file_name = key.split('/')[-1].split('.')[0]
nc4_ds = netCDF4.Dataset(file_name, memory = resp.content)
store = xr.backends.NetCDF4DataStore(nc4_ds)

xr.open_dataset(store)

s3://noaa-goes16/ABI-L1b-RadM/2021/265/20/OR_ABI-L1b-RadM1-M6C13_G16_s2021265200


In [None]:
key = 'ABI-L2-MCMIPM/2021/241/14/OR_ABI-L2-MCMIPM1-M6_G16_s20212411400278_e20212411400347_c20212411400421.nc'
resp = requests.get(f'https://noaa-goes16.s3.amazonaws.com/{key}')
file_name = key.split('/')[-1].split('.')[0]
nc4_ds = netCDF4.Dataset(file_name, memory = resp.content)
store = xr.backends.NetCDF4DataStore(nc4_ds)



In [25]:
import xarray as xr
xr.open_dataset(f)


FileNotFoundError: [Errno 2] No such file or directory: b'/Users/noether/Metis/projects/Engineering/final/noaa-goes16/ABI-L1b-RadM/2021/265/20/OR_ABI-L1b-RadM1-M6C13_G16_s20212652000279_e20212652000348_c20212652000389.nc'