In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import requests
import time

In [3]:
# Download Example
# Fetches one time step of analysed_sst from the jplMURSST41 griddap dataset,
# using a stride of 1000 on both spatial axes, and saves it as dataset.nc.
url_ex = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST41.nc?analysed_sst%5B(2002-06-01T09:00:00Z)%5D%5B(-89.99):1000:(89.99)%5D%5B(-179.99):1000:(180.0)%5D'

nc = requests.get(url_ex)
# Stop on an HTTP error so that an error page is never saved as if it were
# a NetCDF file (raises requests.HTTPError for 4xx/5xx responses).
nc.raise_for_status()

with open(r'/Users/opopligher1996/workspace/master/BusinessIntelligenceTechniquesAndApplications_DSME6756/project/DSME6756_Group_Project/Section_1/windspeed_timeseries/dataset/dataset.nc', 'wb') as f:
    f.write(nc.content)

In [4]:
# Define gfs Download setting
'''
Index Information: https://pae-paha.pacioos.hawaii.edu/erddap/griddap/ncep_global.html
===================================
tmpsfc (surface air temperature, K) 
tmp2m (air temperature at 2m, K) 
ugrd10m (eastward wind velocity at 10m, m/s) 
vgrd10m (northward wind velocity at 10m, m/s) 
pratesfc (rainfall rate, kg m-2 s-1) 
rh2m (relative humidity at 2m, %) 
prmslmsl (mean sea level pressure, Pa) 
dlwrfsfc (net downward longwave radiation flux, W m-2) 
dswrfsfc (net downward shortwave radiation flux, W m-2) 
'''

# The first nine entries are per-variable switches (True = request it).
# The last four entries bound the request: the dates are kept as the
# parenthesised strings that go into the URL, the ranges as [min, max] pairs.
condition_dict = dict(
    tmpsfc=True,
    tmp2m=True,
    ugrd10m=True,
    vgrd10m=True,
    pratesfc=True,
    rh2m=True,
    prmslmsl=True,
    dlwrfsfc=True,
    dswrfsfc=True,
    start_date='(2023-04-25T12:00:00Z)',
    end_date='(2023-04-30T12:00:00Z)',
    lat_range=[0, 30],
    long_range=[118, 150],
)


# Change setting inside condition_dict directly or add lines as below
# condition_dict["tmpsfc"] = False

print(condition_dict)

{'tmpsfc': True, 'tmp2m': True, 'ugrd10m': True, 'vgrd10m': True, 'pratesfc': True, 'rh2m': True, 'prmslmsl': True, 'dlwrfsfc': True, 'dswrfsfc': True, 'start_date': '(2023-04-25T12:00:00Z)', 'end_date': '(2023-04-30T12:00:00Z)', 'lat_range': [0, 30], 'long_range': [118, 150]}


In [6]:
import os
import time

# Directory that receives the downloaded NetCDF files. Set the GFS_SAVE_DIR
# environment variable to redirect downloads without editing the notebook;
# when it is unset the original location is used.
save_dir = os.environ.get(
    'GFS_SAVE_DIR',
    r'/Users/opopligher1996/workspace/master/BusinessIntelligenceTechniquesAndApplications_DSME6756/project/DSME6756_Group_Project/Section_1/windspeed_timeseries/dataset',
)

def define_url(condition_dict):
    """Build the griddap request URL and the matching output filename.

    Parameters
    ----------
    condition_dict : dict
        Download settings. 'start_date' and 'end_date' hold the time bounds
        as strings that are inserted into the URL as-is; 'lat_range' and
        'long_range' hold [min, max] pairs. Every other key is treated as a
        dataset variable name and is requested when its value is truthy.

    Returns
    -------
    tuple of (str, str)
        The request URL and the filename produced by define_filename().
        The URL is an empty string when no variable is enabled.

    Raises
    ------
    KeyError
        If one of the four setting keys is missing from condition_dict.
    """
    # header url part
    org_url = 'https://pae-paha.pacioos.hawaii.edu/erddap/griddap/ncep_global.nc?'

    # Keys that describe the request window rather than a variable to fetch.
    setting_keys = ('start_date', 'end_date', 'lat_range', 'long_range')

    # Generate condition url part
    start_date = condition_dict['start_date']
    end_date = condition_dict['end_date']
    min_lat, max_lat = (str(bound) for bound in condition_dict['lat_range'][:2])
    min_long, max_long = (str(bound) for bound in condition_dict['long_range'][:2])

    cond_url_date = '%5B' + start_date + ':1:' + end_date
    cond_url_loc = ('%5D%5B(' + min_lat + '):1:(' + max_lat
                    + ')%5D%5B(' + min_long + '):1:(' + max_long + ')%5D')
    cond_full_url = cond_url_date + cond_url_loc

    filename = define_filename(cond_full_url)

    # Select variables by key name instead of by position, so the result does
    # not depend on how many entries the dict has or on their order.
    selected = [key for key, enabled in condition_dict.items()
                if key not in setting_keys and enabled]

    # Each variable carries the same constraint; entries are comma-separated
    # and only the first one follows the '.nc?' header directly.
    output_url = ''
    if selected:
        output_url = org_url + ','.join(key + cond_full_url for key in selected)

    return output_url, filename

def define_filename(cond_full_url):
    """Turn the URL constraint string into a filesystem-friendly filename.

    Parameters
    ----------
    cond_full_url : str
        Constraint part of the request URL, e.g.
        '%5B(start):1:(end)%5D%5B(lat0):1:(lat1)%5D%5B(lon0):1:(lon1)%5D'.

    Returns
    -------
    str
        'start_end_lat0_lat1_lon0_lon1_gfs.nc' with '-' and ':' replaced by
        '_'. A negative bound is written with an 'm' prefix (e.g. -10 becomes
        m10) so that it cannot collide with the positive value.
    """
    # name = start_date_end_date_lat_range_long_range.nc
    # The substitutions are applied in order; later ones rely on earlier ones.
    substitutions = (
        ("%5D%5B", ""),
        (")%5D", ""),
        ("%5B(", ""),
        (":1:", ""),
        (":00Z)", ""),
        # Must run before '-' becomes '_': keeps the sign of negative bounds,
        # which would otherwise vanish when repeated underscores collapse.
        ("(-", "(m"),
        ("-", "_"),
        (":", "_"),
        ("(", "_"),
        (")", "_"),
        ("__", "_"),
    )
    name = cond_full_url
    for old, new in substitutions:
        name = name.replace(old, new)
    return name + '_gfs.nc'


def download_url(url, filename, return_filepath = True, timeout = None):
    """Download ``url`` and write the response body to ``save_dir/filename``.

    Parameters
    ----------
    url : str
        Address to request.
    filename : str
        Name of the output file; it is joined onto the module-level save_dir.
    return_filepath : bool
        Currently unused; kept so existing calls keep working. The path is
        returned on success regardless of its value.
    timeout : float or tuple, optional
        Passed to requests.get. None (the default) sets no timeout, which
        matches the previous behaviour.

    Returns
    -------
    str or None
        Full path of the written file, or None when the server answered with
        a non-OK status or an exception occurred (both cases are printed).
    """
    print("Request Status | Filename | Finish Time")
    t0 = time.time()
    try:
        print('url')
        print(url)
        r = requests.get(url, timeout=timeout)
        if r.status_code != requests.codes.ok:
            # Report the failure instead of returning None without a trace.
            print("Request failed with HTTP status", r.status_code, r.reason)
            return None

        print("Request OK...",end=' ')
        file = os.path.join(save_dir,filename)
        with open(file, 'wb') as f:
            f.write(r.content)
        # Report only after the file has been closed.
        print(filename, 'in',round(time.time() - t0,2),'sec')
        return file

    except Exception as e:
        print('Exception in download_url():', e)
        return None

In [7]:
# Build the request URL and the output filename from the current settings
url, fn = define_url(condition_dict)
print(f'filename: {fn}')

filename: 2023_04_25T12_00_2023_04_30T12_00_0_30_118_150_gfs.nc


In [8]:
# Fetch the file; download_file_path is None when the download did not succeed
download_file_path = download_url(url, fn)
print(f'File path: {download_file_path}')

Request Status | Filename | Finish Time
url
https://pae-paha.pacioos.hawaii.edu/erddap/griddap/ncep_global.nc?tmpsfc%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,tmp2m%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,ugrd10m%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,vgrd10m%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,pratesfc%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,rh2m%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,prmslmsl%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,dlwrfsfc%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D,dswrfsfc%5B(2023-04-25T12:00:00Z):1:(2023-04-30T12:00:00Z)%5D%5B(0):1:(30)%5D%5B(118):1:(150)%5D
Request OK... 2023_04_25T12_

In [9]:
# Open the downloaded NetCDF file as an xarray Dataset and display it
ds = xr.open_dataset(
    download_file_path,
    engine="netcdf4",
)
ds

In [8]:
import gc

# Close the dataset, then run a garbage-collection pass; the cell displays
# the value returned by gc.collect().
ds.close()
gc.collect()

38