In [22]:
import boto3
import pandas as pd
from io import StringIO
import os

First, let's grab some time-series data using the boto3 package in Python.

In [5]:
# Fetch the global GID lookup table (a CSV file) from S3
# Create the boto3 S3 client used for the downloads
s3_client = boto3.client('s3')

# Where the lookup table lives: bucket name and object key
bucket_name, file_key = 'era5-for-wrf', 'era5_global_gids.csv'

# Request the object from S3
s3_object = s3_client.get_object(
    Key=file_key,
    Bucket=bucket_name,
)

# Pull the whole payload into memory, then decode the bytes to text
payload = s3_object['Body'].read()
file_content = payload.decode('utf-8')

# Wrap the text in an in-memory, file-like buffer that pandas can read
csv_string_io = StringIO(file_content)

# Parse the CSV; the first column is used as the DataFrame index
df = pd.read_csv(csv_string_io, index_col=0)

# Print the DataFrame
print(df)

             gid   lat     lon
0              1  90.0    0.00
1              2  90.0    0.25
2              3  90.0    0.50
3              4  90.0    0.75
4              5  90.0    1.00
...          ...   ...     ...
1038235  1038236 -90.0  358.75
1038236  1038237 -90.0  359.00
1038237  1038238 -90.0  359.25
1038238  1038239 -90.0  359.50
1038239  1038240 -90.0  359.75

[1038240 rows x 3 columns]


In [13]:
# Now let's define a function to return the GID for a given latitude and longitude
def get_gid(lat, lon, gid_df):
    """Return the zero-padded grid-cell ID (GID) for a latitude/longitude.

    The coordinates are snapped to the nearest 0.25 degrees and then looked up
    in ``gid_df``. Longitudes are compared modulo 360, so a western longitude
    given as a negative number (e.g. -73.75) matches a table that stores it as
    286.25, and a longitude that rounds up to 360.0 matches the 0.0 column.

    Parameters
    ----------
    lat : float
        Latitude in degrees.
    lon : float
        Longitude in degrees; either the -180..180 or the 0..360 convention.
    gid_df : pandas.DataFrame
        Lookup table with 'gid', 'lat' and 'lon' columns.

    Returns
    -------
    str
        The GID of the first matching row, left-padded with zeros to at
        least 7 characters.

    Raises
    ------
    IndexError
        If no row of ``gid_df`` matches the rounded coordinates (for example
        a latitude outside the range covered by the table).
    """
    # Round to the nearest 0.25 degrees
    lat = round(lat * 4) / 4
    lon = round(lon * 4) / 4

    # Compare longitudes modulo 360 so both sign conventions resolve to the
    # same grid column; multiples of 0.25 are exact in binary floating point,
    # so the equality comparison is safe.
    same_lat = gid_df['lat'] == lat
    same_lon = (gid_df['lon'] % 360) == (lon % 360)
    matches = gid_df.loc[same_lat & same_lon, 'gid']

    if matches.empty:
        # Same exception type the bare ``.values[0]`` lookup would raise,
        # but with a message that says what went wrong.
        raise IndexError(
            f"No GID found for latitude {lat} and longitude {lon} "
            "(after rounding to 0.25 degrees)"
        )

    # Return result
    return str(matches.iloc[0]).zfill(7)

In [14]:
# Try the lookup on a sample coordinate pair
lat, lon = 52.52, 13.405
gid = get_gid(lat, lon, df)

# Report the coordinates together with the GID that was found
message = f"The GID for latitude {lat} and longitude {lon} is {gid}"
print(message)

The GID for latitude 52.52 and longitude 13.405 is 0216055


In [20]:
# With the GID in hand, download the timeseries for that grid cell
# Bucket name and object key; the key embeds the zero-padded GID
bucket_name = 'era5-for-wrf'
file_key = f'global_single_level/cells/{gid}/timeseries.csv'

# Request the object from S3
s3_object = s3_client.get_object(
    Key=file_key,
    Bucket=bucket_name,
)

# Pull the whole payload into memory, then decode the bytes to text
payload = s3_object['Body'].read()
file_content = payload.decode('utf-8')

# Wrap the text in an in-memory, file-like buffer that pandas can read
csv_string_io = StringIO(file_content)

# Parse the CSV: skip the first line of the file, use the first column as
# the index, and let pandas parse that index as dates
era5_df = pd.read_csv(
    csv_string_io,
    skiprows=1,
    index_col=0,
    parse_dates=True,
)

In [21]:
# Let's look at the data: a bare expression on the last line of a cell is
# rendered by the notebook as that cell's output
era5_df

Unnamed: 0_level_0,100-m wind speed (m/s),100-m wind direction (degrees),2-m temperature (K),surface pressure (Pa)
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01 00:00:00,4.09,190.88,272.30,101777.43
2000-01-01 01:00:00,4.05,193.58,272.50,101801.09
2000-01-01 02:00:00,4.19,197.67,272.38,101809.48
2000-01-01 03:00:00,4.25,199.57,272.37,101785.27
2000-01-01 04:00:00,4.30,200.83,272.70,101759.05
...,...,...,...,...
2024-03-31 19:00:00,3.93,298.71,285.64,99383.31
2024-03-31 20:00:00,4.07,316.15,284.35,99398.93
2024-03-31 21:00:00,4.12,340.19,283.41,99366.82
2024-03-31 22:00:00,3.69,7.58,283.07,99351.20


Alternatively, if we want to avoid the boto3 route, we can simply download the data using the AWS CLI:

In [23]:
# Build the AWS CLI command that copies the cell's timeseries CSV into the
# current working directory
cmd = (
    f"aws s3 cp s3://era5-for-wrf/global_single_level/cells/{gid}/timeseries.csv ."
)

# Run it through the shell; the value returned by os.system is shown as the
# cell's output
exit_status = os.system(cmd)
exit_status

0