## Convert the downloaded data into a pandas DataFrame and save it.


In [1]:
import pandas as pd
import numpy as np
import os

In [2]:

# Parameters stored in the .npy files that are read from `root`.
# The order is significant: read_files() pairs entry i of this list with
# data[i] (the first axis of each array), so do not reorder or insert entries
# without regenerating the arrays.
# The commented-out "-PT01H" names are kept only to show where they would sit
# alphabetically; they are listed in accumulate_weather_parameters instead.
weather_parameters = [
  "CAPE_most_unstable_below_500hPa",
  "CAPE_surface",
  "cloud_amount_below_1000ft_ASL",
  "cloud_amount_of_high_cloud",
  "cloud_amount_of_low_cloud",
  "cloud_amount_of_medium_cloud",
  "cloud_amount_of_total_cloud",
  "fog_fraction_at_screen_level",
  # "hail_fall_accumulation-PT01H",
  "hail_fall_rate",
  "height_AGL_at_cloud_base_where_cloud_cover_2p5_oktas",
  "height_AGL_at_freezing_level",
  "height_AGL_at_wet_bulb_freezing_level",
  "landsea_mask",
  # "lightning_flash_accumulation-PT01H",
  # "precipitation_accumulation-PT01H",
  "precipitation_rate",
  "pressure_at_mean_sea_level",
  "pressure_at_surface",
  "radiation_flux_in_longwave_downward_at_surface",
  "radiation_flux_in_shortwave_diffuse_downward_at_surface",
  "radiation_flux_in_shortwave_direct_downward_at_surface",
  "radiation_flux_in_shortwave_total_downward_at_surface",
  "radiation_flux_in_uv_downward_at_surface",
  # "rainfall_accumulation-PT01H",
  "rainfall_rate",
  "relative_humidity_at_screen_level",
  "sensible_heat_flux_at_surface",
  "snow_depth_water_equivalent",
  # "snowfall_accumulation-PT01H",
  "snowfall_rate",
  "temperature_at_screen_level",
  # "temperature_at_screen_level_max-PT01H",
  # "temperature_at_screen_level_min-PT01H",
  "temperature_at_surface",
  "temperature_of_dew_point_at_screen_level",
  "visibility_at_screen_level",
  "wind_direction_at_10m",
  "wind_gust_at_10m",
  # "wind_gust_at_10m_max-PT01H",
  "wind_speed_at_10m",
]

In [3]:

# Hourly ("-PT01H") parameters stored in the .npy files read from `accum_root`.
# As with weather_parameters, the order is significant: read_files() pairs
# entry i of this list with data[i] of every array.
accumulate_weather_parameters = [
  "hail_fall_accumulation-PT01H",
  "lightning_flash_accumulation-PT01H",
  "precipitation_accumulation-PT01H",
  "rainfall_accumulation-PT01H",
  "snowfall_accumulation-PT01H",
  "temperature_at_screen_level_max-PT01H",
  "temperature_at_screen_level_min-PT01H",
  "wind_gust_at_10m_max-PT01H",
]

In [4]:
# Directories holding the downloaded weather forecast files, already cropped
# to the Edinburgh area and converted into NumPy arrays (see download_asdl.py).
# `root` is read with weather_parameters, `accum_root` with
# accumulate_weather_parameters.
root = "./asdi_data"
accum_root = "./accumulate_out"

In [5]:

# Grid size of every .npy array: read_files() requires each file to have shape
# (number of parameters, height, width); height indexes y, width indexes x.
height, width = 8, 13

In [None]:

def read_files(data_dir, weather_params, grid_shape=None):
  """Load every ``.npy`` file under ``data_dir`` into one long-format DataFrame.

  The directory tree is walked recursively. Each file must hold an array of
  shape ``(len(weather_params), rows, cols)``; it contributes ``rows * cols``
  output rows, one per grid cell.

  Args:
    data_dir: Directory to search (recursively) for ``.npy`` files.
    weather_params: Column names, in the same order as the first axis of
      the stored arrays (entry ``i`` names ``data[i]``).
    grid_shape: Optional ``(rows, cols)`` of each array. When omitted, the
      notebook-level ``height`` and ``width`` are used.

  Returns:
    A DataFrame with columns ``date_time`` (the file name without its
    ``.npy`` suffix), ``y`` (row index), ``x`` (column index) and one column
    per entry of ``weather_params``, sorted by ``date_time``, ``y``, ``x``.
    ``date_time`` is kept as a string, so the sort is lexicographic.

  Raises:
    FileNotFoundError: If no ``.npy`` file exists under ``data_dir``.
    ValueError: If a file's array does not have the expected shape.
  """
  if grid_shape is None:
    # Fall back to the notebook-level grid size so that existing
    # two-argument calls keep working unchanged.
    grid_shape = (height, width)
  n_rows, n_cols = grid_shape
  expected_shape = (len(weather_params), n_rows, n_cols)
  suffix = ".npy"

  # The grid coordinates are identical for every file, so build them once.
  yy, xx = np.meshgrid(range(n_rows), range(n_cols), indexing='ij')
  y_flat = yy.flatten()
  x_flat = xx.flatten()

  frames = []
  for path, _subdirs, files in os.walk(data_dir):
    for name in files:
      if not name.endswith(suffix):
        continue

      file_path = os.path.join(path, name)
      data = np.load(file_path)
      # Explicit check instead of `assert`, which is removed under `python -O`.
      if data.shape != expected_shape:
        raise ValueError(
          f"{file_path}: expected array of shape {expected_shape}, "
          f"got {data.shape}"
        )

      columns = {
        # Strip only the trailing suffix; str.replace would also remove any
        # ".npy" occurring earlier in the name.
        'date_time': [name[:-len(suffix)]] * (n_rows * n_cols),
        'y': y_flat,
        'x': x_flat,
      }
      for i, w in enumerate(weather_params):
        columns[w] = data[i].flatten()
      frames.append(pd.DataFrame(columns))

  if not frames:
    raise FileNotFoundError(f"no {suffix} files found under {data_dir!r}")

  # One concat at the end avoids re-copying all accumulated rows per file.
  df = pd.concat(frames, axis=0, ignore_index=True)
  return df.sort_values(['date_time', 'y', 'x'], ignore_index=True)

# Load both parameter sets and join them cell by cell.
df1 = read_files(root, weather_parameters)
df2 = read_files(accum_root, accumulate_weather_parameters)
# Inner join: a (date_time, y, x) cell is kept only when it is present in both
# directories, so timestamps missing from either side are dropped.
df = pd.merge(df1, df2, on=['date_time', 'y', 'x'], how='inner')
# to_parquet does not create missing parent directories, so make sure the
# output directory exists before writing.
os.makedirs("./input_data", exist_ok=True)
df.to_parquet("./input_data/asdi_data.parquet")
