In [None]:
!pip install order_of_magnitude
!pip install netCDF4

In [None]:
# Load the Drive helper and mount
import os

from google.colab import drive
drive.mount('/content/drive')

# Use the absolute path of the mounted Drive folder. A relative path
# ("drive/My Drive/...") only resolves on the first run: once the working
# directory has changed, re-running the cell raises FileNotFoundError.
os.chdir("/content/drive/My Drive/ERA5_Dataset")

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import netCDF4 as nc
import util

## Target Dataframe

In [None]:
# Load the target table (the output below shows DATE, ID, LAT, LON, S.IndAll).
target_csv_path = './csv/target_NoOutliers_complete.csv'
df_target = pd.read_csv(target_csv_path)

# Latitude / longitude columns as plain NumPy arrays.
lats, lons = (df_target[coord_name].values for coord_name in ('LAT', 'LON'))

df_target

Unnamed: 0,DATE,ID,LAT,LON,S.IndAll
0,1980-01-01,,-18.50,80.50,0.00
1,1980-01-02,,-30.00,31.25,0.00
2,1980-01-03,,-4.00,86.50,0.00
3,1980-01-04,,-29.50,32.75,0.00
4,1980-01-05,,-4.75,89.25,0.00
...,...,...,...,...,...
15422,2022-03-23,392.0,-13.75,76.75,0.50
15423,2022-03-24,392.0,-14.25,74.25,1.00
15424,2022-03-25,392.0,-13.50,73.25,1.00
15425,2022-03-26,392.0,-15.00,74.00,1.00


In [None]:
# Number of rows in the target table; later cells use it to truncate the
# time axis of the wind arrays to the same length.
N_dates = len(df_target)
print(N_dates)

15427


## Wind Speed

In [None]:
# Open the wind-component file for one pressure level.
# Levels noted by the author: 1000, 850, 300 (presumably one file per level).
wind_nc_path = '../ERA5_Dataset/nc_original_datasets/wind_speed_300hPa.nc'
ds_wind = nc.Dataset(wind_nc_path)

In [None]:
# Show the dataset summary: global attributes, dimension sizes and variables.
ds_wind

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_64BIT_OFFSET data model, file format NETCDF3):
    Conventions: CF-1.6
    history: 2023-10-15 22:04:49 GMT by grib_to_netcdf-2.25.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf.bin -S param -o /cache/data0/adaptor.mars.internal-1697407261.2031271-17839-2-293c2d25-9533-4895-93b5-a56e579d9f29.nc /cache/tmp/293c2d25-9533-4895-93b5-a56e579d9f29-adaptor.mars.internal-1697405826.3319087-17839-3-tmp.grib
    dimensions(sizes): longitude(241), latitude(121), time(15706)
    variables(dimensions): float32 longitude(longitude), float32 latitude(latitude), int32 time(time), int16 u(time, latitude, longitude), int16 v(time, latitude, longitude)
    groups: 

In [None]:
# Read the complete u and v component arrays into memory; both variables are
# declared with dimensions (time, latitude, longitude).
wind_u = ds_wind.variables['u'][:]
wind_v = ds_wind.variables['v'][:]

In [None]:
# Sanity check: both components must share one shape; N_dates is printed to
# compare against the length of the time axis.
print(wind_u.shape, wind_v.shape)
print(N_dates)

# Wind speed is the magnitude of the (u, v) vector at every grid point.
squared_magnitude = wind_u**2 + wind_v**2
wind_speed = np.sqrt(squared_magnitude)

print(wind_speed.shape)

(15706, 121, 241) (15706, 121, 241)
15427
(15706, 121, 241)


In [None]:
import math
import random

# Grid size along the latitude and longitude axes of the component arrays.
N_lats, N_lons = wind_u.shape[1:3]

# Pick one random (date, latitude, longitude) grid point.
sample_idx = random.randint(0, N_dates-1)
lat_idx    = random.randint(0, N_lats-1)
lon_idx    = random.randint(0, N_lons-1)
for label, picked_idx in (('SAMPLE IDX: ', sample_idx),
                          ('LAT IDX: ', lat_idx),
                          ('LON IDX: ', lon_idx)):
  print(label, picked_idx)

grid_point = (sample_idx, lat_idx, lon_idx)
u_component = wind_u[grid_point]
v_component = wind_v[grid_point]

# Recompute the speed with scalar math and print it next to the vectorised
# value so the two can be compared by eye.
wind_t = math.sqrt(u_component**2 + v_component**2)
print('Wind U_Component: ', u_component)
print('Wind V_Component: ', v_component)
print('Wind result:      ', wind_t)
print('NumPy component:  ', wind_speed[grid_point])

SAMPLE IDX:  6295
LAT IDX:  17
LON IDX:  1
Wind U_Component:  -6.2648301843397505
Wind V_Component:  -2.656567729420062
Wind result:       6.8048107497277615
NumPy component:   6.8048107497277615


In [None]:
# Split the grid into 4 latitude bands x 4 longitude bands = 16 zones.
# The last band on each axis is open-ended, so it absorbs the leftover
# row/column (31 latitudes, 61 longitudes in the shapes printed below).
lat_bands = (slice(0, 30), slice(30, 60), slice(60, 90), slice(90, None))
lon_bands = (slice(0, 60), slice(60, 120), slice(120, 180), slice(180, None))

# Zone order: latitude band is the outer loop, longitude band the inner one.
# Only the first N_dates time steps are kept.
wind_zones = [
    wind_speed[:N_dates, lat_band, lon_band]
    for lat_band in lat_bands
    for lon_band in lon_bands
]

print(len(wind_zones))
for zone_block in wind_zones:
  print(zone_block.shape)

16
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 61)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 61)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 61)
(15427, 31, 60)
(15427, 31, 60)
(15427, 31, 60)
(15427, 31, 61)


In [None]:
# Per-zone daily statistics: mean_zones[z][d] / std_zones[z][d] hold the mean
# and standard deviation of the wind speed over every grid point of zone z on
# date index d.
mean_zones = []
std_zones  = []

for zone_block in wind_zones:
  # Each zone is (date, latitude, longitude); reducing over the two spatial
  # axes yields one value per date in a single call, instead of two NumPy
  # calls per date inside a Python loop. Values agree with the per-date
  # computation up to floating-point rounding.
  zone_mean = np.mean(zone_block, axis=(1, 2))
  zone_std = np.std(zone_block, axis=(1, 2))

  # netCDF4 may return masked arrays; store plain float arrays (a fully
  # masked date becomes NaN) so later stacking yields ordinary ndarrays.
  mean_zones.append(np.ma.filled(zone_mean, np.nan))
  std_zones.append(np.ma.filled(zone_std, np.nan))

In [None]:
# Interleave the per-zone series as columns:
# mean of zone 1, std of zone 1, mean of zone 2, std of zone 2, ... zone 16.
interleaved_columns = []
for zone_idx in range(16):
  interleaved_columns.append(mean_zones[zone_idx])
  interleaved_columns.append(std_zones[zone_idx])

mean_std_zones = np.column_stack(interleaved_columns)

# Save Dataset

In [None]:
target_att = ['DATE', 'S.IndAll']

# 'Mean_Z1', 'Std_Z1', ..., 'Mean_Z16', 'Std_Z16': one mean/std pair per zone.
mean_std_zones_att = [
    '{}_Z{}'.format(stat_name, zone_number)
    for zone_number in range(1, 17)
    for stat_name in ('Mean', 'Std')
]

# Put the target columns side by side with the zone statistics.
# NOTE: stacking the DATE strings with floats produces an object-dtype array,
# so every column of df_complete ends up with dtype object.
target = df_target[target_att].to_numpy()
final_array = np.concatenate((target, mean_std_zones), axis=1)
df_complete = pd.DataFrame(data=final_array,
                           columns=target_att + mean_std_zones_att)
df_complete[11:]

Unnamed: 0,DATE,S.IndAll,Mean_Z1,Std_Z1,Mean_Z2,Std_Z2,Mean_Z3,Std_Z3,Mean_Z4,Std_Z4,...,Mean_Z12,Std_Z12,Mean_Z13,Std_Z13,Mean_Z14,Std_Z14,Mean_Z15,Std_Z15,Mean_Z16,Std_Z16
11,1980-01-12,0.0,4.01905,1.252423,3.324028,1.665169,5.979807,2.18291,4.795236,2.39289,...,10.595396,5.58681,23.480714,5.449824,14.408811,8.62976,8.798461,5.474088,25.126955,8.978094
12,1980-01-13,0.0,5.27905,1.818216,5.185075,1.503941,7.781539,2.726287,5.162079,1.933401,...,8.286046,3.86648,21.337919,4.668694,11.939354,7.598425,11.031555,6.222616,27.313743,9.126969
13,1980-01-14,0.0,5.223869,2.608225,5.077376,2.34467,7.007538,2.943386,4.272757,2.019792,...,9.54709,5.602193,15.715568,6.38101,13.21349,8.545362,11.534418,6.037818,21.784233,4.357661
14,1980-01-15,0.0,5.899075,2.203613,5.526532,2.996684,5.978003,2.761261,4.137485,1.782385,...,10.762186,4.555983,9.093025,5.860548,16.932395,6.474113,8.892506,4.997361,13.054062,3.446599
15,1980-01-16,0.0,5.355641,2.880033,6.772148,2.703436,4.999873,1.901404,3.75899,1.601356,...,9.231056,2.779431,5.597448,3.988066,14.880802,5.727929,11.187458,5.509884,12.909484,4.079441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15422,2022-03-23,0.5,7.911888,2.62062,7.430936,3.06294,8.240616,2.665927,14.356912,1.913285,...,10.40295,5.776051,17.932813,4.401436,21.540357,4.24033,22.572247,5.069329,9.378263,5.696661
15423,2022-03-24,1.0,8.418339,3.173017,7.483778,2.425831,10.97041,5.038001,15.84807,1.480681,...,13.228639,6.573564,20.065123,2.760099,22.011093,3.943388,23.001928,7.730189,12.258979,5.849311
15424,2022-03-25,1.0,6.215071,1.795399,5.260999,2.479599,10.942486,4.913747,18.225701,3.290692,...,14.160977,5.741961,27.709152,7.814903,19.513143,2.449494,18.756631,7.643597,18.389561,9.097944
15425,2022-03-26,1.0,4.987821,2.043618,4.72101,2.245883,13.93619,6.801206,17.46428,3.288953,...,14.802169,6.644496,30.798317,7.763103,24.263592,6.321115,19.567783,6.188786,25.289646,6.775203


# Rollback 10 days

In [None]:
def attribute_stepped(atts, max_steps):
  """Expand each attribute name into one label per step.

  For every name in `atts`, in order, emits '<name>-0', '<name>-1', ...,
  '<name>-<max_steps-1>'. All labels of one attribute are contiguous.

  Args:
    atts: iterable of attribute names.
    max_steps: number of step suffixes generated per attribute.

  Returns:
    A flat list of len(atts) * max_steps labels.
  """
  return [f'{name}-{step}' for name in atts for step in range(max_steps)]

In [None]:
def mean_std_atts_zones(zones_n):
  """Build the column names for `zones_n` zones.

  Zones are numbered from 1 and each contributes a 'Mean_Z<k>' name
  immediately followed by its 'Std_Z<k>' name.

  Args:
    zones_n: number of zones.

  Returns:
    A list of 2 * zones_n names: ['Mean_Z1', 'Std_Z1', 'Mean_Z2', ...].
  """
  return [
      name_template.format(zone_number)
      for zone_number in range(1, zones_n + 1)
      for name_template in ('Mean_Z{}', 'Std_Z{}')
  ]

print(mean_std_atts_zones(16))

['Mean_Z1', 'Std_Z1', 'Mean_Z2', 'Std_Z2', 'Mean_Z3', 'Std_Z3', 'Mean_Z4', 'Std_Z4', 'Mean_Z5', 'Std_Z5', 'Mean_Z6', 'Std_Z6', 'Mean_Z7', 'Std_Z7', 'Mean_Z8', 'Std_Z8', 'Mean_Z9', 'Std_Z9', 'Mean_Z10', 'Std_Z10', 'Mean_Z11', 'Std_Z11', 'Mean_Z12', 'Std_Z12', 'Mean_Z13', 'Std_Z13', 'Mean_Z14', 'Std_Z14', 'Mean_Z15', 'Std_Z15', 'Mean_Z16', 'Std_Z16']


In [None]:
# Build the lagged ("rolled back") feature table: for every date, the zone
# statistics of that date (step 0) and of each of the preceding
# max_steps - 1 dates (steps 1 .. max_steps - 1).
max_steps = 11
labels_to_rewind = mean_std_atts_zones(16)
labels_stepped = attribute_stepped(labels_to_rewind, max_steps)
print(labels_stepped)

stepped_mean_zones = []
stepped_std_zones = []

# NOTE(review): output rows start at date index max_steps, so the row at index
# max_steps - 1 (which also has a full history) is not emitted and source row
# 0 is never read. Kept unchanged to preserve the shape of the saved dataset.
for zone in range(1, 17):
  zone_mean = df_complete['Mean_Z{}'.format(zone)].to_numpy(dtype=float)
  zone_std = df_complete['Std_Z{}'.format(zone)].to_numpy(dtype=float)

  # Column `step` of the lagged matrix is the series shifted back by `step`
  # rows: output row r holds series[r + max_steps - step]. Plain slice copies
  # replace the element-by-element pandas lookups and give identical values.
  mean_lagged = np.column_stack(
      [zone_mean[max_steps-step:N_dates-step] for step in range(max_steps)])
  std_lagged = np.column_stack(
      [zone_std[max_steps-step:N_dates-step] for step in range(max_steps)])

  print(mean_lagged.shape)
  stepped_mean_zones.append(mean_lagged)
  stepped_std_zones.append(std_lagged)

# Column order must match labels_stepped: for each zone, all mean lags
# followed by all std lags, after the two target columns.
lagged_blocks = [target[max_steps:]]
for zone_mean_lags, zone_std_lags in zip(stepped_mean_zones, stepped_std_zones):
  lagged_blocks.append(zone_mean_lags)
  lagged_blocks.append(zone_std_lags)

final_array = np.hstack(lagged_blocks)
df_stepped = pd.DataFrame(final_array , columns = target_att + labels_stepped)
df_stepped

['Mean_Z1-0', 'Mean_Z1-1', 'Mean_Z1-2', 'Mean_Z1-3', 'Mean_Z1-4', 'Mean_Z1-5', 'Mean_Z1-6', 'Mean_Z1-7', 'Mean_Z1-8', 'Mean_Z1-9', 'Mean_Z1-10', 'Std_Z1-0', 'Std_Z1-1', 'Std_Z1-2', 'Std_Z1-3', 'Std_Z1-4', 'Std_Z1-5', 'Std_Z1-6', 'Std_Z1-7', 'Std_Z1-8', 'Std_Z1-9', 'Std_Z1-10', 'Mean_Z2-0', 'Mean_Z2-1', 'Mean_Z2-2', 'Mean_Z2-3', 'Mean_Z2-4', 'Mean_Z2-5', 'Mean_Z2-6', 'Mean_Z2-7', 'Mean_Z2-8', 'Mean_Z2-9', 'Mean_Z2-10', 'Std_Z2-0', 'Std_Z2-1', 'Std_Z2-2', 'Std_Z2-3', 'Std_Z2-4', 'Std_Z2-5', 'Std_Z2-6', 'Std_Z2-7', 'Std_Z2-8', 'Std_Z2-9', 'Std_Z2-10', 'Mean_Z3-0', 'Mean_Z3-1', 'Mean_Z3-2', 'Mean_Z3-3', 'Mean_Z3-4', 'Mean_Z3-5', 'Mean_Z3-6', 'Mean_Z3-7', 'Mean_Z3-8', 'Mean_Z3-9', 'Mean_Z3-10', 'Std_Z3-0', 'Std_Z3-1', 'Std_Z3-2', 'Std_Z3-3', 'Std_Z3-4', 'Std_Z3-5', 'Std_Z3-6', 'Std_Z3-7', 'Std_Z3-8', 'Std_Z3-9', 'Std_Z3-10', 'Mean_Z4-0', 'Mean_Z4-1', 'Mean_Z4-2', 'Mean_Z4-3', 'Mean_Z4-4', 'Mean_Z4-5', 'Mean_Z4-6', 'Mean_Z4-7', 'Mean_Z4-8', 'Mean_Z4-9', 'Mean_Z4-10', 'Std_Z4-0', 'Std_Z4-1', 

Unnamed: 0,DATE,S.IndAll,Mean_Z1-0,Mean_Z1-1,Mean_Z1-2,Mean_Z1-3,Mean_Z1-4,Mean_Z1-5,Mean_Z1-6,Mean_Z1-7,...,Std_Z16-1,Std_Z16-2,Std_Z16-3,Std_Z16-4,Std_Z16-5,Std_Z16-6,Std_Z16-7,Std_Z16-8,Std_Z16-9,Std_Z16-10
0,1980-01-12,0.0,4.01905,4.744751,4.135923,6.399491,7.709299,4.121322,4.490097,7.167944,...,5.478564,5.122944,4.321819,4.603736,6.386266,7.995152,5.001129,4.140354,4.114581,4.426226
1,1980-01-13,0.0,5.27905,4.01905,4.744751,4.135923,6.399491,7.709299,4.121322,4.490097,...,8.978094,5.478564,5.122944,4.321819,4.603736,6.386266,7.995152,5.001129,4.140354,4.114581
2,1980-01-14,0.0,5.223869,5.27905,4.01905,4.744751,4.135923,6.399491,7.709299,4.121322,...,9.126969,8.978094,5.478564,5.122944,4.321819,4.603736,6.386266,7.995152,5.001129,4.140354
3,1980-01-15,0.0,5.899075,5.223869,5.27905,4.01905,4.744751,4.135923,6.399491,7.709299,...,4.357661,9.126969,8.978094,5.478564,5.122944,4.321819,4.603736,6.386266,7.995152,5.001129
4,1980-01-16,0.0,5.355641,5.899075,5.223869,5.27905,4.01905,4.744751,4.135923,6.399491,...,3.446599,4.357661,9.126969,8.978094,5.478564,5.122944,4.321819,4.603736,6.386266,7.995152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15411,2022-03-23,0.5,7.911888,6.515268,10.005322,9.206442,10.74249,16.246376,12.039412,5.640178,...,5.63052,4.626569,3.827593,2.467184,7.651016,7.560619,6.812436,8.429707,8.091367,7.365638
15412,2022-03-24,1.0,8.418339,7.911888,6.515268,10.005322,9.206442,10.74249,16.246376,12.039412,...,5.696661,5.63052,4.626569,3.827593,2.467184,7.651016,7.560619,6.812436,8.429707,8.091367
15413,2022-03-25,1.0,6.215071,8.418339,7.911888,6.515268,10.005322,9.206442,10.74249,16.246376,...,5.849311,5.696661,5.63052,4.626569,3.827593,2.467184,7.651016,7.560619,6.812436,8.429707
15414,2022-03-26,1.0,4.987821,6.215071,8.418339,7.911888,6.515268,10.005322,9.206442,10.74249,...,9.097944,5.849311,5.696661,5.63052,4.626569,3.827593,2.467184,7.651016,7.560619,6.812436


In [None]:
# Write the lagged feature table to CSV without the row index.
output_csv_path = './csv/16_zones/wind_speed_300hPa_10D.csv'

# to_csv does not create missing parent directories; make sure the folder
# exists so the notebook does not fail at the very last step.
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
df_stepped.to_csv(output_csv_path, index=False)