In [1]:
!pip install order_of_magnitude
!pip install netCDF4

Collecting order_of_magnitude
  Downloading order_of_magnitude-2.3.1.tar.gz (4.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: order_of_magnitude
  Building wheel for order_of_magnitude (setup.py) ... [?25l[?25hdone
  Created wheel for order_of_magnitude: filename=order_of_magnitude-2.3.1-py3-none-any.whl size=4579 sha256=e83228d4968e539d00966377ad0e0d7284a808c28b381c56b563858eb69ed061
  Stored in directory: /root/.cache/pip/wheels/85/53/9b/310924c3bd35596e70e6d3597f347986a2f95cfb6393e374ec
Successfully built order_of_magnitude
Installing collected packages: order_of_magnitude
Successfully installed order_of_magnitude-2.3.1
Collecting netCDF4
  Downloading netCDF4-1.6.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cftime (from netCDF4)
  Downloading cftime-1.6.3-cp310-cp310-many

In [2]:
# Mount Google Drive and make the dataset folder the working directory.
import os

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# An absolute path keeps this cell re-runnable: a path relative to the
# working directory stops resolving once a previous run has already
# changed into the dataset folder.
os.chdir(os.path.join(DRIVE_MOUNT_POINT, 'My Drive', 'ERA5_Dataset'))

Mounted at /content/drive


In [3]:
import numpy as np
import pandas as pd
import netCDF4 as nc

## Target Dataframe

In [4]:
# Target table: one row per date (columns DATE, ID, LAT, LON, S.IndAll).
df_target = pd.read_csv('./csv/target_NoOutliers_complete.csv')

# Coordinate columns as plain NumPy arrays.
lats, lons = (df_target[column].to_numpy() for column in ('LAT', 'LON'))

df_target

Unnamed: 0,DATE,ID,LAT,LON,S.IndAll
0,1980-01-01,,-18.50,80.50,0.00
1,1980-01-02,,-30.00,31.25,0.00
2,1980-01-03,,-4.00,86.50,0.00
3,1980-01-04,,-29.50,32.75,0.00
4,1980-01-05,,-4.75,89.25,0.00
...,...,...,...,...,...
15422,2022-03-23,392.0,-13.75,76.75,0.50
15423,2022-03-24,392.0,-14.25,74.25,1.00
15424,2022-03-25,392.0,-13.50,73.25,1.00
15425,2022-03-26,392.0,-15.00,74.00,1.00


In [5]:
# Number of target rows; later cells use it to trim the time axis of the
# wind fields to the same length.
dates = df_target['DATE'].to_numpy()
N_dates = len(dates)
print(N_dates)

15427


## Wind Speed

In [6]:
# Open the 10 m wind netCDF file read-only ('r' is netCDF4's default mode).
wind_nc_path = '../ERA5_Dataset/nc_original_datasets/wind_speed_10m.nc'
ds_wind = nc.Dataset(wind_nc_path, mode='r')

In [7]:
# Show the metadata (dimensions, packing attributes, units) of the U component.
ds_wind.variables['u10']

<class 'netCDF4._netCDF4.Variable'>
int16 u10(time, expver, latitude, longitude)
    scale_factor: 0.0010146746628815588
    add_offset: -2.1994132821556596
    _FillValue: -32767
    missing_value: -32767
    units: m s**-1
    long_name: 10 metre U wind component
unlimited dimensions: 
current shape = (15990, 2, 121, 241)
filling on

In [8]:
# Variables are laid out as (time, expver, latitude, longitude): keep the
# first N_dates time steps and the first `expver` entry for both components.
first_expver_window = np.s_[:N_dates, 0, :, :]
wind_u = ds_wind['u10'][first_expver_window]
wind_v = ds_wind['v10'][first_expver_window]

In [9]:
# Wind speed magnitude computed element-wise from the U and V components.
print(f'{wind_u.shape} {wind_v.shape}')
print(N_dates)

wind_speed = np.sqrt(wind_v**2 + wind_u**2)

print(wind_speed.shape)

(15427, 121, 241) (15427, 121, 241)
15427
(15427, 121, 241)


In [10]:
import math
import random

# Spot check: for one randomly chosen (date, lat, lon) cell, recompute the
# speed with scalar math and print it next to the vectorised result.
N_lats, N_lons = wind_u.shape[1:3]

sample_idx = random.randrange(N_dates)
lat_idx    = random.randrange(N_lats)
lon_idx    = random.randrange(N_lons)
for label, index_value in (('SAMPLE IDX: ', sample_idx),
                           ('LAT IDX: ', lat_idx),
                           ('LON IDX: ', lon_idx)):
  print(label, index_value)

grid_point = (sample_idx, lat_idx, lon_idx)
u_component = wind_u[grid_point]
v_component = wind_v[grid_point]

wind_t = math.sqrt(u_component**2 + v_component**2)
print('Wind U_Component: ', u_component)
print('Wind V_Component: ', v_component)
print('Wind result:      ', wind_t)
print('NumPy component:  ', wind_speed[grid_point])

SAMPLE IDX:  4240
LAT IDX:  111
LON IDX:  6
Wind U_Component:  -0.27559012133222405
Wind V_Component:  3.8387486782623768
Wind result:       3.848628499847323
NumPy component:   3.848628499847323


In [11]:
# Split the grid into a 4 x 4 set of zones. Boundaries are index positions
# along the latitude / longitude axes; None means "up to the end of the axis",
# so the last row and column of zones absorb the leftover grid points.
LAT_EDGES = [0, 30, 60, 90, None]
LON_EDGES = [0, 60, 120, 180, None]

# Zones are ordered latitude band by latitude band, and within each band by
# increasing longitude index (zone 1 = first lat band, first lon band).
wind_zones = [
    wind_speed[:N_dates, lat_start:lat_stop, lon_start:lon_stop]
    for lat_start, lat_stop in zip(LAT_EDGES[:-1], LAT_EDGES[1:])
    for lon_start, lon_stop in zip(LON_EDGES[:-1], LON_EDGES[1:])
]

print(len(wind_zones))
for zone_field in wind_zones:
  print(zone_field.shape)

16
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 61)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 61)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 60)
(15427, 30, 61)
(15427, 31, 60)
(15427, 31, 60)
(15427, 31, 60)
(15427, 31, 61)


In [12]:
# Per-date spatial mean and standard deviation of the wind speed in every zone.
# Reducing over both spatial axes at once gives one value per date and avoids
# a Python-level loop over every (zone, date) pair.
mean_zones = []
std_zones  = []

for zone_field in wind_zones:
  # Restrict to the first N_dates time steps so each zone yields exactly
  # one statistic per target date.
  daily_field = zone_field[:N_dates]
  mean_zones.append(np.mean(daily_field, axis=(1, 2)))
  std_zones.append(np.std(daily_field, axis=(1, 2)))

In [13]:
# Build the (n_dates, 2 * n_zones) feature matrix with the columns interleaved
# per zone: mean of zone 1, std of zone 1, mean of zone 2, std of zone 2, ...
zone_columns = []
for zone_mean, zone_std in zip(mean_zones, std_zones):
  zone_columns.extend((zone_mean, zone_std))

mean_std_zones = np.column_stack(zone_columns)

# Save Dataset

In [14]:
# Combine the target columns with the per-zone statistics in one DataFrame.
target_att = ['DATE', 'S.IndAll']

# Column names follow the column order of mean_std_zones:
# Mean_Z1, Std_Z1, Mean_Z2, Std_Z2, ...
n_zone_pairs = mean_std_zones.shape[1] // 2
mean_std_zones_att = []
for zone_id in range(1, n_zone_pairs + 1):
  mean_std_zones_att += ['Mean_Z{}'.format(zone_id), 'Std_Z{}'.format(zone_id)]

target = df_target[target_att].values
u_final_array = np.hstack((target, mean_std_zones))
df_complete = pd.DataFrame(u_final_array, columns=target_att+mean_std_zones_att)

# Stacking the DATE strings next to the floats produces an object array, so
# every column arrives as dtype=object; restore float dtype on all but DATE.
numeric_columns = [column for column in df_complete.columns if column != 'DATE']
df_complete = df_complete.astype({column: float for column in numeric_columns})

df_complete[11:]

Unnamed: 0,DATE,S.IndAll,Mean_Z1,Std_Z1,Mean_Z2,Std_Z2,Mean_Z3,Std_Z3,Mean_Z4,Std_Z4,...,Mean_Z12,Std_Z12,Mean_Z13,Std_Z13,Mean_Z14,Std_Z14,Mean_Z15,Std_Z15,Mean_Z16,Std_Z16
11,1980-01-12,0.0,3.142576,2.424131,6.441758,0.852889,5.62607,1.231819,5.931304,1.569316,...,7.189809,1.051691,6.10266,3.279496,8.429659,2.677633,9.71366,0.91176,8.927157,1.203335
12,1980-01-13,0.0,3.401442,2.71081,5.672941,1.134203,3.377698,1.224296,5.467634,2.172411,...,8.329382,0.982931,4.637758,2.078464,7.127945,2.251145,10.044767,0.95546,8.181492,1.173854
13,1980-01-14,0.0,3.234341,2.352098,4.589037,1.301141,4.428763,1.464449,6.204593,1.346096,...,9.029643,0.937229,4.796277,2.421076,5.662417,1.961176,9.683069,1.043084,8.170344,1.506696
14,1980-01-15,0.0,3.488128,2.688362,5.241067,1.065317,5.129635,1.522463,6.748928,1.623675,...,9.420071,0.994099,5.488896,2.731995,5.512785,2.058091,8.335757,0.924478,7.651466,2.220471
15,1980-01-16,0.0,3.15715,2.029123,4.197194,1.100352,4.387732,1.16552,6.111988,1.726098,...,7.959712,1.006011,7.522853,3.434544,9.493474,2.882234,7.564081,1.289406,7.031257,1.641882
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15422,2022-03-23,0.5,3.01531,1.636619,3.335749,1.342907,5.574488,1.28105,5.617761,2.562441,...,10.702839,1.17309,5.963278,3.004767,6.269811,1.861193,5.694835,1.386068,8.530194,1.331598
15423,2022-03-24,1.0,3.120645,2.010405,3.896359,1.325175,7.730765,1.473578,4.967707,2.501188,...,9.578779,1.497547,4.149509,3.169508,6.453957,1.935525,5.907757,1.046332,6.891215,1.427954
15424,2022-03-25,1.0,2.556978,1.520092,3.817205,0.804072,7.757466,1.798016,4.817766,3.164726,...,8.781567,1.695233,4.114227,2.446899,5.440896,1.941487,7.56824,1.04947,7.802964,1.383475
15425,2022-03-26,1.0,2.757154,1.479623,3.251142,1.518349,6.768595,1.35151,3.73185,2.064519,...,8.973367,1.52534,4.41025,1.992304,4.385696,1.611536,6.709664,1.063469,9.023808,1.378514


# Rollback 10 days

In [15]:
def attribute_stepped(atts, max_steps):
  """Expand every attribute name into one label per step.

  For each name in `atts`, in order, the labels '<name>-0' through
  '<name>-<max_steps - 1>' are produced; labels of the same attribute are
  adjacent in the returned list.
  """
  return [f'{name}-{step}'
          for name in atts
          for step in range(max_steps)]

In [16]:
def mean_std_atts_zones(zones_n):
  """Return the column names 'Mean_Z<k>', 'Std_Z<k>' for k = 1..zones_n.

  The names are interleaved per zone: the mean label comes first, followed
  by the std label of the same zone.
  """
  labels = []
  for zone_id in range(1, zones_n + 1):
    labels += [f'Mean_Z{zone_id}', f'Std_Z{zone_id}']
  return labels

print(mean_std_atts_zones(16))

['Mean_Z1', 'Std_Z1', 'Mean_Z2', 'Std_Z2', 'Mean_Z3', 'Std_Z3', 'Mean_Z4', 'Std_Z4', 'Mean_Z5', 'Std_Z5', 'Mean_Z6', 'Std_Z6', 'Mean_Z7', 'Std_Z7', 'Mean_Z8', 'Std_Z8', 'Mean_Z9', 'Std_Z9', 'Mean_Z10', 'Std_Z10', 'Mean_Z11', 'Std_Z11', 'Mean_Z12', 'Std_Z12', 'Mean_Z13', 'Std_Z13', 'Mean_Z14', 'Std_Z14', 'Mean_Z15', 'Std_Z15', 'Mean_Z16', 'Std_Z16']


In [17]:
max_steps = 11  # lags 0..10: the row's own day plus the 10 preceding days
n_zones = 16
labels_to_rewind = mean_std_atts_zones(n_zones)
labels_stepped = attribute_stepped(labels_to_rewind, max_steps)
print(labels_stepped)


def _lagged_window(values, n_steps):
  """Return a (len(values) - n_steps, n_steps) array of lagged values.

  Row r describes position r + n_steps of `values`; column s holds the value
  s positions earlier, so column 0 is the row's own value.
  """
  n_total = values.shape[0]
  # Each column is the same series shifted by `step`, taken as one slice
  # instead of element-by-element lookups.
  return np.column_stack([values[n_steps - step:n_total - step]
                          for step in range(n_steps)])


stepped_mean_zones = []
stepped_std_zones = []

for zone in range(1, n_zones + 1):
  mean_wind = df_complete['Mean_Z{}'.format(zone)].to_numpy(dtype=float)
  std_wind = df_complete['Std_Z{}'.format(zone)].to_numpy(dtype=float)

  mean_wind_lagged = _lagged_window(mean_wind, max_steps)
  std_wind_lagged = _lagged_window(std_wind, max_steps)

  print(mean_wind_lagged.shape)
  stepped_mean_zones.append(mean_wind_lagged)
  stepped_std_zones.append(std_wind_lagged)

# Rows start at index max_steps, matching target[max_steps:]; per zone the
# lagged means come first and the lagged stds second, which is the order of
# the names in labels_stepped.
stepped_blocks = [target[max_steps:]]
for zone_means, zone_stds in zip(stepped_mean_zones, stepped_std_zones):
  stepped_blocks.extend((zone_means, zone_stds))

final_array = np.hstack(stepped_blocks)
df_stepped_u = pd.DataFrame(final_array , columns = target_att + labels_stepped)
df_stepped_u

['Mean_Z1-0', 'Mean_Z1-1', 'Mean_Z1-2', 'Mean_Z1-3', 'Mean_Z1-4', 'Mean_Z1-5', 'Mean_Z1-6', 'Mean_Z1-7', 'Mean_Z1-8', 'Mean_Z1-9', 'Mean_Z1-10', 'Std_Z1-0', 'Std_Z1-1', 'Std_Z1-2', 'Std_Z1-3', 'Std_Z1-4', 'Std_Z1-5', 'Std_Z1-6', 'Std_Z1-7', 'Std_Z1-8', 'Std_Z1-9', 'Std_Z1-10', 'Mean_Z2-0', 'Mean_Z2-1', 'Mean_Z2-2', 'Mean_Z2-3', 'Mean_Z2-4', 'Mean_Z2-5', 'Mean_Z2-6', 'Mean_Z2-7', 'Mean_Z2-8', 'Mean_Z2-9', 'Mean_Z2-10', 'Std_Z2-0', 'Std_Z2-1', 'Std_Z2-2', 'Std_Z2-3', 'Std_Z2-4', 'Std_Z2-5', 'Std_Z2-6', 'Std_Z2-7', 'Std_Z2-8', 'Std_Z2-9', 'Std_Z2-10', 'Mean_Z3-0', 'Mean_Z3-1', 'Mean_Z3-2', 'Mean_Z3-3', 'Mean_Z3-4', 'Mean_Z3-5', 'Mean_Z3-6', 'Mean_Z3-7', 'Mean_Z3-8', 'Mean_Z3-9', 'Mean_Z3-10', 'Std_Z3-0', 'Std_Z3-1', 'Std_Z3-2', 'Std_Z3-3', 'Std_Z3-4', 'Std_Z3-5', 'Std_Z3-6', 'Std_Z3-7', 'Std_Z3-8', 'Std_Z3-9', 'Std_Z3-10', 'Mean_Z4-0', 'Mean_Z4-1', 'Mean_Z4-2', 'Mean_Z4-3', 'Mean_Z4-4', 'Mean_Z4-5', 'Mean_Z4-6', 'Mean_Z4-7', 'Mean_Z4-8', 'Mean_Z4-9', 'Mean_Z4-10', 'Std_Z4-0', 'Std_Z4-1', 

Unnamed: 0,DATE,S.IndAll,Mean_Z1-0,Mean_Z1-1,Mean_Z1-2,Mean_Z1-3,Mean_Z1-4,Mean_Z1-5,Mean_Z1-6,Mean_Z1-7,...,Std_Z16-1,Std_Z16-2,Std_Z16-3,Std_Z16-4,Std_Z16-5,Std_Z16-6,Std_Z16-7,Std_Z16-8,Std_Z16-9,Std_Z16-10
0,1980-01-12,0.0,3.142576,3.088765,3.399019,3.556532,3.063092,2.989611,2.81111,2.818293,...,2.03997,2.067501,1.792409,1.335814,2.625999,2.49893,1.273346,0.84462,0.90125,1.268059
1,1980-01-13,0.0,3.401442,3.142576,3.088765,3.399019,3.556532,3.063092,2.989611,2.81111,...,1.203335,2.03997,2.067501,1.792409,1.335814,2.625999,2.49893,1.273346,0.84462,0.90125
2,1980-01-14,0.0,3.234341,3.401442,3.142576,3.088765,3.399019,3.556532,3.063092,2.989611,...,1.173854,1.203335,2.03997,2.067501,1.792409,1.335814,2.625999,2.49893,1.273346,0.84462
3,1980-01-15,0.0,3.488128,3.234341,3.401442,3.142576,3.088765,3.399019,3.556532,3.063092,...,1.506696,1.173854,1.203335,2.03997,2.067501,1.792409,1.335814,2.625999,2.49893,1.273346
4,1980-01-16,0.0,3.15715,3.488128,3.234341,3.401442,3.142576,3.088765,3.399019,3.556532,...,2.220471,1.506696,1.173854,1.203335,2.03997,2.067501,1.792409,1.335814,2.625999,2.49893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15411,2022-03-23,0.5,3.01531,2.699999,2.763842,2.465342,2.515049,2.456195,2.898334,2.818288,...,1.165744,1.37153,0.892839,1.600943,1.243006,1.296201,1.742718,1.542561,2.339941,1.656408
15412,2022-03-24,1.0,3.120645,3.01531,2.699999,2.763842,2.465342,2.515049,2.456195,2.898334,...,1.331598,1.165744,1.37153,0.892839,1.600943,1.243006,1.296201,1.742718,1.542561,2.339941
15413,2022-03-25,1.0,2.556978,3.120645,3.01531,2.699999,2.763842,2.465342,2.515049,2.456195,...,1.427954,1.331598,1.165744,1.37153,0.892839,1.600943,1.243006,1.296201,1.742718,1.542561
15414,2022-03-26,1.0,2.757154,2.556978,3.120645,3.01531,2.699999,2.763842,2.465342,2.515049,...,1.383475,1.427954,1.331598,1.165744,1.37153,0.892839,1.600943,1.243006,1.296201,1.742718


In [18]:
# Write the lagged feature table. The output folder is created first so the
# export does not fail at the very end on a missing directory.
output_csv = './csv/16_zones/wind_speed_10m_10D.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df_stepped_u.to_csv(output_csv, index=False)