
#Irma direct and Perturbation Inference Preparation


In [1]:
cd /content/drive/MyDrive/perturbation_pangu/Irma_2017/seven_day/

/content/drive/MyDrive/perturbation_pangu/Irma_2017/seven_day


In [2]:
!mkdir np_input_data_direct

mkdir: cannot create directory ‘np_input_data_direct’: File exists


In [3]:
import xarray as xr
import numpy as np
import os
import random

# Define the base path to the nc files
base_path = '/content/drive/MyDrive/perturbation_pangu/Irma_2017/seven_day/nc_input_data'

# Define your variables and pressure levels
surface_variables = ['mean_sea_level_pressure', '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature']
upper_variables = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind']
pressure_levels = ['1000', '925', '850', '700', '600', '500', '400', '300', '250', '200', '150', '100', '50']

# Function to read and extract the relevant slices from each file
def read_and_slice(file_path):
    with xr.open_dataset(file_path) as ds:
        # Assuming variable names may change, find the variable by dimensions
        for var_name, da in ds.data_vars.items():
            if da.shape == (24, 721, 1440):
                return da[0].values

# Preload all the data slices
surface_data_slices = {}
upper_data_slices = {}

# Preload surface variable slices
for var in surface_variables:
    file_path = os.path.join(base_path, var, 'surface.nc')
    slice_0 = read_and_slice(file_path)
    surface_data_slices[var] = (slice_0)

# Preload upper-air variable slices
for var in upper_variables:
    for level in pressure_levels:
        file_path = os.path.join(base_path, var, f'{level}.nc')
        slice_0 = read_and_slice(file_path)
        upper_data_slices[(var, level)] = (slice_0)


In [4]:
case_total = 2
# Generate 100 cases as example
for case_number in range(1, case_total):
    # Initialize arrays to hold the data for the current case
    surface_data = []
    upper_data = np.empty((len(upper_variables), len(pressure_levels), 721, 1440))

    # Process surface variables for the current case
    for var in surface_variables:
        surface_data.append(surface_data_slices[var])

    # Convert surface data to numpy array and reshape to the desired shape
    surface_array = np.stack(surface_data, axis=0)

    # Process upper-air variables for the current case
    for i, var in enumerate(upper_variables):
        for j, level in enumerate(pressure_levels):
            upper_data[i, j] = upper_data_slices[(var, level)]
    print(case_number)
    print(surface_array.shape)
    print(upper_data.shape)
    # Save the arrays to files with the case number in the filename
    np.save(f'np_input_data_direct/input_surface_seven_day.npy', surface_array)
    np.save(f'np_input_data_direct/input_upper_seven_day.npy', upper_data)

1
(4, 721, 1440)
(5, 13, 721, 1440)


In [5]:
cd /content/drive/MyDrive/perturbation_pangu/Irma_2017/seven_day

/content/drive/MyDrive/perturbation_pangu/Irma_2017/seven_day


In [6]:
!mkdir np_input_data

mkdir: cannot create directory ‘np_input_data’: File exists


In [7]:
import xarray as xr
import numpy as np
import os
import random

# Define the base path to the nc files
base_path = '/content/drive/MyDrive/perturbation_pangu/Irma_2017/seven_day/nc_input_data'

# Define your variables and pressure levels
surface_variables = ['mean_sea_level_pressure', '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature']
upper_variables = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind']
pressure_levels = ['1000', '925', '850', '700', '600', '500', '400', '300', '250', '200', '150', '100', '50']

# Function to read and extract the relevant slices from each file
def read_and_slice_perturbate(file_path):
    with xr.open_dataset(file_path) as ds:
        # Assuming variable names may change, find the variable by dimensions
        for var_name, da in ds.data_vars.items():
            if da.shape == (24, 721, 1440):
                return da[0].values, da[1].values,da[2].values,da[3].values

# Preload all the data slices
surface_data_slices = {}
upper_data_slices = {}

# Preload surface variable slices
for var in surface_variables:
    file_path = os.path.join(base_path, var, 'surface.nc')
    slice_0, slice_1 ,slice_2 ,slice_3 = read_and_slice_perturbate(file_path)
    surface_data_slices[var] = (slice_0, slice_1 ,slice_2 ,slice_3)

# Preload upper-air variable slices
for var in upper_variables:
    for level in pressure_levels:
        file_path = os.path.join(base_path, var, f'{level}.nc')
        slice_0, slice_1, slice_2 ,slice_3 = read_and_slice_perturbate(file_path)
        upper_data_slices[(var, level)] = (slice_0, slice_1 , slice_2 ,slice_3)

case_total = 101
# Generate 100 cases as example
for case_number in range(1, case_total):
    # Initialize arrays to hold the data for the current case
    surface_data = []
    upper_data = np.empty((len(upper_variables), len(pressure_levels), 721, 1440))

    # Process surface variables for the current case
    for var in surface_variables:
        time_step = random.choice([0, 1, 2,3])  # Randomly select first or second time step
        surface_data.append(surface_data_slices[var][time_step])

    # Convert surface data to numpy array and reshape to the desired shape
    surface_array = np.stack(surface_data, axis=0)

    # Process upper-air variables for the current case
    for i, var in enumerate(upper_variables):
        for j, level in enumerate(pressure_levels):
            time_step = random.choice([0,1,2,3])  # Randomly select first or second time step
            upper_data[i, j] = upper_data_slices[(var, level)][time_step]
    print(case_number)
    print(surface_array.shape)
    print(upper_data.shape)
    # Save the arrays to files with the case number in the filename
    np.save(f'np_input_data/input_surface_case_{case_number}.npy', surface_array)
    np.save(f'np_input_data/input_upper_case_{case_number}.npy', upper_data)

1
(4, 721, 1440)
(5, 13, 721, 1440)
2
(4, 721, 1440)
(5, 13, 721, 1440)
3
(4, 721, 1440)
(5, 13, 721, 1440)
4
(4, 721, 1440)
(5, 13, 721, 1440)
5
(4, 721, 1440)
(5, 13, 721, 1440)
6
(4, 721, 1440)
(5, 13, 721, 1440)
7
(4, 721, 1440)
(5, 13, 721, 1440)
8
(4, 721, 1440)
(5, 13, 721, 1440)
9
(4, 721, 1440)
(5, 13, 721, 1440)
10
(4, 721, 1440)
(5, 13, 721, 1440)
11
(4, 721, 1440)
(5, 13, 721, 1440)
12
(4, 721, 1440)
(5, 13, 721, 1440)
13
(4, 721, 1440)
(5, 13, 721, 1440)
14
(4, 721, 1440)
(5, 13, 721, 1440)
15
(4, 721, 1440)
(5, 13, 721, 1440)
16
(4, 721, 1440)
(5, 13, 721, 1440)
17
(4, 721, 1440)
(5, 13, 721, 1440)
18
(4, 721, 1440)
(5, 13, 721, 1440)
19
(4, 721, 1440)
(5, 13, 721, 1440)
20
(4, 721, 1440)
(5, 13, 721, 1440)
21
(4, 721, 1440)
(5, 13, 721, 1440)
22
(4, 721, 1440)
(5, 13, 721, 1440)
23
(4, 721, 1440)
(5, 13, 721, 1440)
24
(4, 721, 1440)
(5, 13, 721, 1440)
25
(4, 721, 1440)
(5, 13, 721, 1440)
26
(4, 721, 1440)
(5, 13, 721, 1440)
27
(4, 721, 1440)
(5, 13, 721, 1440)
28
(4, 721

#Mangkhut

In [8]:
cd /content/drive/MyDrive/perturbation_pangu/Mangkhut_2018/seven_day/

/content/drive/MyDrive/perturbation_pangu/Mangkhut_2018/seven_day


In [9]:
!mkdir np_input_data_direct

mkdir: cannot create directory ‘np_input_data_direct’: File exists


In [10]:
import xarray as xr
import numpy as np
import os
import random

# Define the base path to the nc files
base_path = '/content/drive/MyDrive/perturbation_pangu/Mangkhut_2018/seven_day/nc_input_data'

# Define your variables and pressure levels
surface_variables = ['mean_sea_level_pressure', '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature']
upper_variables = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind']
pressure_levels = ['1000', '925', '850', '700', '600', '500', '400', '300', '250', '200', '150', '100', '50']

# Function to read and extract the relevant slices from each file
def read_and_slice(file_path):
    with xr.open_dataset(file_path) as ds:
        # Assuming variable names may change, find the variable by dimensions
        for var_name, da in ds.data_vars.items():
            if da.shape == (24, 721, 1440):
                return da[0].values

# Preload all the data slices
surface_data_slices = {}
upper_data_slices = {}

# Preload surface variable slices
for var in surface_variables:
    file_path = os.path.join(base_path, var, 'surface.nc')
    slice_0 = read_and_slice(file_path)
    surface_data_slices[var] = (slice_0)

# Preload upper-air variable slices
for var in upper_variables:
    for level in pressure_levels:
        file_path = os.path.join(base_path, var, f'{level}.nc')
        slice_0 = read_and_slice(file_path)
        upper_data_slices[(var, level)] = (slice_0)


In [11]:
case_total = 2
# Generate 100 cases as example
for case_number in range(1, case_total):
    # Initialize arrays to hold the data for the current case
    surface_data = []
    upper_data = np.empty((len(upper_variables), len(pressure_levels), 721, 1440))

    # Process surface variables for the current case
    for var in surface_variables:
        surface_data.append(surface_data_slices[var])

    # Convert surface data to numpy array and reshape to the desired shape
    surface_array = np.stack(surface_data, axis=0)

    # Process upper-air variables for the current case
    for i, var in enumerate(upper_variables):
        for j, level in enumerate(pressure_levels):
            upper_data[i, j] = upper_data_slices[(var, level)]
    print(case_number)
    print(surface_array.shape)
    print(upper_data.shape)
    # Save the arrays to files with the case number in the filename
    np.save(f'np_input_data_direct/input_surface_seven_day.npy', surface_array)
    np.save(f'np_input_data_direct/input_upper_seven_day.npy', upper_data)

1
(4, 721, 1440)
(5, 13, 721, 1440)


In [12]:
cd /content/drive/MyDrive/perturbation_pangu/Mangkut_2018/seven_day

[Errno 2] No such file or directory: '/content/drive/MyDrive/perturbation_pangu/Mangkut_2018/seven_day'
/content/drive/MyDrive/perturbation_pangu/Mangkhut_2018/seven_day


In [13]:
!mkdir np_input_data

In [14]:
import xarray as xr
import numpy as np
import os
import random

# Define the base path to the nc files
base_path = '/content/drive/MyDrive/perturbation_pangu/Mangkhut_2018/seven_day/nc_input_data'

# Define your variables and pressure levels
surface_variables = ['mean_sea_level_pressure', '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature']
upper_variables = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind']
pressure_levels = ['1000', '925', '850', '700', '600', '500', '400', '300', '250', '200', '150', '100', '50']

# Function to read and extract the relevant slices from each file
def read_and_slice_perturbate(file_path):
    with xr.open_dataset(file_path) as ds:
        # Assuming variable names may change, find the variable by dimensions
        for var_name, da in ds.data_vars.items():
            if da.shape == (24, 721, 1440):
                return da[0].values, da[1].values,da[2].values,da[3].values

# Preload all the data slices
surface_data_slices = {}
upper_data_slices = {}

# Preload surface variable slices
for var in surface_variables:
    file_path = os.path.join(base_path, var, 'surface.nc')
    slice_0, slice_1 ,slice_2 ,slice_3 = read_and_slice_perturbate(file_path)
    surface_data_slices[var] = (slice_0, slice_1 ,slice_2 ,slice_3)

# Preload upper-air variable slices
for var in upper_variables:
    for level in pressure_levels:
        file_path = os.path.join(base_path, var, f'{level}.nc')
        slice_0, slice_1, slice_2 ,slice_3 = read_and_slice_perturbate(file_path)
        upper_data_slices[(var, level)] = (slice_0, slice_1 , slice_2 ,slice_3)

case_total = 101
# Generate 100 cases as example
for case_number in range(1, case_total):
    # Initialize arrays to hold the data for the current case
    surface_data = []
    upper_data = np.empty((len(upper_variables), len(pressure_levels), 721, 1440))

    # Process surface variables for the current case
    for var in surface_variables:
        time_step = random.choice([0, 1, 2,3])  # Randomly select first or second time step
        surface_data.append(surface_data_slices[var][time_step])

    # Convert surface data to numpy array and reshape to the desired shape
    surface_array = np.stack(surface_data, axis=0)

    # Process upper-air variables for the current case
    for i, var in enumerate(upper_variables):
        for j, level in enumerate(pressure_levels):
            time_step = random.choice([0,1,2,3])  # Randomly select first or second time step
            upper_data[i, j] = upper_data_slices[(var, level)][time_step]
    print(case_number)
    print(surface_array.shape)
    print(upper_data.shape)
    # Save the arrays to files with the case number in the filename
    np.save(f'np_input_data/input_surface_case_{case_number}.npy', surface_array)
    np.save(f'np_input_data/input_upper_case_{case_number}.npy', upper_data)

1
(4, 721, 1440)
(5, 13, 721, 1440)
2
(4, 721, 1440)
(5, 13, 721, 1440)
3
(4, 721, 1440)
(5, 13, 721, 1440)
4
(4, 721, 1440)
(5, 13, 721, 1440)
5
(4, 721, 1440)
(5, 13, 721, 1440)
6
(4, 721, 1440)
(5, 13, 721, 1440)
7
(4, 721, 1440)
(5, 13, 721, 1440)
8
(4, 721, 1440)
(5, 13, 721, 1440)
9
(4, 721, 1440)
(5, 13, 721, 1440)
10
(4, 721, 1440)
(5, 13, 721, 1440)
11
(4, 721, 1440)
(5, 13, 721, 1440)
12
(4, 721, 1440)
(5, 13, 721, 1440)
13
(4, 721, 1440)
(5, 13, 721, 1440)
14
(4, 721, 1440)
(5, 13, 721, 1440)
15
(4, 721, 1440)
(5, 13, 721, 1440)
16
(4, 721, 1440)
(5, 13, 721, 1440)
17
(4, 721, 1440)
(5, 13, 721, 1440)
18
(4, 721, 1440)
(5, 13, 721, 1440)
19
(4, 721, 1440)
(5, 13, 721, 1440)
20
(4, 721, 1440)
(5, 13, 721, 1440)
21
(4, 721, 1440)
(5, 13, 721, 1440)
22
(4, 721, 1440)
(5, 13, 721, 1440)
23
(4, 721, 1440)
(5, 13, 721, 1440)
24
(4, 721, 1440)
(5, 13, 721, 1440)
25
(4, 721, 1440)
(5, 13, 721, 1440)
26
(4, 721, 1440)
(5, 13, 721, 1440)
27
(4, 721, 1440)
(5, 13, 721, 1440)
28
(4, 721

#Debbie

In [15]:
cd /content/drive/MyDrive/perturbation_pangu/Debbie_2017/seven_day/

/content/drive/MyDrive/perturbation_pangu/Debbie_2017/seven_day


In [16]:
!mkdir np_input_data_direct

mkdir: cannot create directory ‘np_input_data_direct’: File exists


In [17]:
import xarray as xr
import numpy as np
import os
import random

# Define the base path to the nc files
base_path = '/content/drive/MyDrive/perturbation_pangu/Debbie_2017/seven_day/nc_input_data'

# Define your variables and pressure levels
surface_variables = ['mean_sea_level_pressure', '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature']
upper_variables = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind']
pressure_levels = ['1000', '925', '850', '700', '600', '500', '400', '300', '250', '200', '150', '100', '50']

# Function to read and extract the relevant slices from each file
def read_and_slice(file_path):
    with xr.open_dataset(file_path) as ds:
        # Assuming variable names may change, find the variable by dimensions
        for var_name, da in ds.data_vars.items():
            if da.shape == (24, 721, 1440):
                return da[0].values

# Preload all the data slices
surface_data_slices = {}
upper_data_slices = {}

# Preload surface variable slices
for var in surface_variables:
    file_path = os.path.join(base_path, var, 'surface.nc')
    slice_0 = read_and_slice(file_path)
    surface_data_slices[var] = (slice_0)

# Preload upper-air variable slices
for var in upper_variables:
    for level in pressure_levels:
        file_path = os.path.join(base_path, var, f'{level}.nc')
        slice_0 = read_and_slice(file_path)
        upper_data_slices[(var, level)] = (slice_0)


In [18]:
case_total = 2
# Generate 100 cases as example
for case_number in range(1, case_total):
    # Initialize arrays to hold the data for the current case
    surface_data = []
    upper_data = np.empty((len(upper_variables), len(pressure_levels), 721, 1440))

    # Process surface variables for the current case
    for var in surface_variables:
        surface_data.append(surface_data_slices[var])

    # Convert surface data to numpy array and reshape to the desired shape
    surface_array = np.stack(surface_data, axis=0)

    # Process upper-air variables for the current case
    for i, var in enumerate(upper_variables):
        for j, level in enumerate(pressure_levels):
            upper_data[i, j] = upper_data_slices[(var, level)]
    print(case_number)
    print(surface_array.shape)
    print(upper_data.shape)
    # Save the arrays to files with the case number in the filename
    np.save(f'np_input_data_direct/input_surface_seven_day.npy', surface_array)
    np.save(f'np_input_data_direct/input_upper_seven_day.npy', upper_data)

1
(4, 721, 1440)
(5, 13, 721, 1440)


In [19]:
cd /content/drive/MyDrive/perturbation_pangu/Debbie_2017/seven_day

/content/drive/MyDrive/perturbation_pangu/Debbie_2017/seven_day


In [20]:
!mkdir np_input_data

mkdir: cannot create directory ‘np_input_data’: File exists


In [21]:
import xarray as xr
import numpy as np
import os
import random

# Define the base path to the nc files
base_path = '/content/drive/MyDrive/perturbation_pangu/Debbie_2017/seven_day/nc_input_data'

# Define your variables and pressure levels
surface_variables = ['mean_sea_level_pressure', '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature']
upper_variables = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind']
pressure_levels = ['1000', '925', '850', '700', '600', '500', '400', '300', '250', '200', '150', '100', '50']

# Function to read and extract the relevant slices from each file
def read_and_slice_perturbate(file_path):
    with xr.open_dataset(file_path) as ds:
        # Assuming variable names may change, find the variable by dimensions
        for var_name, da in ds.data_vars.items():
            if da.shape == (24, 721, 1440):
                return da[0].values, da[1].values,da[2].values,da[3].values

# Preload all the data slices
surface_data_slices = {}
upper_data_slices = {}

# Preload surface variable slices
for var in surface_variables:
    file_path = os.path.join(base_path, var, 'surface.nc')
    slice_0, slice_1 ,slice_2 ,slice_3 = read_and_slice_perturbate(file_path)
    surface_data_slices[var] = (slice_0, slice_1 ,slice_2 ,slice_3)

# Preload upper-air variable slices
for var in upper_variables:
    for level in pressure_levels:
        file_path = os.path.join(base_path, var, f'{level}.nc')
        slice_0, slice_1, slice_2 ,slice_3 = read_and_slice_perturbate(file_path)
        upper_data_slices[(var, level)] = (slice_0, slice_1 , slice_2 ,slice_3)

case_total = 101
# Generate 100 cases as example
for case_number in range(1, case_total):
    # Initialize arrays to hold the data for the current case
    surface_data = []
    upper_data = np.empty((len(upper_variables), len(pressure_levels), 721, 1440))

    # Process surface variables for the current case
    for var in surface_variables:
        time_step = random.choice([0, 1, 2,3])  # Randomly select first or second time step
        surface_data.append(surface_data_slices[var][time_step])

    # Convert surface data to numpy array and reshape to the desired shape
    surface_array = np.stack(surface_data, axis=0)

    # Process upper-air variables for the current case
    for i, var in enumerate(upper_variables):
        for j, level in enumerate(pressure_levels):
            time_step = random.choice([0,1,2,3])  # Randomly select first or second time step
            upper_data[i, j] = upper_data_slices[(var, level)][time_step]
    print(case_number)
    print(surface_array.shape)
    print(upper_data.shape)
    # Save the arrays to files with the case number in the filename
    np.save(f'np_input_data/input_surface_case_{case_number}.npy', surface_array)
    np.save(f'np_input_data/input_upper_case_{case_number}.npy', upper_data)

1
(4, 721, 1440)
(5, 13, 721, 1440)
2
(4, 721, 1440)
(5, 13, 721, 1440)
3
(4, 721, 1440)
(5, 13, 721, 1440)
4
(4, 721, 1440)
(5, 13, 721, 1440)
5
(4, 721, 1440)
(5, 13, 721, 1440)
6
(4, 721, 1440)
(5, 13, 721, 1440)
7
(4, 721, 1440)
(5, 13, 721, 1440)
8
(4, 721, 1440)
(5, 13, 721, 1440)
9
(4, 721, 1440)
(5, 13, 721, 1440)
10
(4, 721, 1440)
(5, 13, 721, 1440)
11
(4, 721, 1440)
(5, 13, 721, 1440)
12
(4, 721, 1440)
(5, 13, 721, 1440)
13
(4, 721, 1440)
(5, 13, 721, 1440)
14
(4, 721, 1440)
(5, 13, 721, 1440)
15
(4, 721, 1440)
(5, 13, 721, 1440)
16
(4, 721, 1440)
(5, 13, 721, 1440)
17
(4, 721, 1440)
(5, 13, 721, 1440)
18
(4, 721, 1440)
(5, 13, 721, 1440)
19
(4, 721, 1440)
(5, 13, 721, 1440)
20
(4, 721, 1440)
(5, 13, 721, 1440)
21
(4, 721, 1440)
(5, 13, 721, 1440)
22
(4, 721, 1440)
(5, 13, 721, 1440)
23
(4, 721, 1440)
(5, 13, 721, 1440)
24
(4, 721, 1440)
(5, 13, 721, 1440)
25
(4, 721, 1440)
(5, 13, 721, 1440)
26
(4, 721, 1440)
(5, 13, 721, 1440)
27
(4, 721, 1440)
(5, 13, 721, 1440)
28
(4, 721