## Hands on the 3D Convolutional Layers in PyTorch

Since our input data is (6,241,121) data array, we may need 3D convolutional layers in the image encoder. Here we use PyTorch [Conv3d](https://pytorch.org/docs/stable/generated/torch.nn.Conv3d.html#torch.nn.Conv3d) to build an encoder.vrxiv.org/abs/2205.14100)

In [1]:
# Data Loader
import numpy as np
import pandas as pd
import os

DATA_PATH = '../../data/ncep_npy/'
# Walk through the sub-directories
file_list = []
for root, dirs, files in os.walk(DATA_PATH):
    for name in files:
        if name.endswith('.npy'):
            date = name.replace('.npy','')
            url = os.path.join(root, name)
            file_list.append({'date':date, 'furi':url})

file_list = pd.DataFrame(file_list)
print(file_list.shape)
print(file_list.head())

(0, 0)
Empty DataFrame
Columns: []
Index: []


In [2]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

import cartopy
import cartopy.crs as ccrs
import matplotlib.pyplot as plt

data = np.load('../../data/ncep_npy/20110731.npy')

def plot_surface_data(data, title=None):
    ''' Visualize the 6x241x121 dataset.  '''
    # Retrieve each variable
    pwat = data[0,:,:]
    slp = data[1,:,:]
    rh = data[2,:,:]
    t = data[3,:,:]
    u = data[4,:,:]
    v = data[5,:,:]
    # Create coordinate
    x = np.linspace(60, 180, 241)
    y = np.linspace(0, 60, 121)
    x2d, y2d = np.meshgrid(x, y)
    # Create map
    crs = ccrs.PlateCarree()
    fig = plt.figure(figsize=(12, 6))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.coastlines(linewidths=0.3, alpha=0.8)
    # draw variables
    water = ax.contourf(x2d, y2d, rh*100, transform=crs, levels=11, cmap='binary', alpha=0.3, label='RH [%]')
    l1 = ax.contour(x2d, y2d, slp*14000+93000, transform=crs, levels=11, colors='black', linestyles='solid', linewidths=0.8, label='P [pa]')
    l2 = ax.contour(x2d, y2d, t*100+220, transform=crs, levels=11, colors='black', linestyles='dashed', linewidths=0.5, label='T [K]')
    wind = ax.streamplot(x2d, y2d, u*120-60, v*120-60, transform=crs, color='blue')
    # draw addons
    ax.set_extent([60, 180, 0, 60], crs=crs)
    if not title is None:
        ax.set_title(title)
    plt.show()
    #
    return(0)

plot_surface_data(data, title='20110731-00Z')

FileNotFoundError: [Errno 2] No such file or directory: '../../data/ncep_npy/20110731.npy'

In [None]:
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#creating-a-custom-dataset-for-your-files
import os
import numpy as np
from torchvision.io import read_image
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader

class MySurfaceDataset(Dataset):
    def __init__(self, data_info, npy_dir, transform=None, target_transform=None):
        self.data_info = data_info
        self.npy_dir = npy_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return self.data_info.shape[0]

    def __getitem__(self, idx):
        data_path = self.data_info['furi'].iloc[idx]
        np_array = np.load(data_path)
        data = torch.from_numpy(np_array)
        label = self.data_info['date'].iloc[idx]
        if self.transform:
            data = self.transform(data)
        if self.target_transform:
            label = self.target_transform(label)
        return data, label

ncep_data = MySurfaceDataset(data_info=file_list, npy_dir='../../data/ncep_npy/')

In [None]:
#!pip install torchinfo
import torch
import torch.nn as nn
import torch.optim as optim
from torchinfo import summary
 
# Define the autoencoder architecture
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.BatchNorm2d(6, affine=False),
            nn.Conv2d(6, 16, kernel_size=(4, 4), stride=2, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16, affine=False),
            nn.Conv2d(16, 32, kernel_size=(4, 4), stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 8, kernel_size=(4, 4), stride=3, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(8, 32, 
                               kernel_size=(4, 4), 
                               stride=3, 
                               padding=1, 
                               output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, 
                               kernel_size=(4, 4), 
                               stride=2, 
                               padding=1, 
                               output_padding=0),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 6, 
                               kernel_size=(4, 4), 
                               stride=2, 
                               padding=1, 
                               output_padding=1),
            nn.Sigmoid()
        )
         
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
 
 
# Initialize the autoencoder
model = Autoencoder()
 
print(model)
print(summary(model, input_size=(64, 6, 241, 121)))

In [None]:
# Initialize the autoencoder
model = Autoencoder()

# Move the model to GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model.to(device)
 
# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


batch_size = 256
# Create data loaders.
dataloader = DataLoader(ncep_data, batch_size=batch_size)

# Train the autoencoder
num_epochs = 10
model = model.half()
for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = img.to(device)
        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, img)
        loss.backward()
        optimizer.step()
    if epoch % 5== 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
 

In [None]:
# Save the model
torch.save(model.state_dict(), '../../data/conv_autoencoder.pth')