In [1]:
# Authenticate this Colab session (presumably so the gcloud/gsutil shell
# commands used in this notebook can reach Google Cloud -- confirm).
from google.colab import auth
auth.authenticate_user()

# Project that the gcloud CLI should use; the shell line below interpolates the
# Python variable via IPython's {name} syntax and sets the core/project property.
project_id = 'infra-forklift-352900'
!gcloud config set project {project_id}

Updated property [core/project].


In [2]:
# Copy the dataset archive from the GCS bucket into the current working
# directory (the recorded output reports a single 11.3 GiB object).
!gsutil cp gs://tornado-alley-dataset/pro-dataset.zip .

Copying gs://tornado-alley-dataset/pro-dataset.zip...
\ [1 files][ 11.3 GiB/ 11.3 GiB]   61.6 MiB/s                                   
Operation completed over 1 objects/11.3 GiB.                                     


In [3]:
import zipfile
# Unpack the downloaded archive into the local 'pro-dataset' directory; the
# context manager closes the archive once extraction has finished.
with zipfile.ZipFile(file='pro-dataset.zip', mode='r') as archive:
    archive.extractall(path='pro-dataset')

In [10]:
from os import listdir
from os.path import join
import numpy as np
from random import shuffle

In [5]:
import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import LearningRateScheduler

In [22]:
# Input and label files.
#
# os.listdir() returns names in arbitrary order. The inputs and labels are
# later loaded and concatenated independently, so both lists must be in the
# same order or samples would be paired with the wrong labels. Sorting the
# directory listing makes the order deterministic.
# NOTE(review): this assumes matching input/label file names differ only by the
# 'inputs'/'labels' part so that they sort identically -- confirm against the
# actual file names.
fns = sorted(listdir('pro-dataset'))

# Keep files whose first four characters parse to an integer greater than 2012
# (presumably a year prefix -- confirm).
fninputs = [join('pro-dataset', fn) for fn in fns if 'inputs' in fn and int(fn[:4]) > 2012]
fnlabels = [join('pro-dataset', fn) for fn in fns if 'labels' in fn and int(fn[:4]) > 2012]

# Every inputs file needs a labels partner; fail early instead of silently
# building misaligned arrays.
if len(fninputs) != len(fnlabels):
    raise ValueError(
        'Found %d inputs files but %d labels files in pro-dataset'
        % (len(fninputs), len(fnlabels))
    )

In [23]:
# Load every selected inputs file and join the arrays along the first axis.
# The temporary list is not bound to a name, so the per-file arrays can be
# released as soon as the concatenated copy exists.
inputs = np.concatenate(list(map(np.load, fninputs)), axis=0)
inputs.shape

(14736, 29, 16, 32, 6)

In [25]:
# Load every selected labels file and join the arrays along the first axis,
# in the same file order used for the inputs.
labels = np.concatenate([np.load(path) for path in fnlabels], axis=0)
labels.shape

(14736, 3)

In [26]:
# Shift each channel (last axis) to [-1, 1] with min-max scaling, in place.
#
# The scaled values are written back into `inputs`. If that array had an
# integer dtype the assignment would truncate everything to -1, 0 or 1, so
# switch to floating-point storage first. Nothing changes if it is already float.
if not np.issubdtype(inputs.dtype, np.floating):
    inputs = inputs.astype(np.float32)

# NOTE(review): min/max are taken over all samples, i.e. before the dataset is
# split into train/validation/test further down in the notebook.
for i in range(inputs.shape[-1]):
    # Compute in float64 so the range arithmetic does not lose precision or
    # overflow in a narrower storage dtype.
    X = inputs[..., i].astype(np.float64)
    lo, hi = X.min(), X.max()
    if hi == lo:
        # Constant channel: the min-max formula would divide by zero, so map it
        # to the centre of the target range instead.
        X = np.zeros_like(X)
    else:
        X = 2*(X - lo)/(hi - lo) - 1
    print(X.min(), X.max())
    inputs[..., i] = X

-1.0 1.0
-1.0 1.0
-1.0 1.0
-1.0 1.0
-1.0 1.0
-1.0 1.0


In [27]:
# Form a tf.data dataset and split it 70% / 20% / 10% into train / validation /
# test.
#
# reshuffle_each_iteration=False is essential here. With the default (True) the
# whole dataset is reshuffled every time it is iterated, so take()/skip() would
# select different samples on every epoch and the three splits would leak into
# each other. A fixed seed additionally makes the split reproducible.
SPLIT_SEED = 0
n_samples = inputs.shape[0]
ds = Dataset.from_tensor_slices((inputs, labels)).shuffle(
    n_samples, seed=SPLIT_SEED, reshuffle_each_iteration=False
)
n = n_samples//10

# Only the training split is reshuffled per epoch (small buffer, reshuffles by
# default); validation and test keep a fixed order.
train = ds.take(7*n).shuffle(512).batch(16)
validate = ds.skip(7*n).take(2*n).batch(16)
test = ds.skip(9*n).batch(16)

In [44]:
# 3-D convolutional classifier: two conv/conv/pool stages followed by a small
# dense head. No input shape is declared, so the model is built on first use.
#
# SELU is meant to be paired with the 'lecun_normal' initializer (see the Keras
# activation docs); the Keras default (Glorot uniform) does not give the
# self-normalising behaviour SELU is designed for.
model = Sequential([
    Conv3D(128, 3, padding='valid', activation='selu', kernel_initializer='lecun_normal'),
    Conv3D(128, 3, padding='valid', activation='selu', kernel_initializer='lecun_normal'),
    # Pool the first and third spatial axes only; the middle axis keeps its size.
    MaxPool3D((2,1,2)),
    Conv3D(256, 3, padding='valid', activation='selu', kernel_initializer='lecun_normal'),
    Conv3D(256, 3, padding='valid', activation='selu', kernel_initializer='lecun_normal'),
    MaxPool3D(),
    Flatten(),
    Dense(64, activation='selu', kernel_initializer='lecun_normal'),
    Dense(64, activation='selu', kernel_initializer='lecun_normal'),
    # Three raw logits (no activation); the loss is expected to apply softmax.
    Dense(3)
])

In [45]:
# The network's last layer emits raw logits, so the cross-entropy loss is told
# to apply softmax itself (from_logits=True).
logit_loss = CategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=logit_loss, metrics=['accuracy'])

In [46]:
def decay_lr(epoch, lr):
    """Return the learning rate for the coming epoch: 90% of the current one."""
    return lr*0.9


# Train for 16 epochs, validating after each one. The scheduler callback
# multiplies the learning rate by 0.9 every epoch.
model.fit(
    train,
    validation_data=validate,
    epochs=16,
    callbacks=[LearningRateScheduler(decay_lr)],
)

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


<keras.callbacks.History at 0x7ff4945daa50>