In [2]:
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Load the semicolon-separated observations file into a DataFrame.
# `squeeze=True` was dropped: it was deprecated in pandas 1.4 and removed in
# pandas 2.0, and it only has an effect on single-column files, which this
# one is not (several columns are used below).
visibility = pd.read_csv(
    'donetsk_17_21_visibility.csv',
    sep=';',
    header=0,
    parse_dates=True,
)
# Set the target column aside; it is re-attached as the last column once the
# engineered features have been added.
labels = visibility.pop('visibility')

In [3]:
# Replace the raw wind speed/direction columns with Cartesian wind components.
wv = visibility.pop('wind_speed')

# The stored direction is multiplied by 10 before being treated as degrees
# (presumably it is recorded in tens of degrees — confirm against the data source).
wd_deg = visibility.pop('wind_direction') * 10

# Convert to radians.
wd_rad = wd_deg * np.pi / 180

# Calculate the wind x and y components.
for component_name, trig_fn in (('w_x', np.cos), ('w_y', np.sin)):
    visibility[component_name] = wv * trig_fn(wd_rad)

In [4]:
# Turn the observation time into seconds since the epoch.
date_time = pd.to_datetime(visibility.pop('started_at'), format='%Y-%m-%d %H:%M:%S')
timestamp_s = date_time.map(pd.Timestamp.timestamp)

# Period lengths in seconds.
day = 24 * 60 * 60
year = 365.2425 * day

# Encode the position within each period as a (sin, cos) pair so that the
# end of a period is numerically close to its start.
for period_name, period_seconds in (('day', day), ('year', year)):
    phase = timestamp_s * (2 * np.pi / period_seconds)
    visibility[period_name + '_sin'] = np.sin(phase)
    visibility[period_name + '_cos'] = np.cos(phase)

# Re-attach the target as the last column, then show summary statistics.
visibility['visibility'] = labels
visibility.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cloud_height,11360.0,6.929049,2.159598,2.0,5.0,6.0,9.0,10.0
cloud_amount,11360.0,4.674912,3.246734,0.0,0.0,6.0,8.0,9.0
temperature,11360.0,10.70515,10.880943,-20.8,1.2,10.3,19.7,37.8
temperature_dew,11360.0,4.112782,7.817085,-22.0,-1.5,4.2,10.4,21.5
pressure,11360.0,992.6527,7.295076,964.2,987.9,992.0,997.5,1015.3
pressure_tendency,11360.0,4.805722,2.476551,1.0,2.0,4.0,7.0,8.0
pressure_tendency_value,11360.0,0.7352113,0.648641,0.0,0.3,0.6,1.0,7.1
w_x,11360.0,-0.01415475,2.13474,-10.0,-1.285575,0.0,1.285575,10.0
w_y,11360.0,0.480422,2.820315,-10.0,-0.68404,0.0,1.879385,16.0
day_sin,11360.0,6.531552e-15,0.707138,-1.0,-0.707107,1.611396e-14,0.707107,1.0


In [5]:
# Split the dataframe into train, validation, and test
from sklearn.model_selection import train_test_split

# A fixed seed keeps the three subsets identical across kernel restarts, so the
# CSV files written from them and the metrics computed on them are comparable
# between runs.
SPLIT_SEED = 42

# 20% of all rows are held out for testing; 20% of the remainder for validation.
train, test = train_test_split(visibility, test_size=0.2, random_state=SPLIT_SEED)
train, val = train_test_split(train, test_size=0.2, random_state=SPLIT_SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

7270 train examples
1818 validation examples
2272 test examples


In [6]:
# Destination files for the three subsets.
train_data_file = "train_data.csv"
valid_data_file = "valid_data.csv"
test_data_file = "test_data.csv"

# Write each subset without an index column and without a header row; the
# column names are supplied separately when the files are read back.
for subset_frame, subset_path in (
    (train, train_data_file),
    (val, valid_data_file),
    (test, test_data_file),
):
    subset_frame.to_csv(subset_path, index=False, header=False)

In [7]:
# Name of the label column.
TARGET_FEATURE_NAME = "visibility"

# Input features, all fed to the model as floats.
FLOAT_FEATURE_NAMES = [
    "cloud_height",
    "cloud_amount",
    "temperature",
    "temperature_dew",
    "pressure",
    "pressure_tendency",
    "pressure_tendency_value",
    "w_x",
    "w_y",
    "day_sin",
    "day_cos",
    "year_sin",
    "year_cos",
]

# Every input feature is currently a float feature.
FEATURE_NAMES = FLOAT_FEATURE_NAMES

# Column names for the header-less CSV files: the features followed by the target.
CSV_HEADER = FLOAT_FEATURE_NAMES + [TARGET_FEATURE_NAME]

# Columns whose CSV default is the integer 0; every other column defaults to 0.0.
_INT_DEFAULT_COLUMNS = ("cloud_height", "cloud_amount", "pressure_tendency")

# One single-element default per CSV column, in CSV_HEADER order.
COLUMN_DEFAULTS = [
    [0] if column_name in _INT_DEFAULT_COLUMNS else [0.0]
    for column_name in CSV_HEADER
]

# Number of target classes.
NUM_CLASSES = 2

In [8]:
def process(features, target):
    """Cast the model's input features to float32.

    Args:
        features: mapping from feature name to tensor; updated in place.
        target: label tensor, passed through unchanged.

    Returns:
        The ``(features, target)`` pair, with every entry of ``features``
        whose name is listed in ``FLOAT_FEATURE_NAMES`` cast to ``tf.float32``.
    """
    # Collect the names first, then overwrite the corresponding entries.
    float_columns = [name for name in features if name in FLOAT_FEATURE_NAMES]
    for name in float_columns:
        features[name] = tf.cast(features[name], tf.float32)
    return features, target
def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):
    """Build a batched ``tf.data`` dataset from a header-less CSV file.

    Args:
        csv_file_path: path of the CSV file to read.
        shuffle: forwarded to ``make_csv_dataset`` to control shuffling.
        batch_size: number of rows per batch.

    Returns:
        A dataset of ``(features, target)`` batches that makes a single pass
        over the file, with ``process`` applied to every batch.
    """
    csv_batches = tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        column_names=CSV_HEADER,
        column_defaults=COLUMN_DEFAULTS,
        label_name=TARGET_FEATURE_NAME,
        num_epochs=1,
        header=False,
        shuffle=shuffle,
    )
    return csv_batches.map(process)

In [9]:
def create_model_inputs():
    """Create one Keras input per entry of ``FEATURE_NAMES``.

    Returns:
        A dict mapping each feature name to a ``layers.Input`` with the same
        name, an empty per-example shape, and dtype ``tf.float32``.
    """
    return {
        feature_name: layers.Input(name=feature_name, shape=(), dtype=tf.float32)
        for feature_name in FEATURE_NAMES
    }

In [10]:
def encode_inputs(inputs, encoding_size):
    """Project every input to a vector of length ``encoding_size``.

    Args:
        inputs: mapping from feature name to input tensor.
        encoding_size: number of units of each per-feature projection.

    Returns:
        A list with one encoded tensor per input, in the mapping's iteration order.
    """
    projections = []
    for feature_tensor in inputs.values():
        # Add a trailing axis so the value can be fed through a Dense layer.
        feature_column = tf.expand_dims(feature_tensor, -1)
        # Each feature gets its own linear (activation-free) projection.
        projections.append(layers.Dense(units=encoding_size)(feature_column))
    return projections

In [11]:
class GatedLinearUnit(layers.Layer):
    """Element-wise product of a linear projection and a sigmoid gate.

    Both projections are Dense layers with ``units`` outputs applied to the
    same input.
    """

    def __init__(self, units):
        super().__init__()
        self.linear = layers.Dense(units)
        self.sigmoid = layers.Dense(units, activation="sigmoid")

    def call(self, inputs):
        """Return ``linear(inputs)`` scaled element-wise by ``sigmoid(inputs)``."""
        candidate = self.linear(inputs)
        gate = self.sigmoid(inputs)
        return candidate * gate

In [12]:
class GatedResidualNetwork(layers.Layer):
    """Gated residual block: ELU dense, linear dense, dropout, gated skip connection, layer norm.

    Args:
        units: output width of every dense sub-layer.
        dropout_rate: rate of the dropout applied before the gate.
    """

    def __init__(self, units, dropout_rate):
        super().__init__()
        self.units = units
        self.elu_dense = layers.Dense(units, activation="elu")
        self.linear_dense = layers.Dense(units)
        self.dropout = layers.Dropout(dropout_rate)
        self.gated_linear_unit = GatedLinearUnit(units)
        self.layer_norm = layers.LayerNormalization()
        self.project = layers.Dense(units)

    def call(self, inputs):
        """Apply the block to ``inputs`` and return the normalized result."""
        hidden = self.dropout(self.linear_dense(self.elu_dense(inputs)))
        # The skip connection needs the same last dimension as the gated branch;
        # project the input only when its width differs from `units`.
        if inputs.shape[-1] == self.units:
            skip = inputs
        else:
            skip = self.project(inputs)
        return self.layer_norm(skip + self.gated_linear_unit(hidden))

In [13]:
class VariableSelection(layers.Layer):
    """Combine per-feature encodings using learned softmax weights.

    Args:
        num_features: number of feature tensors the layer will receive.
        units: width used by every gated residual network.
        dropout_rate: dropout rate passed to every gated residual network.
    """

    def __init__(self, num_features, units, dropout_rate):
        super().__init__()
        # One independent GRN per feature.
        self.grns = [
            GatedResidualNetwork(units, dropout_rate) for _ in range(num_features)
        ]
        # A further GRN over the concatenation of all features, followed by a
        # softmax that yields one weight per feature.
        self.grn_concat = GatedResidualNetwork(units, dropout_rate)
        self.softmax = layers.Dense(units=num_features, activation="softmax")

    def call(self, inputs):
        """Return the weighted combination of the processed features.

        ``inputs`` is a sequence of tensors, one per feature. The shape notes
        below assume each is (batch, encoding) — TODO confirm against callers.
        """
        # Per-feature weights, with a trailing axis added: (batch, num_features, 1).
        joined = layers.concatenate(inputs)
        weights = tf.expand_dims(self.softmax(self.grn_concat(joined)), axis=-1)

        # Run each feature through its own GRN and stack along axis 1.
        processed = [
            self.grns[position](feature) for position, feature in enumerate(inputs)
        ]
        stacked = tf.stack(processed, axis=1)

        # weights^T @ stacked sums the features weighted by the softmax output;
        # the singleton axis left by the product is squeezed away.
        return tf.squeeze(tf.matmul(weights, stacked, transpose_a=True), axis=1)

In [14]:
def create_model(encoding_size, dropout_rate=None):
    """Build the variable-selection classifier.

    The model takes the inputs from ``create_model_inputs``, projects each to
    ``encoding_size`` with ``encode_inputs``, combines them with a
    ``VariableSelection`` layer and ends in a single sigmoid unit.

    Args:
        encoding_size: width of the per-feature encodings and of the
            variable-selection layer.
        dropout_rate: dropout rate for the variable-selection layer. When
            omitted, the notebook-level global ``dropout_rate`` is used, which
            is what this function always did before the parameter existed.

    Returns:
        An uncompiled ``keras.Model``.

    Raises:
        NameError: if ``dropout_rate`` is omitted and no global of that name
            has been defined yet.
    """
    if dropout_rate is None:
        # Backward compatibility: earlier callers relied on a global defined in
        # the training cell. Look it up explicitly so the dependency is visible.
        try:
            dropout_rate = globals()["dropout_rate"]
        except KeyError:
            raise NameError(
                "dropout_rate was not passed to create_model() and no global "
                "'dropout_rate' is defined"
            ) from None

    inputs = create_model_inputs()
    feature_list = encode_inputs(inputs, encoding_size)
    num_features = len(feature_list)

    features = VariableSelection(num_features, encoding_size, dropout_rate)(
        feature_list
    )

    # Single sigmoid unit: the output is a probability for the positive class.
    outputs = layers.Dense(units=1, activation="sigmoid")(features)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [15]:
# Hyperparameters.
learning_rate = 0.001
dropout_rate = 0.15  # read by create_model() as a global
batch_size = 265  # NOTE(review): possibly a typo for 256 — confirm
num_epochs = 20
encoding_size = 64

# Seed NumPy and TensorFlow before the model is built so that repeated runs
# are more repeatable. This does not by itself guarantee bit-identical results.
TRAINING_SEED = 42
np.random.seed(TRAINING_SEED)
tf.random.set_seed(TRAINING_SEED)

model = create_model(encoding_size)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.BinaryAccuracy(name="accuracy")],
)


# Create an early stopping callback: stop once the validation loss has not
# improved for 5 epochs and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

print("Start training the model...")
# Only the training data is shuffled.
train_dataset = get_dataset_from_csv(
    train_data_file, shuffle=True, batch_size=batch_size
)
valid_dataset = get_dataset_from_csv(valid_data_file, batch_size=batch_size)
model.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=valid_dataset,
    callbacks=[early_stopping],
)
print("Model training finished.")

print("Evaluating model performance...")
test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)
# evaluate() returns the loss followed by the compiled metrics.
_, accuracy = model.evaluate(test_dataset)
print("Test accuracy: ",round(accuracy * 100, 2))

Start training the model...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Model training finished.
Evaluating model performance...
Test accuracy:  96.04


In [16]:
# Persist the trained model; the inference cell loads it again from this path.
saved_model_path = 'donetsk_visibility_grn_vsn_model'
model.save(saved_model_path)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: donetsk_visibility_grn_vsn_model/assets


In [3]:
from datetime import datetime
import time
def _telegram_features(date_term, telegram):
    """Decode the fixed-position fields of a telegram into the model's feature dict."""
    import calendar  # local import: only needed for the UTC epoch conversion

    features = {}
    features['cloud_height'] = int(telegram[14:15])
    features['cloud_amount'] = int(telegram[18:19])
    features['pressure_tendency'] = int(telegram[49:50])

    # Temperatures: a flag character of '1' marks a negative value; the next
    # three digits are whole degrees (two digits) and tenths (one digit).
    sign = '-' if telegram[25:26] == '1' else ''
    features['temperature'] = float(sign + telegram[26:28] + '.' + telegram[28:29])
    sign = '-' if telegram[31:32] == '1' else ''
    features['temperature_dew'] = float(sign + telegram[32:34] + '.' + telegram[34:35])

    # Pressure is transmitted without its thousands digit; a leading '0' means
    # the value is 1000 or more, so the '1' is put back.
    thousands = '1' if telegram[37:38] == '0' else ''
    features['pressure'] = float(thousands + telegram[37:40] + '.' + telegram[40:41])
    features['pressure_tendency_value'] = float(telegram[50:52] + '.' + telegram[52:53])

    # Wind: direction is multiplied by 10 to get degrees, then speed and
    # direction are converted to x/y components, as in the training data.
    wv = float(telegram[21:23] + '.')
    wd_rad = int(telegram[19:21]) * 10 * np.pi / 180
    features['w_x'] = wv * np.cos(wd_rad)
    features['w_y'] = wv * np.sin(wd_rad)

    # The training features are built with pd.Timestamp.timestamp on tz-naive
    # timestamps, which treats them as UTC. time.mktime would instead apply the
    # machine's local timezone and shift the day/year phase, so the epoch
    # seconds are computed as UTC here.
    d = datetime.strptime(date_term, "%Y-%m-%d %H:%M:%S")
    s = calendar.timegm(d.timetuple())
    day = 24 * 60 * 60
    year = 365.2425 * day
    features['day_sin'] = np.sin(s * (2 * np.pi / day))
    features['day_cos'] = np.cos(s * (2 * np.pi / day))
    features['year_sin'] = np.sin(s * (2 * np.pi / year))
    features['year_cos'] = np.cos(s * (2 * np.pi / year))
    return features


def prepare_data(date_term, telegram, model_name):
    """Predict from a single telegram with a saved model.

    Args:
        date_term: observation time as a ``"%Y-%m-%d %H:%M:%S"`` string,
            interpreted as UTC to match how the training features are computed.
        telegram: the telegram text; fields are read from fixed character
            positions, so the layout must match the example used in this
            notebook (including the leading 5-character group).
        model_name: path passed to ``tf.keras.models.load_model``.

    Returns:
        The array returned by ``model.predict`` for this single example.

    Raises:
        ValueError: if a sliced field cannot be parsed as a number or the
            date string does not match the expected format.
    """
    features = _telegram_features(date_term, telegram)
    # The model's inputs are declared as float32, so build float32 tensors
    # explicitly instead of relying on int32/float64 inputs being cast.
    input_dict = {
        name: tf.convert_to_tensor([float(value)], dtype=tf.float32)
        for name, value in features.items()
    }
    # The model is loaded from disk on every call.
    model = tf.keras.models.load_model(model_name)
    predictions = model.predict(input_dict)

    return predictions

# Example: score one telegram with the saved model.
d_t = '2021-08-25 06:00:00'
telegram = 'ЩЭСМЮ 34519 32997 01602 10216 20136 39929 40164 54000 333 20160 555 10029='
predictions = prepare_data(d_t, telegram, 'donetsk_visibility_grn_vsn_model')

# The prediction is used directly as a probability (no extra sigmoid is
# applied) and reported as a percentage. The message reads: "Probability of
# visibility deterioration in the time remaining until the term: X percent."
deterioration_pct = 100 * predictions[0][0]
message_template = "Вероятность ухудшения видимости в оставшееся до срока время %.1f проц. "
print(message_template % deterioration_pct)


Вероятность ухудшения видимости в оставшееся до срока время 1.0 проц. 


In [18]:
# Display the raw array returned by prepare_data() for the example above.
predictions

array([[0.00364822]], dtype=float32)