In [1]:
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# step 2 load data
# The file is ';'-separated with a header row.  `squeeze=True` was removed
# from this call: it only has an effect when the parsed data has a single
# column, and the keyword no longer exists in pandas 2.x (passing it raises
# TypeError), so dropping it keeps the cell runnable on current pandas.
visibility = pd.read_csv('donetsk_17_21_visibility.csv', sep=';', header=0, parse_dates=True)
visibility.head()

Unnamed: 0,started_at,cloud_height,cloud_amount,wind_direction,wind_speed,temperature,temperature_dew,pressure,pressure_tendency,pressure_tendency_value,visibility
0,2017-09-01 00:00:00,5,8,0,0,14.4,13.5,989.4,7,1.3,0
1,2017-09-01 03:00:00,5,8,0,0,14.2,13.4,989.3,7,0.1,0
2,2017-09-01 06:00:00,5,8,0,0,15.2,13.2,989.7,3,0.4,0
3,2017-09-01 09:00:00,5,6,25,2,17.7,13.2,988.9,8,0.8,0
4,2017-09-01 12:00:00,5,7,25,4,16.5,13.4,989.1,3,0.2,0


In [3]:
# step 3
# Detach the observation-time column from the frame (pop removes it in place)
# and parse its strings into datetimes.
started_at_raw = visibility.pop('started_at')
date_time = pd.to_datetime(started_at_raw, format='%Y-%m-%d %H:%M:%S')

In [4]:
# step 4 wind_direction to degrees
# Scale the raw direction codes by 10 to obtain degrees, then show summary
# statistics with one row per column.
wind_direction_deg = visibility['wind_direction'] * 10
visibility['wind_direction'] = wind_direction_deg
visibility.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cloud_height,11360.0,6.929049,2.159598,2.0,5.0,6.0,9.0,10.0
cloud_amount,11360.0,4.674912,3.246734,0.0,0.0,6.0,8.0,9.0
wind_direction,11360.0,137.132923,117.341025,0.0,20.0,110.0,250.0,360.0
wind_speed,11360.0,2.798063,2.216409,0.0,2.0,2.0,4.0,16.0
temperature,11360.0,10.70515,10.880943,-20.8,1.2,10.3,19.7,37.8
temperature_dew,11360.0,4.112782,7.817085,-22.0,-1.5,4.2,10.4,21.5
pressure,11360.0,992.652729,7.295076,964.2,987.9,992.0,997.5,1015.3
pressure_tendency,11360.0,4.805722,2.476551,1.0,2.0,4.0,7.0,8.0
pressure_tendency_value,11360.0,0.735211,0.648641,0.0,0.3,0.6,1.0,7.1
visibility,11360.0,0.042694,0.202174,0.0,0.0,0.0,0.0,1.0


In [5]:
#step 4 wind to vectors
# Speed and direction are removed from the frame and replaced by the two
# components of the wind vector.
wv = visibility.pop('wind_speed')

# Degrees -> radians.
wind_direction_deg = visibility.pop('wind_direction')
wd_rad = wind_direction_deg * np.pi / 180

# w_x uses the cosine of the direction, w_y the sine.
for component, trig in (('w_x', np.cos), ('w_y', np.sin)):
    visibility[component] = wv * trig(wd_rad)

In [6]:
# step 5 date_time to seconds
# Convert every parsed timestamp to POSIX seconds.
to_epoch_seconds = pd.Timestamp.timestamp
timestamp_s = date_time.map(to_epoch_seconds)

In [7]:
# step 6
# Encode time-of-day and time-of-year as sine/cosine pairs so that the
# features are periodic.
day = 24*60*60
year = (365.2425)*day

# Phase angle (radians) of each observation within the day and within the year.
day_phase = timestamp_s * (2 * np.pi / day)
year_phase = timestamp_s * (2 * np.pi / year)

visibility['day_sin'] = np.sin(day_phase)
visibility['day_cos'] = np.cos(day_phase)
visibility['year_sin'] = np.sin(year_phase)
visibility['year_cos'] = np.cos(year_phase)
visibility.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cloud_height,11360.0,6.929049,2.159598,2.0,5.0,6.0,9.0,10.0
cloud_amount,11360.0,4.674912,3.246734,0.0,0.0,6.0,8.0,9.0
temperature,11360.0,10.70515,10.880943,-20.8,1.2,10.3,19.7,37.8
temperature_dew,11360.0,4.112782,7.817085,-22.0,-1.5,4.2,10.4,21.5
pressure,11360.0,992.6527,7.295076,964.2,987.9,992.0,997.5,1015.3
pressure_tendency,11360.0,4.805722,2.476551,1.0,2.0,4.0,7.0,8.0
pressure_tendency_value,11360.0,0.7352113,0.648641,0.0,0.3,0.6,1.0,7.1
visibility,11360.0,0.04269366,0.202174,0.0,0.0,0.0,0.0,1.0
w_x,11360.0,-0.01415475,2.13474,-10.0,-1.285575,0.0,1.285575,10.0
w_y,11360.0,0.480422,2.820315,-10.0,-0.68404,0.0,1.879385,16.0


In [8]:
# step 7 Let's split the data into a training and validation set
# A random 20% of the rows (fixed seed) is held out for validation; every
# remaining row goes to the training frame.
val_dataframe = visibility.sample(frac=0.2, random_state=1337)
held_out_rows = val_dataframe.index
train_dataframe = visibility.drop(held_out_rows)

n_train, n_val = len(train_dataframe), len(val_dataframe)
print("Using %d samples for training and %d for validation" % (n_train, n_val))

Using 9088 samples for training and 2272 for validation


In [9]:
# step 8 Let's generate tf.data.Dataset objects for each dataframe
def dataframe_to_dataset(dataframe):
    """Build a shuffled ``tf.data.Dataset`` of (features, label) pairs.

    The "visibility" column is used as the label; all remaining columns are
    passed as a dict keyed by column name.  The function works on a copy, so
    the caller's dataframe is left unchanged.  The shuffle buffer covers every
    row of the frame.
    """
    features = dataframe.copy()
    target = features.pop("visibility")
    n_rows = len(features)
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), target))
    return dataset.shuffle(buffer_size=n_rows)


# Convert the training and validation frames, in that order.
train_ds, val_ds = (
    dataframe_to_dataset(frame) for frame in (train_dataframe, val_dataframe)
)

In [10]:
# Peek at a single (features, label) example from the training dataset.
for sample_features, sample_label in train_ds.take(1):
    print("Input:", sample_features)
    print("Target:", sample_label)

Input: {'cloud_amount': <tf.Tensor: shape=(), dtype=int64, numpy=2>, 'pressure': <tf.Tensor: shape=(), dtype=float64, numpy=992.0>, 'year_cos': <tf.Tensor: shape=(), dtype=float64, numpy=-0.8205894423287058>, 'pressure_tendency_value': <tf.Tensor: shape=(), dtype=float64, numpy=0.9>, 'w_y': <tf.Tensor: shape=(), dtype=float64, numpy=2.0>, 'cloud_height': <tf.Tensor: shape=(), dtype=int64, numpy=6>, 'pressure_tendency': <tf.Tensor: shape=(), dtype=int64, numpy=7>, 'day_cos': <tf.Tensor: shape=(), dtype=float64, numpy=-0.7071067811940189>, 'temperature_dew': <tf.Tensor: shape=(), dtype=float64, numpy=8.4>, 'year_sin': <tf.Tensor: shape=(), dtype=float64, numpy=0.5715181249432634>, 'temperature': <tf.Tensor: shape=(), dtype=float64, numpy=25.8>, 'w_x': <tf.Tensor: shape=(), dtype=float64, numpy=1.2246467991473532e-16>, 'day_sin': <tf.Tensor: shape=(), dtype=float64, numpy=-0.7071067811790761>}
Target: tf.Tensor(0, shape=(), dtype=int64)


In [10]:
# step 9 Let's batch the datasets:
# Both datasets are grouped into batches of the same size.
batch_size = 32
train_ds, val_ds = train_ds.batch(batch_size), val_ds.batch(batch_size)

In [15]:
# skip: abandoned draft of the feature encoders, kept commented out for reference only
# from tensorflow.keras.layers.experimental.preprocessing import IntegerLookup
# from tensorflow.keras.layers.experimental.preprocessing import Normalization
# # from tensorflow.keras.layers.experimental.preprocessing import StringLookup
# # from tensorflow.keras.layers import IntegerLookup
# # from tensorflow.keras.layers import Normalization
# # from tensorflow.keras.layers import StringLookup
# import tensorflow


# def encode_numerical_feature(feature, name, dataset):
#     # Create a Normalization layer for our feature
#     normalizer = Normalization()

#     # Prepare a Dataset that only yields our feature
#     feature_ds = dataset.map(lambda x, y: x[name])
#     feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

#     # Learn the statistics of the data
#     normalizer.adapt(feature_ds)

#     # Normalize the input feature
#     encoded_feature = normalizer(feature)
#     return encoded_feature


# def encode_categorical_feature(feature, name, dataset, is_string):
#     lookup_class = StringLookup if is_string else IntegerLookup
#     # Create a lookup layer which will turn strings into integer indices
# #     lookup = lookup_class(output_mode="binary")
#     lookup = lookup_class()

#     # Prepare a Dataset that only yields our feature
#     feature_ds = dataset.map(lambda x, y: x[name])
#     feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

#     # Learn the set of possible string values and assign them a fixed integer index
#     lookup.adapt(feature_ds)

#     # Turn the string input into integer indices
#     encoded_feature = lookup(feature)
#     return encoded_feature


In [16]:
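# skip: abandoned draft of the model, kept commented out for reference only.
# It failed with the ConcatV2 TypeError shown below this cell: the three
# lookup-encoded categorical features are int64 while the normalized
# temperature feature is float32, so they cannot be concatenated.
# Categorical features encoded as integers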

# cloud_amount = keras.Input(shape=(1,), name="cloud_amount", dtype="int64")
# cloud_height = keras.Input(shape=(1,), name="cloud_height", dtype="int64")
# pressure_tendency = keras.Input(shape=(1,), name="pressure_tendency", dtype="int64")

# # Categorical feature encoded as string
# # thal = keras.Input(shape=(1,), name="thal", dtype="string")

# # Numerical features

# temperature = keras.Input(shape=(1,), name="temperature")
# temperature_dew = keras.Input(shape=(1,), name="temperature_dew")
# pressure = keras.Input(shape=(1,), name="pressure")
# pressure_tendency_value = keras.Input(shape=(1,), name="pressure_tendency_value")
# w_x = keras.Input(shape=(1,), name="w_x")
# w_y = keras.Input(shape=(1,), name="w_y")
# day_sin = keras.Input(shape=(1,), name="day_sin")
# day_cos = keras.Input(shape=(1,), name="day_cos")
# year_sin = keras.Input(shape=(1,), name="year_sin")
# year_cos = keras.Input(shape=(1,), name="year_cos")

# all_inputs = [
#     cloud_amount,
#     cloud_height,
#     temperature,
#     temperature_dew,
#     pressure,
#     pressure_tendency,
#     pressure_tendency_value,
#     w_x,
#     w_y,
#     day_sin,
#     day_cos,
#     year_sin,
#     year_cos
# ]

# # Integer categorical features
# cloud_height_encoded = encode_categorical_feature(cloud_height, "cloud_height", train_ds, False)
# cloud_amount_encoded = encode_categorical_feature(cloud_amount, "cloud_amount", train_ds, False)
# pressure_tendency_encoded = encode_categorical_feature(pressure_tendency, "pressure_tendency", train_ds, False)

# # String categorical features
# # thal_encoded = encode_categorical_feature(thal, "thal", train_ds, True)

# # Numerical features
# temperature_encoded = encode_numerical_feature(temperature, "temperature", train_ds)
# temperature_dew_encoded = encode_numerical_feature(temperature_dew, "temperature_dew", train_ds)
# pressure_encoded = encode_numerical_feature(pressure, "pressure", train_ds)
# pressure_tendency_value_encoded = encode_numerical_feature(pressure_tendency_value, "pressure_tendency_value", train_ds)
# w_x_encoded = encode_numerical_feature(w_x, "w_x", train_ds)
# w_y_encoded = encode_numerical_feature(w_y, "w_y", train_ds)
# day_sin_encoded = encode_numerical_feature(day_sin, "day_sin", train_ds)
# day_cos_encoded = encode_numerical_feature(day_cos, "day_cos", train_ds)
# year_sin_encoded = encode_numerical_feature(year_sin, "year_sin", train_ds)
# year_cos_encoded = encode_numerical_feature(year_cos, "year_cos", train_ds)

# all_features = layers.concatenate(
#     [
#         cloud_amount_encoded,
#         cloud_height_encoded,
#         pressure_tendency_encoded,
#         temperature_encoded,
# #         temperature_dew_encoded,
# #         pressure_encoded,
# #         pressure_tendency_value_encoded,
# #         w_x_encoded,
# #         w_y_encoded,
# #         day_sin_encoded,
# #         day_cos_encoded,
# #         year_sin_encoded,
# #         year_cos_encoded
#     ]
# )
# x = layers.Dense(32, activation="relu")(all_features)
# x = layers.Dropout(0.5)(x)
# output = layers.Dense(1, activation="sigmoid")(x)
# model = keras.Model(all_inputs, output)
# model.compile("adam", "binary_crossentropy", metrics=["accuracy"])

TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op have types [int64, int64, int64, float32] that don't all match.

In [11]:
import tensorflow
def encode_numerical_feature(feature, name, dataset):
    """Normalize one numerical model input using statistics from ``dataset``.

    Args:
        feature: symbolic Keras input tensor to be normalized.
        name: key of this feature in the dataset's feature dict.
        dataset: ``tf.data.Dataset`` yielding ``(features_dict, label)``
            pairs; only ``features_dict[name]`` is read, to adapt the layer.

    Returns:
        The output of the adapted ``Normalization`` layer applied to
        ``feature``.
    """
    # Resolve the layer through `tf` (imported at the top of the notebook)
    # instead of the bare `preprocessing` alias: that alias is only imported
    # in a later cell, so relying on it makes this function raise NameError
    # if it is called before that cell has run.
    normalizer = tf.keras.layers.experimental.preprocessing.Normalization()

    # Prepare a Dataset that only yields our feature, with a trailing axis
    # added to every element.
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    return normalizer(feature)

def encode_integer_categorical_feature(feature, name, dataset):
    """Encode one integer-coded categorical input with a CategoryEncoding layer.

    The layer is created with ``output_mode="binary"`` and adapted on the
    values of ``name`` found in ``dataset`` (a dataset of
    ``(features_dict, label)`` pairs) before being applied to ``feature``.
    Returns the encoded tensor.
    """
    CategoryEncoding = tensorflow.keras.layers.experimental.preprocessing.CategoryEncoding
    category_encoder = CategoryEncoding(output_mode="binary")

    # Keep only this feature's values, then give each element a trailing axis.
    values_only = dataset.map(lambda features, label: features[name])
    column_ds = values_only.map(lambda value: tf.expand_dims(value, -1))

    # Let the layer learn which integer indices occur in the data.
    category_encoder.adapt(column_ds)

    # Apply the adapted encoder to the symbolic model input.
    return category_encoder(feature)

In [12]:
# step 10 declare the model inputs, encode every feature, build and compile the model
# NOTE: this import stays here because encode_numerical_feature (defined in
# the previous cell) looks up the `preprocessing` name when it is called.
from tensorflow.keras.layers.experimental import preprocessing

# Integer-coded categorical inputs
cloud_height = keras.Input(shape=(1,), name="cloud_height", dtype="int64")
cloud_amount = keras.Input(shape=(1,), name="cloud_amount", dtype="int64")
pressure_tendency = keras.Input(shape=(1,), name="pressure_tendency", dtype="int64")

# Numerical inputs
temperature = keras.Input(shape=(1,), name="temperature")
temperature_dew = keras.Input(shape=(1,), name="temperature_dew")
pressure = keras.Input(shape=(1,), name="pressure")
pressure_tendency_value = keras.Input(shape=(1,), name="pressure_tendency_value")
w_x = keras.Input(shape=(1,), name="w_x")
w_y = keras.Input(shape=(1,), name="w_y")
day_sin = keras.Input(shape=(1,), name="day_sin")
day_cos = keras.Input(shape=(1,), name="day_cos")
year_sin = keras.Input(shape=(1,), name="year_sin")
year_cos = keras.Input(shape=(1,), name="year_cos")

all_inputs = [
    cloud_height,
    cloud_amount,
    pressure_tendency,
    temperature,
    temperature_dew,
    pressure,
    pressure_tendency_value,
    w_x,
    w_y,
    day_sin,
    day_cos,
    year_sin,
    year_cos,
]

# Integer categorical features: encoders are adapted on the training set only
cloud_height_encoded = encode_integer_categorical_feature(cloud_height, "cloud_height", train_ds)
cloud_amount_encoded = encode_integer_categorical_feature(cloud_amount, "cloud_amount", train_ds)
pressure_tendency_encoded = encode_integer_categorical_feature(pressure_tendency, "pressure_tendency", train_ds)

# Numerical features: normalizers are adapted on the training set only
temperature_encoded = encode_numerical_feature(temperature, "temperature", train_ds)
temperature_dew_encoded = encode_numerical_feature(temperature_dew, "temperature_dew", train_ds)
pressure_encoded = encode_numerical_feature(pressure, "pressure", train_ds)
pressure_tendency_value_encoded = encode_numerical_feature(pressure_tendency_value, "pressure_tendency_value", train_ds)
w_x_encoded = encode_numerical_feature(w_x, 'w_x', train_ds)
w_y_encoded = encode_numerical_feature(w_y, 'w_y', train_ds)
day_sin_encoded = encode_numerical_feature(day_sin, 'day_sin', train_ds)
day_cos_encoded = encode_numerical_feature(day_cos, 'day_cos', train_ds)
year_sin_encoded = encode_numerical_feature(year_sin, 'year_sin', train_ds)
year_cos_encoded = encode_numerical_feature(year_cos, 'year_cos', train_ds)

# Every declared input must reach the network.  pressure_tendency_value_encoded
# used to be computed above but left out of this list, which left the
# "pressure_tendency_value" input disconnected from the output.
all_features = layers.concatenate(
    [
        cloud_height_encoded,
        cloud_amount_encoded,
        pressure_tendency_encoded,
        temperature_encoded,
        temperature_dew_encoded,
        pressure_encoded,
        pressure_tendency_value_encoded,
        w_x_encoded,
        w_y_encoded,
        day_sin_encoded,
        day_cos_encoded,
        year_sin_encoded,
        year_cos_encoded,
    ]
)

# Two hidden layers with batch normalization in between and light dropout
# before the output.
x = layers.Dense(256, activation="relu")(all_features)
x = layers.BatchNormalization()(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.1)(x)

# Single sigmoid unit: the model outputs one probability for the binary
# "visibility" label, trained with binary cross-entropy.
output = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(all_inputs, output)
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])

In [13]:
# Train on the batched training set, using val_ds as validation data.
n_epochs = 50
model.fit(x=train_ds, validation_data=val_ds, epochs=n_epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f98f73cdba8>

In [22]:
# Persist the trained model under this path.
saved_model_path = 'visibility_donetsk_model'
model.save(saved_model_path)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: visibility_donetsk_model/assets


In [14]:
from datetime import datetime
import time
def _decode_telegram(date_term, telegram):
    """Extract the model's feature values from one telegram and its timestamp.

    Fields are read from fixed character offsets, so ``telegram`` must have
    exactly the layout of the example used in this notebook (a 5-character
    prefix followed by space-separated 5-character groups).
    NOTE(review): the layout looks like a SYNOP (FM 12) report; confirm.

    Returns a dict keyed by the model's input names.
    Raises ValueError if a field is not numeric or ``date_term`` does not
    match ``%Y-%m-%d %H:%M:%S``.
    """
    def signed_tenths(sign_flag, digits):
        # A sign flag of '1' marks a negative value; the third digit is tenths.
        sign = '-' if sign_flag == '1' else ''
        return float(sign + digits[:2] + '.' + digits[2:3])

    features = {}
    features['cloud_height'] = int(telegram[14:15])
    features['cloud_amount'] = int(telegram[18:19])
    features['pressure_tendency'] = int(telegram[49:50])
    features['temperature'] = signed_tenths(telegram[25:26], telegram[26:29])
    features['temperature_dew'] = signed_tenths(telegram[31:32], telegram[32:35])

    # The pressure group drops the thousands digit: a leading '0' means the
    # value is 1000 or above, so the '1' is restored.
    thousands = '1' if telegram[37:38] == '0' else ''
    features['pressure'] = float(thousands + telegram[37:40] + '.' + telegram[40:41])
    features['pressure_tendency_value'] = float(telegram[50:52] + '.' + telegram[52:53])

    # Wind: the direction code is multiplied by 10 to get degrees, then split
    # into the same cosine/sine components used for the training data.
    wv = float(telegram[21:23])
    wd_rad = int(telegram[19:21])*10*np.pi/180
    features['w_x'] = wv*np.cos(wd_rad)
    features['w_y'] = wv*np.sin(wd_rad)

    # Use the same conversion as the training data (pd.Timestamp.timestamp).
    # time.mktime interprets the time in the machine's local time zone, which
    # shifts the day/year phase features relative to training whenever the
    # machine is not running in UTC.
    observed_at = datetime.strptime(date_term, "%Y-%m-%d %H:%M:%S")
    s = pd.Timestamp(observed_at).timestamp()
    day = 24*60*60
    year = (365.2425)*day
    features['day_sin'] = np.sin(s * (2 * np.pi / day))
    features['day_cos'] = np.cos(s * (2 * np.pi / day))
    features['year_sin'] = np.sin(s * (2 * np.pi / year))
    features['year_cos'] = np.cos(s * (2 * np.pi / year))
    return features


def prepare_data(date_term, telegram, model_name):
    """Decode one telegram and return the model's prediction for it.

    Args:
        date_term: observation time as a ``%Y-%m-%d %H:%M:%S`` string.
        telegram: raw telegram text (see ``_decode_telegram`` for the
            expected layout).
        model_name: currently unused; the prediction is made with the
            in-memory global ``model`` rather than a model loaded from disk.

    Returns:
        The result of ``model.predict`` for the single decoded observation.
    """
    ret = _decode_telegram(date_term, telegram)
    # One-element tensors: a batch containing just this observation.
    input_dict = {name: tf.convert_to_tensor([value]) for name, value in ret.items()}
    predictions = model.predict(input_dict)

    return predictions
# Example: score one telegram observed at the given date and time.
d_t = '2021-08-12 06:00:00'
telegram = 'ЩЭСМЮ 34519 11696 80000 10222 20204 39865 40096 52009 69972 72582 8635/ 333 20204 555 11024='
predictions = prepare_data(d_t, telegram, 'visibility_donetsk_model')

# The printed message is Russian for "Probability of visibility deterioration
# in the next 3 hours", followed by the percentage rounded to two decimals.
probability_pct = round(100 * predictions[0][0], 2)
print("Вероятность ухудшения видимости в следующие 3 часа " + str(probability_pct) + "%")

Вероятность ухудшения видимости в следующие 3 часа 0.0%
