In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
# Echo the installed TensorFlow version as this cell's output.
tf.__version__

'2.3.1'

In [2]:
# Load the semicolon-separated observation table; the first row holds the column names.
# `squeeze=True` was dropped: it only takes effect when the parsed data has a single
# column (this file has many), and the keyword no longer exists in pandas 2.x.
visibility = pd.read_csv('donetsk_17_21_visibility.csv', sep=';', header=0, parse_dates=True)
visibility.head()

Unnamed: 0,started_at,cloud_height,cloud_amount,wind_direction,wind_speed,temperature,temperature_dew,pressure,pressure_tendency,pressure_tendency_value,visibility
0,2017-09-01 00:00:00,5,8,0,0,14.4,13.5,989.4,7,1.3,0
1,2017-09-01 03:00:00,5,8,0,0,14.2,13.4,989.3,7,0.1,0
2,2017-09-01 06:00:00,5,8,0,0,15.2,13.2,989.7,3,0.4,0
3,2017-09-01 09:00:00,5,6,25,2,17.7,13.2,988.9,8,0.8,0
4,2017-09-01 12:00:00,5,7,25,4,16.5,13.4,989.1,3,0.2,0


In [3]:
# Step 4: replace the (direction, speed) wind columns with Cartesian components.
# Both source columns are removed from the frame as they are read.
wv = visibility.pop('wind_speed')

# The stored direction is scaled by 10 and then converted from degrees to radians.
wd_rad = (visibility.pop('wind_direction') * 10) * np.pi / 180

# Project the wind speed onto the x and y axes.
visibility['w_x'] = np.cos(wd_rad) * wv
visibility['w_y'] = np.sin(wd_rad) * wv

In [4]:
# Step 5: remove 'started_at' from the frame and express it as POSIX seconds.
started_at = visibility.pop('started_at')
date_time = pd.to_datetime(started_at, format='%Y-%m-%d %H:%M:%S')
timestamp_s = date_time.map(lambda ts: ts.timestamp())

In [5]:
# Step 6: encode the position within the day and within the year as sine/cosine pairs.
day = 24*60*60
year = (365.2425)*day

for label, period in (('day', day), ('year', year)):
  # Angle that completes one full turn every `period` seconds.
  angle = timestamp_s * (2 * np.pi / period)
  visibility[label + '_sin'] = np.sin(angle)
  visibility[label + '_cos'] = np.cos(angle)
visibility.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cloud_height,11360.0,6.929049,2.159598,2.0,5.0,6.0,9.0,10.0
cloud_amount,11360.0,4.674912,3.246734,0.0,0.0,6.0,8.0,9.0
temperature,11360.0,10.70515,10.880943,-20.8,1.2,10.3,19.7,37.8
temperature_dew,11360.0,4.112782,7.817085,-22.0,-1.5,4.2,10.4,21.5
pressure,11360.0,992.6527,7.295076,964.2,987.9,992.0,997.5,1015.3
pressure_tendency,11360.0,4.805722,2.476551,1.0,2.0,4.0,7.0,8.0
pressure_tendency_value,11360.0,0.7352113,0.648641,0.0,0.3,0.6,1.0,7.1
visibility,11360.0,0.04269366,0.202174,0.0,0.0,0.0,0.0,1.0
w_x,11360.0,-0.01415475,2.13474,-10.0,-1.285575,0.0,1.285575,10.0
w_y,11360.0,0.480422,2.820315,-10.0,-0.68404,0.0,1.879385,16.0


In [6]:
# Split the dataframe into train, validation, and test.
# A fixed seed makes the split (and everything trained on it) repeatable across
# kernel restarts. Stratifying on the label keeps the share of positive rows the
# same in every subset, which matters because positives are rare (the label mean
# reported by describe() above is about 0.04).
SPLIT_SEED = 42
train, test = train_test_split(
    visibility,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=visibility['visibility'],
)
train, val = train_test_split(
    train,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=train['visibility'],
)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')


7270 train examples
1818 validation examples
2272 test examples


In [7]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Turn a DataFrame into a batched tf.data.Dataset of (features, label) pairs.

  Args:
    dataframe: frame containing the feature columns plus a 'visibility' column,
      which is split off and used as the label. The caller's frame is not modified.
    shuffle: when True, shuffle with a buffer as large as the frame.
    batch_size: number of rows per batch.

  Returns:
    A dataset whose elements are (dict of column name -> batch, label batch).
  """
  features = dataframe.copy()
  labels = features.pop('visibility')
  ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(features))
  ds = ds.batch(batch_size)
  # prefetch() counts elements of the dataset it is applied to, and after batch()
  # one element is a whole batch. Passing batch_size here therefore buffered
  # batch_size batches; let tf.data choose the buffer size instead.
  ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
  return ds

In [8]:
# Build a training dataset with only 5 rows per batch.
batch_size = 5
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)

In [9]:
# Pull a single (features, labels) batch and show what it contains.
train_features, label_batch = next(iter(train_ds.take(1)))
print('Every feature:', list(train_features))
print('A batch of pressure:', train_features['pressure'])
print('A batch of targets:', label_batch )

Every feature: ['day_sin', 'w_x', 'year_sin', 'cloud_amount', 'pressure', 'year_cos', 'day_cos', 'pressure_tendency', 'temperature_dew', 'w_y', 'cloud_height', 'pressure_tendency_value', 'temperature']
A batch of pressure: tf.Tensor([ 992.6 1004.6  990.6  998.2 1012. ], shape=(5,), dtype=float64)
A batch of targets: tf.Tensor([0 0 0 0 0], shape=(5,), dtype=int64)


In [10]:
def get_normalization_layer(name, dataset):
  """Build a Normalization layer adapted to one feature of `dataset`.

  Args:
    name: key of the feature inside each element's feature dict.
    dataset: dataset yielding (features dict, label) pairs.

  Returns:
    The Normalization layer after `adapt` has been run on that feature.
  """
  # Drop the label and every other feature, keeping only the requested one.
  single_feature_ds = dataset.map(lambda features, _label: features[name])

  scaler = preprocessing.Normalization(axis=None)
  # Let the layer learn its statistics from the feature values.
  scaler.adapt(single_feature_ds)
  return scaler

In [11]:
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
  """Build a callable that maps a categorical feature to its category encoding.

  Args:
    name: key of the feature inside each element's feature dict.
    dataset: dataset yielding (features dict, label) pairs; used to learn the vocabulary.
    dtype: 'string' selects a StringLookup layer, anything else an IntegerLookup layer.
    max_tokens: forwarded to the lookup layer.

  Returns:
    A one-argument callable that applies the lookup layer and then the
    CategoryEncoding layer to its input.
  """
  # NOTE(review): `num_tokens=` and `vocabulary_size()` appear to belong to a newer
  # preprocessing API than the TensorFlow version echoed at the top of the notebook;
  # confirm against the installed release before relying on this helper.
  lookup_cls = preprocessing.StringLookup if dtype == 'string' else preprocessing.IntegerLookup
  index = lookup_cls(max_tokens=max_tokens)

  # Learn the vocabulary from the values of this one feature.
  feature_ds = dataset.map(lambda features, _label: features[name])
  index.adapt(feature_ds)

  # Encoder sized to the learned vocabulary.
  encoder = preprocessing.CategoryEncoding(num_tokens=index.vocabulary_size())

  def encode(feature):
    # The closure keeps both layers alive so they can be wired into a functional model.
    return encoder(index(feature))

  return encode


In [12]:
# Rebuild the three datasets with 64 rows per batch; only the training set is shuffled.
batch_size = 64
train_ds, val_ds, test_ds = (
    df_to_dataset(frame, shuffle=do_shuffle, batch_size=batch_size)
    for frame, do_shuffle in ((train, True), (val, False), (test, False))
)

In [13]:
all_inputs = []
encoded_features = []

# Every column listed here gets its own scalar Input followed by a Normalization
# layer adapted on the training dataset.
NUMERIC_HEADERS = [
    'cloud_amount', 'cloud_height', 'pressure_tendency',
    'temperature', 'temperature_dew', 'pressure', 'pressure_tendency_value',
    'w_x', 'w_y',
    'day_sin', 'day_cos', 'year_sin', 'year_cos',
]
for header in NUMERIC_HEADERS:
  feature_input = tf.keras.Input(shape=(1,), name=header)
  normalizer = get_normalization_layer(header, train_ds)
  all_inputs.append(feature_input)
  encoded_features.append(normalizer(feature_input))

In [None]:
# Optional: category-encode the integer-coded columns instead of normalizing them.
#
# This cell was marked "skip" but would still execute under Restart & Run All.
# The three columns below are already registered as numeric inputs, so running it
# as-is adds a second Input with the same name for each of them, which a Keras
# functional model rejects (layer names must be unique). It is therefore guarded
# by a flag that defaults to off.
# To enable it, set the flag to True AND remove these three names from the
# numeric feature list.
USE_CATEGORICAL_ENCODING = False

# (column name, max_tokens passed to the lookup layer)
CATEGORICAL_HEADERS = [
    ('cloud_amount', None),
    ('cloud_height', 10),
    ('pressure_tendency', 10),
]

if USE_CATEGORICAL_ENCODING:
  for header, max_tokens in CATEGORICAL_HEADERS:
    categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='int64')
    encoding_layer = get_category_encoding_layer(header, train_ds, dtype='int64',
                                                 max_tokens=max_tokens)
    all_inputs.append(categorical_col)
    encoded_features.append(encoding_layer(categorical_col))

In [14]:
# Join all encoded inputs and put a small dense head on top of them.
all_features = layers.concatenate(encoded_features)
hidden = layers.Dense(32, activation="relu")(all_features)
hidden = layers.Dropout(0.5)(hidden)
output = layers.Dense(1)(hidden)

model = tf.keras.Model(inputs=all_inputs, outputs=output)
model.compile(
    optimizer='adam',
    # The last Dense layer has no activation, so the loss is told to expect logits.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


In [15]:
# Train for 10 epochs on the training dataset, using the validation dataset for validation.
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f71ba1a7550>

In [17]:
# Persist the trained model; it can be read back with tf.keras.models.load_model.
model.save(filepath='donetsk_visibility_model_2')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: donetsk_visibility_model_2/assets


In [16]:
from datetime import datetime, timezone
import time

# The last character read from a telegram is index 52, so anything shorter
# cannot contain all the fields.
_MIN_TELEGRAM_LENGTH = 53


def _signed_tenths(sign_flag, whole, tenth):
    """Build a float from 'whole.tenth', negated when the sign flag character is '1'."""
    sign = '-' if sign_flag == '1' else ''
    return float(sign + whole + '.' + tenth)


def _telegram_features(telegram):
    """Read the weather fields from their fixed character positions in the telegram."""
    # NOTE(review): the offsets look like a SYNOP-style report with a 5-character
    # prefix and space-separated 5-digit groups - confirm against the message spec.
    if len(telegram) < _MIN_TELEGRAM_LENGTH:
        raise ValueError(
            'telegram is too short: expected at least %d characters, got %d'
            % (_MIN_TELEGRAM_LENGTH, len(telegram))
        )

    features = {}
    features['cloud_height'] = int(telegram[14:15])
    features['cloud_amount'] = int(telegram[18:19])
    features['pressure_tendency'] = int(telegram[49:50])
    features['temperature'] = _signed_tenths(telegram[25:26], telegram[26:28], telegram[28:29])
    features['temperature_dew'] = _signed_tenths(telegram[31:32], telegram[32:34], telegram[34:35])

    # The pressure field drops the thousands digit: a leading '0' means 1000+ hPa.
    thousands = '1' if telegram[37:38] == '0' else ''
    features['pressure'] = float(thousands + telegram[37:40] + '.' + telegram[40:41])
    features['pressure_tendency_value'] = float(telegram[50:52] + '.' + telegram[52:53])

    # Wind: the direction code is scaled by 10 to degrees, then split into components.
    wv = float(telegram[21:23] + '.')
    wd_rad = int(telegram[19:21]) * 10 * np.pi / 180
    features['w_x'] = wv * np.cos(wd_rad)
    features['w_y'] = wv * np.sin(wd_rad)
    return features


def _time_features(date_term):
    """Encode the observation time as day/year sine-cosine pairs."""
    observed = datetime.strptime(date_term, "%Y-%m-%d %H:%M:%S")
    # The training cells derive seconds with pd.Timestamp.timestamp, which reads a
    # naive timestamp as UTC. time.mktime() would read it as *local* time and shift
    # the day/year phase by the machine's UTC offset, so pin UTC explicitly.
    s = observed.replace(tzinfo=timezone.utc).timestamp()
    day = 24*60*60
    year = (365.2425)*day
    return {
        'day_sin': np.sin(s * (2 * np.pi / day)),
        'day_cos': np.cos(s * (2 * np.pi / day)),
        'year_sin': np.sin(s * (2 * np.pi / year)),
        'year_cos': np.cos(s * (2 * np.pi / year)),
    }


def prepare_data(date_term, telegram, model_name, model=None):
    """Decode one telegram into model features and return the model's prediction.

    Args:
        date_term: observation time formatted as 'YYYY-MM-DD HH:MM:SS'.
        telegram: report text; fields are read from fixed character positions.
        model_name: path handed to tf.keras.models.load_model when `model` is None.
        model: optional already-loaded model. Pass it to avoid reloading the
            model from disk on every call.

    Returns:
        Whatever `model.predict` returns for the single-row input. The caller in
        this notebook applies a sigmoid to it.

    Raises:
        ValueError: if the telegram is too short, a field is not numeric, or
            `date_term` does not match the expected format.
    """
    ret = _telegram_features(telegram)
    ret.update(_time_features(date_term))
    input_dict = {name: tf.convert_to_tensor([value]) for name, value in ret.items()}
    if model is None:
        model = tf.keras.models.load_model(model_name)
    predictions = model.predict(input_dict)

    return predictions

# Example: score one telegram with the saved model and report the result as a percentage.
d_t = '2021-08-20 09:00:00'
telegram = 'ЩЭСИД 34519 41697 52502 10244 20181 39841 40071 52006 70298 85100 555 1/036='
predictions = prepare_data(
    date_term=d_t,
    telegram=telegram,
    model_name='donetsk_visibility_model_2',
)
# The model was compiled with from_logits=True, so a sigmoid turns its output into a probability.
prob = tf.nn.sigmoid(predictions[0])
message = "Вероятность ухудшения видимости в оставшееся до срока время %.1f проц. "
print(message % (100 * prob))


Вероятность ухудшения видимости в оставшееся до срока время 1.8 проц. 


In [19]:
# Show the sigmoid output computed in the previous cell as the raw tensor.
prob

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.02367917], dtype=float32)>