## For python3.7 tensorflow have different inner structure and logic
## For easy tests, this notebook running in python3.7 and make models from python3.7

In [1]:
import csv
import gc
import json
import pickle
import os

import pandas as pd
import numpy as np
import tensorflow as tf
import tf2onnx

## Open dataset https://www.kaggle.com/datasets/jsphyg/weather-dataset-rattle-package

In [2]:
with open('datasets/weatherAUS.csv') as f:
    reader = csv.reader(f)
    _data = list(reader)
    df = pd.DataFrame(_data[1:], columns=_data[0])

In [3]:
df

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44,W,...,71,22,1007.7,1007.1,8,,16.9,21.8,No,No
1,2008-12-02,Albury,7.4,25.1,0,,,WNW,44,NNW,...,44,25,1010.6,1007.8,,,17.2,24.3,No,No
2,2008-12-03,Albury,12.9,25.7,0,,,WSW,46,W,...,38,30,1007.6,1008.7,,2,21,23.2,No,No
3,2008-12-04,Albury,9.2,28,0,,,NE,24,SE,...,45,16,1017.6,1012.8,,,18.1,26.5,No,No
4,2008-12-05,Albury,17.5,32.3,1,,,W,41,ENE,...,82,33,1010.8,1006,7,8,17.8,29.7,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,2017-06-21,Uluru,2.8,23.4,0,,,E,31,SE,...,51,24,1024.6,1020.3,,,10.1,22.4,No,No
145456,2017-06-22,Uluru,3.6,25.3,0,,,NNW,22,SE,...,56,21,1023.5,1019.1,,,10.9,24.5,No,No
145457,2017-06-23,Uluru,5.4,26.9,0,,,N,37,SE,...,53,24,1021,1016.8,,,12.5,26.1,No,No
145458,2017-06-24,Uluru,7.8,27,0,,,SE,28,SSE,...,51,24,1019.4,1016.5,3,2,15.1,26,No,No


In [4]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(8,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam', 
    metrics=['accuracy']
)

In [5]:
# Берем только числовые колонки
columns = ['RainToday', 'MinTemp', 'MaxTemp', 'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Temp9am', 'Temp3pm']
# Убираем пустые значения
df = df[df[columns]!='NA'].dropna(subset=columns)
# Делим на X, Y
X, Y = df.loc[:, df.columns!='RainToday'], df.loc[:, df.columns=='RainToday']
df

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,,,13.4,22.9,,,,,,,...,71,22,1007.7,1007.1,,,16.9,21.8,No,
1,,,7.4,25.1,,,,,,,...,44,25,1010.6,1007.8,,,17.2,24.3,No,
2,,,12.9,25.7,,,,,,,...,38,30,1007.6,1008.7,,,21,23.2,No,
3,,,9.2,28,,,,,,,...,45,16,1017.6,1012.8,,,18.1,26.5,No,
4,,,17.5,32.3,,,,,,,...,82,33,1010.8,1006,,,17.8,29.7,No,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145454,,,3.5,21.8,,,,,,,...,59,27,1024.7,1021.2,,,9.4,20.9,No,
145455,,,2.8,23.4,,,,,,,...,51,24,1024.6,1020.3,,,10.1,22.4,No,
145456,,,3.6,25.3,,,,,,,...,56,21,1023.5,1019.1,,,10.9,24.5,No,
145457,,,5.4,26.9,,,,,,,...,53,24,1021,1016.8,,,12.5,26.1,No,


In [6]:
# Приготовим данные, приведем X в float64, а Y в int64
X[columns[1:]] = X[columns[1:]].astype({col: float for col in columns[1:]})
Y = Y.RainToday.map(dict(Yes=1, No=0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [7]:
model.fit(X[columns[1:]], Y, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x131440f10>

In [8]:
model(X[columns[1:]].values)

<tf.Tensor: shape=(125347, 1), dtype=float32, numpy=
array([[0.38235638],
       [0.02211618],
       [0.06743062],
       ...,
       [0.00384678],
       [0.00444492],
       [0.01266538]], dtype=float32)>

In [9]:
model.__call__(X[columns[1:]].values)

<tf.Tensor: shape=(125347, 1), dtype=float32, numpy=
array([[0.38235638],
       [0.02211618],
       [0.06743062],
       ...,
       [0.00384678],
       [0.00444492],
       [0.01266538]], dtype=float32)>

In [10]:
model(np.array([[1., 2., 3., 4., 5., 6., 7., 8.]]))

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.64057606]], dtype=float32)>

## Сохраняем модель

In [12]:
with open("models/tensorflow-binary_cls_model37.pckl", "wb") as f:
    pickle.dump(model, f)

    
model.save("models/tensorflow-binary_cls_model37.keras")
model.save("models/tensorflow-binary_cls_model37.h5")
# model.export("models/tensorflow-binary_cls_model37.savedmodel")

INFO:tensorflow:Assets written to: ram://94cb4b1e-9f21-4a37-a739-73f8c10cf5df/assets


## Columns
* 'MinTemp'
* 'MaxTemp'
* 'Humidity9am'
* 'Humidity3pm'
* 'Pressure9am'
* 'Pressure3pm'
* 'Temp9am'
* 'Temp3pm'

Принимает на вход массив из 8 float64 числа. Выдает тензор с числом

## Requirements
* tensorflow
* pandas