In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test,y_test) = mnist.load_data()
x_train = x_train/255.0
x_test = x_test/255.0

In [4]:
model = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28,28)),
    tf.keras.layers.Dense(128,activation='relu'),
    tf.keras.layers.Dropout(.2),
    tf.keras.layers.Dense(10)
    ])

In [5]:
x_train.shape

(60000, 28, 28)

In [6]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [7]:
model.compile(optimizer='adam',
             loss=loss_fn,
             metrics = ['accuracy'])

In [8]:
model.fit(x_train,y_train,epochs=5,verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2ed36de8130>

In [9]:
model.evaluate(x_test,y_test,verbose=2)

313/313 - 0s - loss: 0.0692 - accuracy: 0.9769 - 305ms/epoch - 976us/step


[0.06923599541187286, 0.9768999814987183]

In [10]:
np.set_printoptions(precision=3, suppress=True)
abalone_train = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/abalone_train.csv",
    names=["Length", "Diameter", "Height", "Whole weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Age"])

abalone_train.head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Age
0,0.435,0.335,0.11,0.334,0.1355,0.0775,0.0965,7
1,0.585,0.45,0.125,0.874,0.3545,0.2075,0.225,6
2,0.655,0.51,0.16,1.092,0.396,0.2825,0.37,14
3,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16
4,0.545,0.42,0.13,0.879,0.374,0.1695,0.23,13


In [11]:
abalone_features = abalone_train.copy()
abalone_labels = abalone_features.pop('Age')
abalone_features = np.array(abalone_features)
abalone_features

array([[0.435, 0.335, 0.11 , ..., 0.136, 0.077, 0.097],
       [0.585, 0.45 , 0.125, ..., 0.354, 0.207, 0.225],
       [0.655, 0.51 , 0.16 , ..., 0.396, 0.282, 0.37 ],
       ...,
       [0.53 , 0.42 , 0.13 , ..., 0.374, 0.167, 0.249],
       [0.395, 0.315, 0.105, ..., 0.118, 0.091, 0.119],
       [0.45 , 0.355, 0.12 , ..., 0.115, 0.067, 0.16 ]])

In [12]:
abalone_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64),
    tf.keras.layers.Dense(1)
])

In [13]:
abalone_model.compile(optimizer='adam',
                     loss=tf.keras.losses.MeanSquaredError(),
                     )

In [14]:
abalone_model.fit(abalone_features,abalone_labels,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2ed3909dcf0>

In [15]:
normalize = tf.keras.layers.Normalization()

In [16]:
normalize.adapt(abalone_features)

In [17]:
abalone_features[:5]

array([[0.435, 0.335, 0.11 , 0.334, 0.136, 0.077, 0.097],
       [0.585, 0.45 , 0.125, 0.874, 0.354, 0.207, 0.225],
       [0.655, 0.51 , 0.16 , 1.092, 0.396, 0.282, 0.37 ],
       [0.545, 0.425, 0.125, 0.768, 0.294, 0.149, 0.26 ],
       [0.545, 0.42 , 0.13 , 0.879, 0.374, 0.17 , 0.23 ]])

In [18]:
norm_abalone_model = tf.keras.Sequential([
    normalize,
    abalone_model
])

In [19]:
norm_abalone_model.compile(optimizer='adam',loss=tf.keras.losses.MeanSquaredError(),metrics=['accuracy'])

In [20]:
norm_abalone_model.fit(abalone_features,abalone_labels,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2ed3921d540>

In [21]:
titanic = pd.read_csv("https://storage.googleapis.com/tf-datasets/titanic/train.csv")
titanic.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [22]:
titanic_features = titanic.copy()
titanic_labels = titanic_features.pop('survived')

In [23]:
input = tf.keras.Input(shape=(),dtype=tf.float32)
result = 2*input+1
result

<KerasTensor: shape=(None,) dtype=float32 (created by layer 'tf.__operators__.add')>

In [24]:
calc = tf.keras.Model(inputs=input,outputs=result)

In [25]:
print(calc(1).numpy())
print(calc(2).numpy())

3.0
5.0


In [26]:
inputs = {}
for name,column in titanic_features.items():
    dtype = column.dtype
    if dtype==object:
        dtype=tf.string
    else:
        dtype=tf.float32
    inputs[name] = tf.keras.Input(shape=(1,),name=name,dtype=dtype)
inputs

{'sex': <KerasTensor: shape=(None, 1) dtype=string (created by layer 'sex')>,
 'age': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'age')>,
 'n_siblings_spouses': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'n_siblings_spouses')>,
 'parch': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'parch')>,
 'fare': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'fare')>,
 'class': <KerasTensor: shape=(None, 1) dtype=string (created by layer 'class')>,
 'deck': <KerasTensor: shape=(None, 1) dtype=string (created by layer 'deck')>,
 'embark_town': <KerasTensor: shape=(None, 1) dtype=string (created by layer 'embark_town')>,
 'alone': <KerasTensor: shape=(None, 1) dtype=string (created by layer 'alone')>}

In [27]:
numeric_inputs = {name:input for name,input in inputs.items() if input.dtype==tf.float32}
x = tf.keras.layers.Concatenate()(list(numeric_inputs.values()))
norm = tf.keras.layers.Normalization()
norm.adapt(np.array(titanic[numeric_inputs.keys()]))
all_numeric_inputs = norm(x)
all_numeric_inputs

<KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'normalization_1')>

In [28]:
preprocessed_inputs = [all_numeric_inputs]

In [29]:
preprocessed_inputs

[<KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'normalization_1')>]

In [30]:
for name,input in inputs.items():
    if input.dtype==tf.float32:
        continue
    lookup = tf.keras.layers.StringLookup(vocabulary=np.unique(titanic_features[name]))
    one_hot = tf.keras.layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())
    x=lookup(input)
    x=one_hot(x)
    preprocessed_inputs.append(x)

In [31]:
preprocessed_inputs_cat = tf.keras.layers.Concatenate()(preprocessed_inputs)
titanic_preprocessing=tf.keras.Model(inputs,preprocessed_inputs_cat)
tf.keras.utils.plot_model(model=titanic_preprocessing, rankdir = 'LR', dpi=72, show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [32]:
titanic_features_dict = {name: np.array(value) for name,value in titanic_features.items()}

In [33]:
titanic_features_dict

{'sex': array(['male', 'female', 'female', 'female', 'male', 'male', 'female',
        'female', 'female', 'male', 'male', 'female', 'male', 'male',
        'female', 'female', 'male', 'male', 'female', 'male', 'male',
        'female', 'male', 'male', 'female', 'female', 'male', 'male',
        'male', 'male', 'female', 'female', 'female', 'male', 'female',
        'male', 'male', 'male', 'female', 'male', 'female', 'female',
        'male', 'male', 'female', 'male', 'male', 'male', 'female', 'male',
        'male', 'male', 'male', 'female', 'male', 'male', 'male', 'male',
        'male', 'male', 'male', 'male', 'female', 'male', 'male', 'female',
        'male', 'male', 'male', 'female', 'male', 'male', 'male', 'male',
        'male', 'male', 'female', 'male', 'female', 'male', 'male', 'male',
        'male', 'male', 'male', 'female', 'male', 'female', 'male',
        'female', 'male', 'male', 'male', 'male', 'male', 'male', 'female',
        'male', 'male', 'male', 'male', 'female',

In [39]:
features_dict = {name:values[:1] for name,values in titanic_features_dict.items()}
print(features_dict)
titanic_preprocessing(features_dict)

{'sex': array(['male'], dtype=object), 'age': array([22.]), 'n_siblings_spouses': array([1], dtype=int64), 'parch': array([0], dtype=int64), 'fare': array([7.25]), 'class': array(['Third'], dtype=object), 'deck': array(['unknown'], dtype=object), 'embark_town': array(['Southampton'], dtype=object), 'alone': array(['n'], dtype=object)}


<tf.Tensor: shape=(1, 28), dtype=float32, numpy=
array([[-0.61 ,  0.395, -0.479, -0.497,  0.   ,  0.   ,  1.   ,  0.   ,
         0.   ,  0.   ,  1.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ,  0.   ,  0.   ,  1.   ,  0.   ,  0.   ,  0.   ,  1.   ,
         0.   ,  0.   ,  1.   ,  0.   ]], dtype=float32)>

In [49]:
def titanic_model(preprocessing_head,inputs):
    body = tf.keras.Sequential([
        tf.keras.layers.Dense(64),
        tf.keras.layers.Dense(1)
    ])
    preprocessed_inputs = preprocessing_head(inputs)
    result = body(preprocessed_inputs)
    model = tf.keras.Model(inputs,result)
    model.compile(loss=tf.losses.BinaryCrossentropy(from_logits=True),
                 optimizer='adam')
    return model


In [50]:
titanic_model = titanic_model(titanic_preprocessing,inputs)

In [51]:
titanic_model.fit(x=titanic_features_dict,y=titanic_labels,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2ed5cbb7fd0>

In [52]:
titanic_model.save('test')
reloaded = tf.keras.models.load_model('test')

INFO:tensorflow:Assets written to: test\assets


In [53]:
features_dict = {name:values[:1] for name, values in titanic_features_dict.items()}
before = titanic_model(features_dict)
after = reloaded(features_dict)
assert (before-after)<1e-3
print(before)
print(after)

tf.Tensor([[-1.894]], shape=(1, 1), dtype=float32)
tf.Tensor([[-1.894]], shape=(1, 1), dtype=float32)


In [55]:
import itertools

def slices(features):
    for i in itertools.count():#for each feature take index i
        example = {name:values[i] for name,values in features.items()}
        yield example

In [57]:
for example in slices(titanic_features_dict):
    for name,value in example.items():
        print(f'{name:19s}: {value}')
    break

sex                : male
age                : 22.0
n_siblings_spouses : 1
parch              : 0
fare               : 7.25
class              : Third
deck               : unknown
embark_town        : Southampton
alone              : n


In [63]:
features_ds = tf.data.Dataset.from_tensor_slices(titanic_features_dict)

In [59]:
for example in features_ds:
    for name,value in example.items():
        print(f'{name:19s}: {value}')
    break

sex                : b'male'
age                : 22.0
n_siblings_spouses : 1
parch              : 0
fare               : 7.25
class              : b'Third'
deck               : b'unknown'
embark_town        : b'Southampton'
alone              : b'n'


In [64]:
titanic_ds = tf.data.Dataset.from_tensor_slices((titanic_features_dict,titanic_labels))

In [65]:
titanic_batches = titanic_ds.shuffle(len(titanic_labels)).batch(32)

In [66]:
titanic_model.fit(titanic_batches,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2ed63783d60>

In [67]:
titanic_file_path = tf.keras.utils.get_file("train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv")

Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv


In [70]:
titanic_csv_ds = tf.data.experimental.make_csv_dataset(
    titanic_file_path,
    batch_size=5,
    label_name='survived',
    num_epochs=1,
    ignore_errors=True
    )

In [75]:
for batch,label in titanic_csv_ds.take(1):
    for key,value in batch.items():
        print(f'{key:20s}: {value}')
    print()
    print(f"{'label':20s}: {label}")

sex                 : [b'female' b'male' b'male' b'female' b'male']
age                 : [35.   50.   28.    0.75 28.  ]
n_siblings_spouses  : [1 1 0 2 0]
parch               : [0 0 0 1 0]
fare                : [ 90.    106.425   7.25   19.258   7.896]
class               : [b'First' b'First' b'Third' b'Third' b'Third']
deck                : [b'C' b'C' b'unknown' b'unknown' b'unknown']
embark_town         : [b'Southampton' b'Cherbourg' b'Southampton' b'Cherbourg' b'Southampton']
alone               : [b'n' b'n' b'y' b'n' b'y']

label               : [1 0 0 1 0]


In [77]:
traffic_volume_csv_gz = tf.keras.utils.get_file(
    'Metro_Interstate_Traffic_Volume.csv.gz', 
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00492/Metro_Interstate_Traffic_Volume.csv.gz",
    cache_dir='.', cache_subdir='traffic'
)

Downloading data from https://archive.ics.uci.edu/ml/machine-learning-databases/00492/Metro_Interstate_Traffic_Volume.csv.gz


In [80]:
traffic_volume_csv_gz_ds = tf.data.experimental.make_csv_dataset(
    traffic_volume_csv_gz,
    batch_size=256,
    label_name='traffic_volume',
    num_epochs=1,
    compression_type='GZIP'
)

for batch,label in traffic_volume_csv_gz_ds:
    for key,value in batch.items():
        print(f'{key:20s}: {value[:5]}')
    print(f"{'label':20s}: {label[:5]}")
    break

holiday             : [b'None' b'None' b'None' b'None' b'None']
temp                : [294.4  281.   272.99 297.2  269.6 ]
rain_1h             : [0.51 0.   0.   0.   0.  ]
snow_1h             : [0. 0. 0. 0. 0.]
clouds_all          : [ 8 90  1  1 90]
weather_main        : [b'Rain' b'Clouds' b'Clear' b'Clear' b'Clouds']
weather_description : [b'light rain' b'overcast clouds' b'sky is clear' b'sky is clear'
 b'overcast clouds']
date_time           : [b'2013-07-14 09:00:00' b'2012-10-19 12:00:00' b'2012-10-31 01:00:00'
 b'2013-05-09 14:00:00' b'2012-11-29 08:00:00']
label               : [2665 4489  346 5598 6072]


In [83]:
%%time
for i,(batch,label) in enumerate(traffic_volume_csv_gz_ds.repeat(20)):
    if i%40==0:
        print('.',end='')
print()

...............................................................................................
CPU times: total: 10.4 s
Wall time: 5.96 s


In [85]:
%%time
caching = traffic_volume_csv_gz_ds.cache().shuffle(1000)

for i,(batch,label) in enumerate(caching.shuffle(1000).repeat(20)):
    if i%40==0:
        print('.',end='')
print()

...............................................................................................
CPU times: total: 1.05 s
Wall time: 862 ms


In [86]:
%%time
snapshot = tf.data.experimental.snapshot('titanic.tfsnap')
snapshotting = traffic_volume_csv_gz_ds.apply(snapshot).shuffle(1000)
for i,(batch,label) in enumerate(snapshotting.shuffle(1000).repeat(20)):
    if i%40==0:
        print('.',end='')
print()

Instructions for updating:
Use `tf.data.Dataset.snapshot(...)`.
...............................................................................................
CPU times: total: 2.53 s
Wall time: 1.24 s


In [4]:
A = tf.constant([[1,2],[3,6]])

In [46]:
import datetime, warnings, scipy

In [48]:
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt

In [51]:
import math
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error

In [54]:
df = pd.read_csv('air_quality.csv',sep=' ')
df.head()

Unnamed: 0,No,Profile,Tanggal,Jam,O3,CO,NO2,SO2,NO,CO2,...,PM4,PM10,TSP,TEMP,HUM,WS,WD,ISPU,Status,Unnamed: 22
0,1,BANDUNG,2019-05-19,00:04:19,21.35,0.78,6.77,14.06,26.168,409.86,...,0.0,2.77,0.0,21.4,100.0,0.6,5,9,BAIK,
1,2,BANDUNG,2019-05-19,00:15:01,5.93,0.82,14.87,17.23,53.853,394.79,...,0.0,11.95,0.0,42.6,200.0,0.0,2,12,BAIK,
2,3,BANDUNG,2019-05-19,00:30:06,6.62,0.63,22.24,2.31,38.304,390.66,...,0.0,12.71,0.0,63.6,299.2,0.0,2,13,BAIK,
3,4,BANDUNG,2019-05-19,00:49:19,13.29,0.44,19.0,5.32,18.962,401.23,...,0.0,1.2,0.0,21.3,88.2,0.525,5,6,BAIK,
4,5,BANDUNG,2019-05-19,01:04:19,2.44,0.47,26.74,13.61,35.27,381.67,...,0.0,3.52,0.0,21.3,97.4,0.975,5,9,BAIK,


In [56]:
def combine_date(df,tab_name):
    list_tab = []
    for i in range(df.shape[0]):
        list_tab.append(df.loc[i,'Tanggal']+'T'+df.loc[i,tab_name][0:2])
    return np.array(list_tab, dtype='datetime64')

df['Datetime'] = combine_date(df,'Jam')

In [59]:
df2 = df.groupby(['Datetime']).mean()
df2.head()

Unnamed: 0_level_0,No,O3,CO,NO2,SO2,NO,CO2,VOC,PM1,PM2.5,PM4,PM10,TSP,TEMP,HUM,WS,WD,ISPU,Unnamed: 22
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-05-19 00:00:00,2.5,11.7975,0.6675,15.72,9.73,34.32175,399.135,439.223,0.0,0.0,0.0,7.1575,0.0,37.225,171.85,0.28125,3.5,10.0,
2019-05-19 01:00:00,6.0,3.816667,0.373333,22.6,20.09,39.821,388.536667,250.394,0.0,0.0,0.0,3.293333,0.0,20.966667,95.033333,0.675,4.666667,12.666667,
2019-05-19 02:00:00,9.5,6.45,0.3325,24.13,26.31,51.29325,396.2425,251.04575,0.0,0.0,0.0,6.4875,0.0,20.15,97.5,0.09375,3.75,16.25,
2019-05-19 03:00:00,13.5,4.74,0.18,25.975,24.39,43.5185,401.355,252.24875,0.0,0.0,0.0,4.7,0.0,19.85,100.0,0.075,5.0,15.0,
2019-05-19 04:00:00,17.5,19.6925,0.2025,20.395,18.6225,53.37925,399.335,694.23975,0.0,0.0,0.0,5.2625,0.0,53.475,272.5,0.1875,5.25,13.25,


In [65]:
df2.drop(index=[df2.index[0],df2.index[df2.shape[0]-1]],inplace=True)

In [66]:
df2.head()

Unnamed: 0_level_0,No,O3,CO,NO2,SO2,NO,CO2,VOC,PM1,PM2.5,PM4,PM10,TSP,TEMP,HUM,WS,WD,ISPU,Unnamed: 22
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-05-19 02:00:00,9.5,6.45,0.3325,24.13,26.31,51.29325,396.2425,251.04575,0.0,0.0,0.0,6.4875,0.0,20.15,97.5,0.09375,3.75,16.25,
2019-05-19 03:00:00,13.5,4.74,0.18,25.975,24.39,43.5185,401.355,252.24875,0.0,0.0,0.0,4.7,0.0,19.85,100.0,0.075,5.0,15.0,
2019-05-19 04:00:00,17.5,19.6925,0.2025,20.395,18.6225,53.37925,399.335,694.23975,0.0,0.0,0.0,5.2625,0.0,53.475,272.5,0.1875,5.25,13.25,
2019-05-19 05:00:00,21.5,17.24,0.2975,27.545,33.54,52.431,393.27,252.19325,0.0,0.0,0.0,6.51,0.0,18.05,100.0,0.24375,5.0,21.0,
2019-05-19 06:00:00,25.5,11.6125,0.7575,23.185,17.8325,37.16625,400.11,252.054,0.0,0.0,0.0,4.175,0.0,18.875,99.7,0.2625,5.0,12.0,
