<a href="https://colab.research.google.com/github/juglar-diaz/STTD/blob/master/STT_TF_rnn_MultiContext.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Intro

In [0]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

# Load the TensorBoard notebook extension.
%load_ext tensorboard

TensorFlow 2.x selected.


In [0]:
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'
print(accelerator)



cu80


In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
import math
import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

import unicodedata
import re
import numpy as np
import os
import io
import itertools


import random
import pickle
import os
sep = os.sep
import os.path

import pandas as pd
import bisect
import time
import scipy.stats as stats

import datetime
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.cluster import MeanShift
from sklearn.neighbors import NearestNeighbors

print(tf.__version__)

2.0.0


In [0]:
t = pd.Timestamp(year=2017, month=12, day=2, hour=12)

In [0]:
t.weekday()

5

In [0]:
t.weekofyear

48

In [0]:
device_name = tf.test.gpu_device_name()
if not tf.test.is_gpu_available():
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [0]:
import sys
# the mock-0.3.1 dir contains testcase.py, testutils.py & mock.py
sys.path.append('drive/My Drive/Colab Notebooks/STTD/Retrieval_TF/')


In [0]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


##Pre-processing

###Discretizers

In [0]:
class Discretize:
    def __init__(self):
        pass
    def fit_transform(self):
        pass
    def transform(self):
        pass

In [0]:
class Round(Discretize):
    def __init__(self, div= 0.001):
        self.div= div
    def discretize(self, latitudes, longitudes):
        lats = [round(lat - lat%self.div + self.div/2, 6) for lat in latitudes]
        longs = [round(lon - lon%self.div + self.div/2, 6) for lon in longitudes]
        discretizations = list(zip(lats, longs))
        return discretizations
    
    def fit_transform(self, latitudes, longitudes):
        discretizations = self.discretize(latitudes, longitudes)
        lats = list(latitudes)
        longs = list(longitudes)
        self.mapping = {disc:[] for disc in discretizations}
        for i in range(len(discretizations)):
            self.mapping[discretizations[i]].append((lats[i], longs[i]))
        return discretizations

    def transform(self, latitudes, longitudes):
        return self.discretize(latitudes, longitudes)
    
    def measure(self, disc1, disc2):
        dist_lat = disc1[0]-disc2[0]
        dist_long = disc1[0]-disc2[0]
        return math.sqrt(dist_lat*dist_lat+dist_long*dist_long)

In [0]:
class TimeDiscretizer(Discretize):
    def discretize(self, created_at):
        pass
    
    def fit_transform(self, created_at):
        discretizations = self.discretize(created_at)
        times = list(created_at)
        self.mapping = {disc:[] for disc in discretizations}
        for i in range(len(discretizations)):
            self.mapping[discretizations[i]].append(times[i])
        return discretizations   
    
    def transform(self, created_at):
        return self.discretize(created_at)

class Hour(TimeDiscretizer):
    def discretize(self, created_at):
        indi = pd.DatetimeIndex(created_at)
        return list(indi.hour)
    
class Day(TimeDiscretizer):
    def discretize(self, created_at):
        indi = pd.DatetimeIndex(created_at)
        return list(indi.weekday)

class Week(TimeDiscretizer):
    def discretize(self, created_at):
        indi = pd.DatetimeIndex(created_at)
        return list(indi.hour%4)

class Month(TimeDiscretizer):
    def discretize(self, created_at):
        indi = pd.DatetimeIndex(created_at)
        return list(indi.month)
           
    

###Build vocab, index.

In [0]:
data = ""

def buildIndexData(list_elements):
    idx2data = {index:element  for index,element in enumerate(sorted(set(list_elements)))}
    data2idx = {element:index for index,element in idx2data.items()}
    return data2idx, idx2data
        
class Indexer():
    def __init__(self):
        pass
    
    def build_vocab(self, l, vocab_size = 0, vocab_mincount = 0):
        counter_l = Counter(l)
        if (vocab_size > 0):
            pairs = counter_l.most_common(vocab_size)
        else:
            pairs = list(counter_l.items())
        return set([token for token, count in pairs if count >= vocab_mincount])

    def fit_transform(self,
            df,
            dates_vocab_size = 0, dates_vocab_mincount = 0,
            places_vocab_size = 0, places_vocab_mincount = 0,
            words_vocab_size = 0, words_vocab_mincount = 0): 

        self.time_discretizer_hour = Hour()
        self.time_discretizer_day = Day()
        self.time_discretizer_week = Week()
        self.time_discretizer_month = Month()

        self.coor_discretizer_1 = Round(0.001)
        self.coor_discretizer_2 = Round(0.002)
        self.coor_discretizer_4 = Round(0.004)
        self.coor_discretizer_8 = Round(0.008)

       
        #file_csv has columns created_at, latitude, longitude, text
        dates = self.time_discretizer_hour.fit_transform(df['created_at'])
        days = self.time_discretizer_day.fit_transform(df['created_at'])
        weeks = self.time_discretizer_week.fit_transform(df['created_at'])
        months = self.time_discretizer_month.fit_transform(df['created_at'])
        
        places = self.coor_discretizer_1.fit_transform(df['latitude'], df['longitude'])
        places2 = self.coor_discretizer_2.fit_transform(df['latitude'], df['longitude'])
        places4 = self.coor_discretizer_4.fit_transform(df['latitude'], df['longitude'])
        places8 = self.coor_discretizer_8.fit_transform(df['latitude'], df['longitude'])
        
        
        
        texts = list(df['texts'].astype(str))
        words = [word for list_words in texts for word in list_words.split()]
        
        self.vocab_dates = self.build_vocab(dates, dates_vocab_size, dates_vocab_mincount)
        self.vocab_places = self.build_vocab(places, places_vocab_size, places_vocab_mincount)
        
        self.vocab_words = self.build_vocab(words, words_vocab_size, words_vocab_mincount)
        
        filtered_dates = set([i for i in range(len(dates)) if dates[i] in self.vocab_dates ])
        filtered_places = set([i for i in range(len(places)) if places[i] in self.vocab_places])
        
        filtered_words = set([i for i in range(len(texts)) if any([word in self.vocab_words for word in texts[i].split()]) ])
        filtered = list(filtered_dates.intersection(filtered_places).intersection(filtered_words))
        
        dates = [dates[i] for i in filtered]
        days = [days[i] for i in filtered]
        weeks = [weeks[i] for i in filtered]
        months = [months[i] for i in filtered]

        places = [places[i] for i in filtered]
        places2 = [places2[i] for i in filtered]
        places4 = [places4[i] for i in filtered]
        places8 = [places8[i] for i in filtered]
        
        texts = [texts[i] for i in filtered]

        self.word2idx, self.idx2word = buildIndexData(self.vocab_words)

        self.date2idx, self.idx2date = buildIndexData(dates)
        self.day2idx, self.idx2day = buildIndexData(days)
        self.week2idx, self.idx2week = buildIndexData(weeks)
        self.month2idx, self.idx2month = buildIndexData(months)

        self.place2idx, self.idx2place = buildIndexData(places)
        self.place2_2idx, self.idx2place2 = buildIndexData(places2)
        self.place4_2idx, self.idx2place4 = buildIndexData(places4)
        self.place8_2idx, self.idx2place8 = buildIndexData(places8)
        
        self.date2idx["<UNK>"] = len(self.date2idx)
        self.idx2date[len(self.date2idx)] = "<UNK>"
        self.day2idx["<UNK>"] = len(self.day2idx)
        self.idx2day[len(self.day2idx)] = "<UNK>"
        self.week2idx["<UNK>"] = len(self.week2idx)
        self.idx2week[len(self.week2idx)] = "<UNK>"
        self.month2idx["<UNK>"] = len(self.month2idx)
        self.idx2month[len(self.month2idx)] = "<UNK>"

        self.place2idx["<UNK>"] = len(self.place2idx)
        self.idx2place[len(self.place2idx)] = "<UNK>"
        self.place2_2idx["<UNK>"] = len(self.place2_2idx)
        self.idx2place2[len(self.place2_2idx)] = "<UNK>"
        self.place4_2idx["<UNK>"] = len(self.place4_2idx)
        self.idx2place4[len(self.place4_2idx)] = "<UNK>"
        self.place8_2idx["<UNK>"] = len(self.place8_2idx)
        self.idx2place8[len(self.place8_2idx)] = "<UNK>"

        idxsdates = [self.date2idx.get(date, self.date2idx["<UNK>"]) for date in dates]
        idxsdays = [self.day2idx.get(day, self.day2idx["<UNK>"]) for day in days]
        idxsweeks = [self.week2idx.get(week, self.week2idx["<UNK>"]) for week in weeks]
        idxsmonths = [self.month2idx.get(month, self.month2idx["<UNK>"]) for month in months]
        
        idxsplaces = [self.place2idx.get(place, self.place2idx["<UNK>"]) for place in places]
        idxsplaces2 = [self.place2_2idx.get(place, self.place2_2idx["<UNK>"]) for place in places2]
        idxsplaces4 = [self.place4_2idx.get(place, self.place4_2idx["<UNK>"]) for place in places4]
        idxsplaces8 = [self.place8_2idx.get(place, self.place8_2idx["<UNK>"]) for place in places8]

        vocab_size = len(self.word2idx)
        self.word2idx["<PAD>"] = vocab_size
        self.word2idx["<UNK>"] = vocab_size+1 
        self.word2idx["<START>"] = vocab_size+2
        self.word2idx["<END>"] = vocab_size+3
        

        self.idx2word[vocab_size] = "<PAD>"
        self.idx2word[vocab_size+1] = "<UNK>"
        self.idx2word[vocab_size+2] = "<START>"
        self.idx2word[vocab_size+3] = "<END>"

        idxstexts = []
        for text in texts:
            indexed_text = [self.word2idx["<START>"]] +[self.word2idx[word] for word in text.split() if word in self.vocab_words]+[self.word2idx["<END>"]]
            idxstexts.append(indexed_text)

        
        full_list = list(zip(idxsdates, idxsdays, idxsweeks, idxsmonths, idxsplaces, idxsplaces2, idxsplaces4, idxsplaces8, idxstexts))
        clean_list = [[x[0]]+ [x[1]]+ [x[2]]+ [x[3]]+[x[4]]+ [x[5]]+[x[6]]+ [x[7]]+ x[8] for x in full_list]
        return clean_list

    def transform(self, df):
        dates  = self.time_discretizer_hour.fit_transform(df['created_at'])
        days   = self.time_discretizer_day.fit_transform(df['created_at'])
        weeks  = self.time_discretizer_week.fit_transform(df['created_at'])
        months = self.time_discretizer_month.fit_transform(df['created_at'])
        
        places  = self.coor_discretizer_1.fit_transform(df['latitude'], df['longitude'])
        places2 = self.coor_discretizer_2.fit_transform(df['latitude'], df['longitude'])
        places4 = self.coor_discretizer_4.fit_transform(df['latitude'], df['longitude'])
        places8 = self.coor_discretizer_8.fit_transform(df['latitude'], df['longitude'])

        idxsdates  = [self.date2idx.get(date, self.date2idx["<UNK>"]) for date in dates]
        idxsdays   = [self.day2idx.get(day, self.day2idx["<UNK>"]) for day in days]
        idxsweeks  = [self.week2idx.get(week, self.week2idx["<UNK>"]) for week in weeks]
        idxsmonths = [self.month2idx.get(month, self.month2idx["<UNK>"]) for month in months]
        
        idxsplaces  = [self.place2idx.get(place, self.place2idx["<UNK>"]) for place in places]
        idxsplaces2 = [self.place2_2idx.get(place, self.place2_2idx["<UNK>"]) for place in places2]
        idxsplaces4 = [self.place4_2idx.get(place, self.place4_2idx["<UNK>"]) for place in places4]
        idxsplaces8 = [self.place8_2idx.get(place, self.place8_2idx["<UNK>"]) for place in places8]
        
        idxstexts = []
        for text in df['texts'].astype(str):
            indexed_text = [self.word2idx["<START>"]] +[self.word2idx[word] for word in text.split() if word in self.vocab_words]+[self.word2idx["<END>"]]
            idxstexts.append(indexed_text)

        full_list = list(zip(idxsdates, idxsdays, idxsweeks, idxsmonths, idxsplaces, idxsplaces2, idxsplaces4, idxsplaces8, idxstexts))
        clean_list = [[x[0]]+ [x[1]]+ [x[2]]+ [x[3]]+[x[4]]+ [x[5]]+[x[6]]+ [x[7]]+ x[8] for x in full_list if ((x[0] != None) and (x[4] != None) and (len(x[8]) > 1)) ]
        return clean_list
      
    def Item2idx(self, item):
        return self.item2idx.get(item, -1)

    def Idx2item(self, index):
        return self.idx2item.get(index, None)





##Params pre-processing

In [0]:
path_data = 'drive/My Drive/Colab Notebooks/STTD/Data/'
dataset = "tweetsNY.csv"

places_vocab_size = 1000
            
dates_vocab_mincount= 0 
places_vocab_mincount=0
words_vocab_mincount=100

###Split Train-Test and Index

In [0]:
df = pd.read_csv(path_data+dataset)

In [0]:
columns = list(df.columns.values)
print('Done load data')

if not ('created_at' in columns):
    print("Need a column name created_at with timestamps")
                    
if not ('latitude' in columns and 'longitude' in columns):
    print("Need a columns names latitude and longitude")
                    
if not ('texts' in columns):

    print("Need a column name texts with texts")
                    
length = len(df)                   
                    
train_range =  np.r_[0:int(0.6*length)]
val_range =  np.r_[int(0.6*length):int(0.8*length)]
test_range =  np.r_[int(0.8*length):length]
                    
train = df.loc[train_range, :]
val = df.loc[val_range, :]
test = df.loc[test_range, :]

indexer = Indexer()
print('Start indexing ')
            
train_data = indexer.fit_transform(train, 
                                   dates_vocab_mincount=dates_vocab_mincount, 
                                   words_vocab_mincount=words_vocab_mincount, 
                                   places_vocab_mincount=places_vocab_mincount,
                                   places_vocab_size=places_vocab_size)
print('Done indexing train')


Done load data
Start indexing 
Done indexing train


In [0]:
val_data  = indexer.transform(val)
print('Done indexing val')
test_data = indexer.transform(test)
print('Done indexing test')

Done indexing val
Done indexing test


In [0]:
print(len(train_data))
print(len(val_data))
print(len(test_data))


158567
95859
95860


In [0]:
print(train_data[0])
print(val_data[0])
print(test_data[0])


[3, 3, 3, 3, 27, 28, 24, 16, 1552, 91, 229, 966, 1553]
[12, 0, 0, 7, 147, 123, 92, 62, 1552, 1500, 500, 51, 1324, 1463, 1411, 1553]
[10, 1, 2, 7, 1000, 569, 290, 156, 1552, 1553]


#Data

In [0]:
context_len = 8
sent_len = 10
maxlen = context_len + sent_len
train_data = [exam[:maxlen-1] for exam in train_data]
val_data = [exam[:maxlen-1] for exam in val_data]
test_data = [exam[:maxlen-1] for exam in test_data]


train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=indexer.word2idx["<PAD>"],
                                                        padding='post',
                                                        maxlen=maxlen)

val_data =  keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=indexer.word2idx["<PAD>"],
                                                       padding='post',
                                                       maxlen=maxlen)

test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=indexer.word2idx["<PAD>"],
                                                       padding='post',
                                                       maxlen=maxlen)

In [0]:
#train_data = [[tup[0],tup[1],word] for tup in train_data for word in tup[2:] ]
#val_data = [ [tup[0],tup[1],word] for tup in val_data for word in tup[2:] ]
print(len(train_data), len(val_data), len(test_data))


158567 95860 95860


In [0]:
BUFFER_SIZE = 10000
BATCH_SIZE = 1000
fake = 10

dataset = tf.data.Dataset.from_generator(lambda: train_data, tf.int32, output_shapes=[None])
dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

val_dataset = tf.data.Dataset.from_generator(lambda: val_data, tf.int32, output_shapes=[None])
val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)



#Model

In [0]:
steps_per_epoch = len(train_data)//BATCH_SIZE
epochs = 3
embedding_dim = 128
enc_units = 256
vocab_time_size = len(indexer.idx2date)
vocab_day_size = len(indexer.idx2day)
vocab_week_size = len(indexer.idx2week)
vocab_month_size = len(indexer.idx2month)

vocab_place_size = len(indexer.idx2place)
vocab_place2_size = len(indexer.idx2place2)
vocab_place4_size = len(indexer.idx2place4)
vocab_place8_size = len(indexer.idx2place8)

vocab_word_size = len(indexer.idx2word)

In [0]:
class LM_STTD(tf.keras.Model):
    def __init__(self):
        super(LM_STTD, self).__init__()
        self.embed_time = tf.keras.layers.Embedding(vocab_time_size, embedding_dim)
        self.embed_day = tf.keras.layers.Embedding(vocab_day_size, embedding_dim)
        self.embed_week = tf.keras.layers.Embedding(vocab_week_size, embedding_dim)
        self.embed_month = tf.keras.layers.Embedding(vocab_month_size, embedding_dim)

        self.embed_loc = tf.keras.layers.Embedding(vocab_place_size, embedding_dim)
        self.embed_loc2 = tf.keras.layers.Embedding(vocab_place2_size, embedding_dim)
        self.embed_loc4 = tf.keras.layers.Embedding(vocab_place4_size, embedding_dim)
        self.embed_loc8 = tf.keras.layers.Embedding(vocab_place8_size, embedding_dim)

        self.embed_word = tf.keras.layers.Embedding(vocab_word_size, embedding_dim)
        
        self.enc_units = enc_units
        
        self.fc_enc_emb_times = tf.keras.layers.Dense(embedding_dim, use_bias=True)
        self.fc_enc_emb_locs = tf.keras.layers.Dense(embedding_dim, use_bias=True)
        self.relu = tf.keras.layers.ReLU()
        #PREDICT
        self.fc_word = tf.keras.layers.Dense(vocab_word_size, use_bias=True)
        
        self.rnn_pre = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

    def call(self, batch, context_flag= "tp"):
        times_batch = tf.squeeze(batch[:,:1])
        days_batch = tf.squeeze(batch[:,1:2])
        weeks_batch = tf.squeeze(batch[:,2:3])
        months_batch = tf.squeeze(batch[:,3:4])

        locs_batch = tf.squeeze(batch[:,4:5])
        locs2_batch = tf.squeeze(batch[:,5:6])
        locs4_batch = tf.squeeze(batch[:,6:7])
        locs8_batch = tf.squeeze(batch[:,7:8])

        texts_batch = tf.squeeze(batch[:,8:-1])

        texts_embedded = self.embed_word(texts_batch)
        if context_flag == "":
            output, state = self.rnn_pre(texts_embedded)
            return self.fc_word(output)

        contexts = []
        contexts_expanded = []

        if "t" in context_flag:
            times_embedded = tf.expand_dims(self.embed_time(times_batch), 1)
            days_embedded = tf.expand_dims(self.embed_day(days_batch), 1)
            weeks_embedded = tf.expand_dims(self.embed_week(weeks_batch), 1)
            months_embedded = tf.expand_dims(self.embed_month(months_batch), 1)
            t = tf.concat([times_embedded, days_embedded, weeks_embedded, months_embedded], 2)
            times_context = self.relu(self.fc_enc_emb_times(t))
            contexts.append(times_context)
        
        if "p" in context_flag:
            locs_embedded = tf.expand_dims(self.embed_loc(locs_batch), 1)
            places2_embedded = tf.expand_dims(self.embed_loc2(locs2_batch), 1)
            places4_embedded = tf.expand_dims(self.embed_loc4(locs4_batch), 1)
            places8_embedded = tf.expand_dims(self.embed_loc8(locs8_batch), 1)
            p = tf.concat([locs_embedded, places2_embedded, places4_embedded, places8_embedded], 2)
            locs_context = self.relu(self.fc_enc_emb_locs(p))
            contexts.append(locs_context)
        
        context_tensor = tf.concat(contexts, 2)
        output, state = self.rnn_pre(texts_embedded, initial_state=tf.squeeze(context_tensor))

        context_expanded = tf.broadcast_to(context_tensor, [context_tensor.shape[0],output.shape[1],context_tensor.shape[2]])
        
        
        return self.fc_word(tf.concat(context_expanded, 2))




# Training

1. Pass the *input* through the *encoder* which return *encoder output* and the *encoder hidden state*.
2. The encoder output, encoder hidden state and the decoder input (which is the *start token*) is passed to the decoder.
3. The decoder returns the *predictions* and the *decoder hidden state*.
4. The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss.
5. Use *teacher forcing* to decide the next input to the decoder.
6. *Teacher forcing* is the technique where the *target word* is passed as the *next input* to the decoder.
7. The final step is to calculate the gradients and apply it to the optimizer and backpropagate.

In [0]:
@tf.function
def train_step(batch):
    loss = 0
    loss_time = 0
    loss_place = 0
    loss_text = 0
    
    words_batch = tf.squeeze(batch[:,9:])
    
    with tf.GradientTape() as tape:
        predictions_word = model(batch)
        loss_word = loss_function(words_batch, predictions_word)
        loss = loss_word
    

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    
    train_acc_text(words_batch, predictions_word)
    train_loss_text(loss_word)
    
    return loss


def val_step(batch):
    words_batch = tf.squeeze(batch[:,9:])

    predictions_word = model(batch)
    val_acc_text(words_batch, predictions_word)

#Training

## Define the optimizer and the loss function

In [0]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    loss = tf.reduce_mean(loss_)
    return loss

##Trackers for Tensorboard

In [0]:
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)

train_loss_time = tf.keras.metrics.Mean('train_loss_time', dtype=tf.float32)
train_loss_place = tf.keras.metrics.Mean('train_loss_place', dtype=tf.float32)
train_loss_text = tf.keras.metrics.Mean('train_loss_text', dtype=tf.float32)

train_acc_time = tf.keras.metrics.SparseCategoricalAccuracy('train_acc_time')
train_acc_place = tf.keras.metrics.SparseCategoricalAccuracy('train_acc_place')
train_acc_text = tf.keras.metrics.SparseCategoricalAccuracy('train_acc_text')

In [0]:
val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

val_loss_time = tf.keras.metrics.Mean('val_loss_time', dtype=tf.float32)
val_loss_place = tf.keras.metrics.Mean('val_loss_place', dtype=tf.float32)
val_loss_text = tf.keras.metrics.Mean('val_loss_text', dtype=tf.float32)

val_acc_time = tf.keras.metrics.SparseCategoricalAccuracy('val_acc_time')
val_acc_place = tf.keras.metrics.SparseCategoricalAccuracy('val_acc_place')
val_acc_text = tf.keras.metrics.SparseCategoricalAccuracy('val_acc_text')


##Checkpoints (Object-based saving)

In [0]:
checkpoint_dir = './training_checkpoints_'+str(epochs)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)                           

In [0]:
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_dir = 'logs/gradient_tape_epochs_'+str(epochs)+'/' + current_time + '/train'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)

val_log_dir = 'logs/gradient_tape_epochs_'+str(epochs)+'/' + current_time + '/val'
val_summary_writer = tf.summary.create_file_writer(val_log_dir)

##Train

In [0]:
#model =  LM_STTD(vocab_time_size, vocab_space_size, vocab_word_size, embedding_dim, enc_units)
model =  LM_STTD()


In [0]:
EPOCHS = epochs

for epoch in range(EPOCHS):
    start = time.time()

    hidden = model.reset_states()
    
    total_loss = 0
    
    for (batch, inp) in enumerate(dataset.take(steps_per_epoch)):
        
        #batch_loss = train_step(inp,representtext_hidden)
        batch_loss = train_step(inp)
        
        total_loss += batch_loss

        #if batch % 1000 == 0:
        #    print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
        #                                             batch,
        #                                             batch_loss.numpy()))
    
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('loss_time', train_loss_time.result(), step=epoch)
        tf.summary.scalar('loss_place', train_loss_place.result(), step=epoch)
        tf.summary.scalar('loss_text', train_loss_text.result(), step=epoch)
    
        tf.summary.scalar('acc_time', train_acc_time.result(), step=epoch)
        tf.summary.scalar('acc_place', train_acc_place.result(), step=epoch)
        tf.summary.scalar('acc_text', train_acc_text.result(), step=epoch)
        
    for (batch, inp) in enumerate(val_dataset):
        val_step(inp)    
    
    with val_summary_writer.as_default():  
        tf.summary.scalar('val_acc_time', val_acc_time.result(), step=epoch)  
        tf.summary.scalar('val_acc_place', val_acc_place.result(), step=epoch)
        tf.summary.scalar('val_acc_text', val_acc_text.result(), step=epoch)
        
    # saving (checkpoint) the model every 2 epochs
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

    print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
    
    # Reset metrics every epoch
    train_loss.reset_states()
    train_loss_time.reset_states()    
    train_loss_place.reset_states()    
    train_loss_text.reset_states()  
    
    train_acc_time.reset_states()    
    train_acc_place.reset_states() 
    train_acc_text.reset_states()
    
    val_acc_time.reset_states()    
    val_acc_place.reset_states()
    val_acc_text.reset_states()

Epoch 1 Loss 3.8199
Time taken for 1 epoch 32.394216775894165 sec

Epoch 2 Loss 2.7360
Time taken for 1 epoch 27.706735849380493 sec

Epoch 3 Loss 2.4542
Time taken for 1 epoch 27.740156650543213 sec



In [0]:
%tensorboard --logdir logs/gradient_tape_epochs_60

Reusing TensorBoard on port 6006 (pid 1333), started 3:44:47 ago. (Use '!kill 1333' to kill it.)

In [0]:
#model.load_weights(checkpoint_prefix.format(epoch=0))