<a href="https://colab.research.google.com/github/mosesandrian/OneStopTravelCompanion/blob/main/TravelCompanion_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import json
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [40]:
# Load the raw place records and collect, for every record, its name and its
# list of type tags into two parallel lists.
places = []
labels = []
# Placeholders for the train/test partitions; they are reassigned by the
# cell that splits the shuffled data.
training_places = []
training_labels = []
testing_places = []
testing_labels = []

# Pass the encoding explicitly: without it, open() decodes with the platform's
# locale default, which is not UTF-8 everywhere and can corrupt or reject
# non-ASCII place names.
with open("/tmp/Yogyakarta.json", 'r', encoding="utf-8") as f:
    datastore = json.load(f)

for item in datastore['results']:
  places.append(item['name'])
  labels.append(item['types'])

In [41]:
# Learn the set of distinct type tags, then encode every place's tag list as
# a multi-hot row (one column per tag).
one_hot = MultiLabelBinarizer()
one_hot.fit(labels)
class_types = one_hot.classes_
# NOTE: `labels` is overwritten with the encoded matrix, so this cell should
# only be run once per fresh load of the raw tag lists.
labels = one_hot.transform(labels)

In [None]:
def extractPlaces(lst):
  """Wrap every element of ``lst`` in its own single-item list.

  Args:
    lst: Any iterable of values.

  Returns:
    A new list with one ``[element]`` entry per input element, in order.
  """
  return [[entry] for entry in lst]

# Pair each single-element place-name row with its encoded label row.
# NOTE(review): `dataset` is not referenced by the other visible cells.
place_rows = extractPlaces(places)
dataset = tf.data.Dataset.from_tensor_slices((place_rows, labels))

In [42]:
# Shuffle names and label rows together so every name keeps its own labels.
# A fixed random_state makes the order, and therefore the split below,
# reproducible across kernel restarts.
places_shuffled, labels_shuffled = shuffle(places, labels, random_state=42)

# Split the shuffled data into disjoint training and testing partitions.
# Both partitions previously held the full data set, so any evaluation on the
# "test" set would have been measured on the training data.
train_fraction = 0.8
training_size = int(len(places_shuffled) * train_fraction)

training_places = places_shuffled[:training_size]
training_labels = labels_shuffled[:training_size]
testing_places = places_shuffled[training_size:]
testing_labels = labels_shuffled[training_size:]

# Show the shuffled label matrix as a table (rows: places, columns: tags).
# It is the last expression of the cell so the notebook actually renders it.
f = pd.DataFrame(data=labels_shuffled,
                 index=places_shuffled,
                 columns=class_types)
f

Unnamed: 0,budaya,cagar_alam,maritim,religi,sejarah
Kampung Wisata Sosromenduran,1,0,0,0,0
Taman Air Menari,1,0,0,0,0
Kampung MARKISA Blunyahrejo,0,1,0,0,0
VARIASI RUMAH AQUARIUM PAK BAWOR,1,1,0,0,0
Sanggar Sastra Indonesia Yogyakarta,1,0,0,0,1
...,...,...,...,...,...
Kampung Lampion Code 18,1,1,0,0,1
Romo Mangun Museum,1,0,0,0,1
SANGGAR PENANTIAN,1,0,0,0,1
PJL Geser,1,0,0,0,1


In [43]:
# Tokenizer vocabulary cap (also the Embedding input size) and the width of
# each embedding vector.
vocab_size, embedding_dim = 12000, 16
# Length every token sequence is padded/truncated to, and which end is cut
# when a sequence is too long.
max_length, trunc_type = 24, 'post'
# Token the tokenizer substitutes for out-of-vocabulary words.
oov_tok = "<OOV>"

In [44]:
# Build the vocabulary from the training names only, so words that appear
# solely in the test partition map to the OOV token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_places)
word_index = tokenizer.word_index

# Training names -> integer sequences -> fixed-width matrix.
sequences = tokenizer.texts_to_sequences(training_places)
training_padded = pad_sequences(sequences, maxlen=max_length, truncating=trunc_type)

# Test names go through the same tokenizer and the same padding settings.
# Previously this encoded `training_places` again and padded without `maxlen`,
# so the test matrix neither held the test data nor was guaranteed to match
# the model's input width.
testing_sequences = tokenizer.texts_to_sequences(testing_places)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, truncating=trunc_type)

In [45]:
# One output unit per label column produced by the MultiLabelBinarizer.
# Categories: Budaya (culture), Sejarah (history), Cagar Alam (nature
# reserve), Maritim (maritime), Religi (religion).
num_classes = len(class_types)

model = tf.keras.Sequential([
  tf.keras.layers.Embedding(vocab_size,
                            embedding_dim,
                            input_length=max_length),
  tf.keras.layers.Conv1D(64, 5, activation='relu'),
  tf.keras.layers.GlobalMaxPooling1D(),
  tf.keras.layers.Dense(48, activation='relu'),
  # A place can carry several tags at once, so each tag gets an independent
  # sigmoid probability instead of competing inside one softmax.
  tf.keras.layers.Dense(num_classes, activation='sigmoid')
])

# The targets are multi-hot rows, not a single integer class id per sample,
# so the loss is binary cross-entropy rather than
# sparse_categorical_crossentropy.
model.compile(loss="binary_crossentropy",
              metrics=["accuracy"],
              optimizer='adam')

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 24, 16)            192000    
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 20, 64)            5184      
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 48)                3120      
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 245       
Total params: 200,549
Trainable params: 200,549
Non-trainable params: 0
_________________________________________________________________


In [46]:
# Number of full passes over the training data.
num_epochs = 10

# Make sure every input/target handed to Keras is a NumPy array.
training_padded, training_labels = np.array(training_padded), np.array(training_labels)
testing_padded, testing_labels = np.array(testing_padded), np.array(testing_labels)

# Validation is currently disabled; to turn it on, also pass
# validation_data=(testing_padded, testing_labels).
history = model.fit(
    x=training_padded,
    y=training_labels,
    epochs=num_epochs,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
