## Import Library

In [2]:
import os
import csv
import time
import json
import emoji
import fasttext
import numpy as np
import pandas as pd
import string
import json
import pickle
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import translators as ts

from datetime import date

from sklearn.model_selection import train_test_split
from sklearn.metrics import (f1_score,
                             precision_score,
                             recall_score, 
                             accuracy_score)

Using Indonesia server backend.


In [3]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, LSTM, Embedding, Bidirectional

## Test with Tensorflow Model

In [17]:
# prepare variables
symptom_df = pd.read_csv('dataset/symptom_id.csv')
disease_df = pd.read_csv('dataset/disease_id.csv')
model_tf = tf.keras.models.load_model('model/model-tensorflow.h5')
tokenizer = pickle.load(open('model/tokenizer.pkl', 'rb'))
le = LabelEncoder()
le.classes_ = np.load('model/classes_encoder.npy', allow_pickle=True)

In [31]:
text_indo = 'gatal ruam kulit' # masukan keluhan yang dirasakan dalam bhs. indonesia (dalam list)
text_en = ts.google(text_indo,to_language='en', from_language='id')

# preprocess text to sequences
text_en = tokenizer.texts_to_sequences([text_en])
text_en = pad_sequences(text_en, maxlen=64, truncating='pre')
start = time.time()
pred = model_tf.predict(text_en)
end = time.time()
preds = tf.math.top_k(pred, k=3) # prediksi top 3 kemungkinan disease 

list_predictions = le.inverse_transform(preds[1][0].numpy()) # list top 3 prediksi
list_score = preds[0][0].numpy().tolist() # list top 3 score
predict_time_tf = end - start




In [32]:
list_predictions, list_score, predict_time_tf

(array(['acne', 'gastroenteritis', 'allergy'], dtype=object),
 [0.29208892583847046, 0.25827163457870483, 0.21707503497600555],
 0.057981014251708984)

## Test with Bert Model (TFHub)

In [4]:
# prepare variables
symptom_df = pd.read_csv('dataset/symptom_id.csv')
disease_df = pd.read_csv('dataset/disease_id.csv')
model_bert = hub.KerasLayer("./model/model-tensorflow-bert.hub")
le = LabelEncoder()
le.classes_ = np.load('model/classes_encoder.npy', allow_pickle=True)

In [8]:
text_indo = 'gatal ruam kulit' # masukan keluhan yang dirasakan dalam bhs. indonesia (dalam list)
text_en = ts.google(text_indo,to_language='en', from_language='id')


start = time.time()
pred = model_bert([text_en])
end = time.time()
preds = tf.math.top_k(pred, k=3) # prediksi top 3 kemungkinan disease 

list_predictions = le.inverse_transform(preds[1][0].numpy()) # list top 3 prediksi
list_score = preds[0][0].numpy().tolist() # list top 3 score
predict_time_bert = end - start


In [9]:
list_predictions, list_score, predict_time_bert

(array(['fungal-infection', 'acne', 'drug-reaction'], dtype=object),
 [0.7257739305496216, 0.1899476945400238, 0.05927838012576103],
 1.2850043773651123)

## Test with Fasttext Model (non tensorflow)

In [28]:
# prepare variables
symptom_df = pd.read_csv('dataset/symptom_id.csv')
disease_df = pd.read_csv('dataset/disease_id.csv')
model_fasttext = fasttext.load_model('model/model.ftz')



In [33]:
text_indo = 'gatal ruam kulit' # masukan keluhan yang dirasakan dalam bhs. indonesia (dalam list)
text_en = ts.google(text_indo,to_language='en', from_language='id')


list_predictions = []
start = time.time()
prediction = model_fasttext.predict(text_en,3)
end = time.time()
for i in range(3):
    list_predictions.append(
        (prediction[0][i].replace("__label__",""), prediction[1][i])
    )

predict_time_fasttext = end - start

In [34]:
list_predictions, predict_time_fasttext

([('acne', 0.66585373878479),
  ('fungal-infection', 0.253587007522583),
  ('impetigo', 0.0704912319779396)],
 0.0)