<a href="https://colab.research.google.com/github/kwang0149/capstone-project-ml/blob/main/Capstone_C23_PS149.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow_text
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text

In [None]:
from google.colab import drive
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# Drive folder holding the capstone CSV files that the loading cells read.
path_file = '/content/drive/MyDrive/SurveiCapstone'


In [None]:
# Load the training split from the Drive folder configured in `path_file`.
# The frame is displayed as the cell output for a quick visual check.
df_train = pd.read_csv(f"{path_file}/capstone_dataset_train.csv")
df_train

Unnamed: 0,storyId,story,major
0,37.0,"Dalam hal ilmu matematika, saya memiliki kemam...",Ekonomi
1,42.0,"Sebagai individu, saya memiliki pemahaman mate...",Ekonomi
2,10.0,Saya memiliki keinginan yang kuat untuk mempel...,Ekonomi
3,21.0,Saya memiliki kekuatan yang matang dalam ilmu ...,Ekonomi
4,26.0,Saya memiliki kemampuan matematika yang luar b...,Ekonomi
5,28.0,"Dalam ilmu matematika, saya memiliki kemampuan...",Ekonomi
6,3.0,Saya memiliki pemahaman yang kuat dalam ilmu m...,Ekonomi
7,5.0,Saya memiliki keahlian yang cukup kuat dalam i...,Ekonomi
8,6.0,Saya memiliki kekuatan dalam ilmu matematika d...,Ekonomi
9,7.0,Saya memiliki keahlian yang kuat dalam ilmu ma...,Ekonomi


In [None]:
# Load the validation split from the Drive folder configured in `path_file`.
# The frame is displayed as the cell output for a quick visual check.
df_validation = pd.read_csv(f"{path_file}/capstone_dataset_validation.csv")
df_validation

In [None]:
# Keep only the free-text feature ("story") and its label ("major"), then
# discard any row where either of the two is missing.
df_train = df_train[["story", "major"]].dropna()
df_train

In [None]:
# Keep only the free-text feature ("story") and its label ("major"), then
# discard any row where either of the two is missing.
df_validation = df_validation[["story", "major"]].dropna()
df_validation

In [None]:
# Use the pre-trained multilingual Universal Sentence Encoder from TF Hub as
# the text embedding layer. trainable=False freezes the encoder so that only
# layers stacked on top of it are updated during training.
embed = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3",trainable=False)

In [None]:
# Sanity check: embed two sample strings and inspect the result shape
# (one 512-dimensional vector per input string).
embed(["saya suka tekonolgi","IT"]).shape

TensorShape([2, 512])

In [None]:
# Dense layer with 4 units (one per major) that takes a 512-dimensional input.
# NOTE(review): `classification_head` is not referenced by any other cell
# visible in this notebook; the model's output layer is created separately
# when the model graph is built. Confirm whether this cell is still needed.
classification_head = tf.keras.layers.Dense(4, input_shape=(512,))

In [None]:
# Build the classifier graph with the Keras functional API:
#   raw string -> frozen sentence encoder -> Dense(512, relu) -> Dropout(0.5)
#   -> Dense(256, relu) -> Dense(4)
# The final Dense layer has no activation, so the model outputs raw logits,
# one per major.
input_layer = tf.keras.Input(shape=(), dtype=tf.string)
embedding_layer = embed(input_layer)
dense_layer_1 = tf.keras.layers.Dense(units=512, activation='relu')(embedding_layer)
dropout_layer = tf.keras.layers.Dropout(rate=0.5)(dense_layer_1)
dense_layer_2 = tf.keras.layers.Dense(units=256, activation='relu')(dropout_layer)
output_layer = tf.keras.layers.Dense(units=4)(dense_layer_2)

In [None]:
# Wrap the functional graph into a Keras Model: it takes the string input
# tensor and produces the 4-unit output of the final Dense layer.
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

In [None]:
# make dictionary out of the label
major_dictionary ={
    "IT" : 0,
    "Ekonomi" : 1,
    "Seni" : 2,
    "Kedokteran":3
}

In [None]:
# Training frame for the model: same rows as `df_train`, with the "major"
# column replaced by its integer class id from `major_dictionary`.
# A label missing from the dictionary raises KeyError.
df_train_model = df_train.assign(
    major=df_train["major"].map(lambda label: major_dictionary[label])
)
df_train_model

In [None]:
# Validation frame for the model: same rows as `df_validation`, with the
# "major" column replaced by its integer class id from `major_dictionary`.
# A label missing from the dictionary raises KeyError.
df_validation_model = df_validation.assign(
    major=df_validation["major"].map(lambda label: major_dictionary[label])
)
df_validation_model

In [None]:
# Configure training.
# - The loss takes integer class ids as targets and raw logits as predictions
#   (from_logits=True), matching the activation-free output layer.
# - The targets passed to fit() are integer class ids, not one-hot vectors, so
#   the top-2 metric must be the *sparse* variant. TopKCategoricalAccuracy
#   expects one-hot targets and reports a misleading value on integer labels.
# - The metric keeps the previous metric's name so the keys in the training
#   logs and in `History.history` stay the same.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[
        'accuracy',
        tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=2, name='top_k_categorical_accuracy'
        ),
    ],
)

In [None]:
# Train for 5 epochs on the encoded training frame, evaluating on the
# validation frame after each epoch. The returned History object is kept in
# `result`.
result = model.fit(
    x=df_train_model['story'].values,
    y=df_train_model['major'].values,
    validation_data=(
        df_validation_model['story'].values,
        df_validation_model['major'].values,
    ),
    batch_size=32,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Call fit() once more on the same `model` for a single additional epoch with
# the same data and batch size. This rebinds `result`, so the History object
# from the previous fit() call is no longer reachable through that name.
result = model.fit(
    x=df_train_model['story'].values,
    y=df_train_model['major'].values,
    validation_data=(
        df_validation_model['story'].values,
        df_validation_model['major'].values,
    ),
    batch_size=32,
    epochs=1,
)



In [None]:
# Smoke-test the trained model on one hand-written story. The output is one
# row of 4 raw logits (the output layer has no activation); the index of each
# value corresponds to the class ids in `major_dictionary`.
sample_story = "Minat saya yang kuat dalam ilmu pengetahuan dan kesehatan. Pertama, saya adalah seorang pendengar yang baik. Saya percaya bahwa mendengarkan dengan penuh perhatian adalah kunci untuk memahami kondisi dan masalah pasien dengan baik. Saya ingin memberikan perhatian yang sungguh-sungguh kepada pasien saya dan berusaha memahami mereka secara menyeluruh, bukan hanya dari segi fisik, tetapi juga emosional dan sosial."
model.predict([sample_story])



array([[-0.14529292, -0.53812826, -0.20740402,  1.1377877 ]],
      dtype=float32)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter
# counts (trainable vs. non-trainable).
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None,)]                 0         
                                                                 
 keras_layer (KerasLayer)    (None, 512)               68927232  
                                                                 
 dense_10 (Dense)            (None, 512)               262656    
                                                                 
 dropout_2 (Dropout)         (None, 512)               0         
                                                                 
 dense_11 (Dense)            (None, 256)               131328    
                                                                 
 dense_12 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 69,322,244
Trainable params: 395,012
Non-trai

In [None]:
# Export the trained model in TensorFlow SavedModel format.
# The path is relative to the current working directory; the TFLite
# conversion cell reads the SavedModel back from this same location.
export_dir = 'saved_model/1'
tf.saved_model.save(model, export_dir)



In [None]:
# Choose the TFLite optimization preset.
# `mode` may be 'Storage' or 'Speed'; any other value selects DEFAULT.
# NOTE(review): the TF Lite docs describe OPTIMIZE_FOR_SIZE and
# OPTIMIZE_FOR_LATENCY as deprecated and equivalent to DEFAULT — confirm
# against the installed TensorFlow version.
mode = "Speed"

# Resolve the mode to the name of a tf.lite.Optimize member, then look it up.
optimization = getattr(
    tf.lite.Optimize,
    {
        'Storage': 'OPTIMIZE_FOR_SIZE',
        'Speed': 'OPTIMIZE_FOR_LATENCY',
    }.get(mode, 'DEFAULT'),
)

In [None]:
# Create a TensorFlow Lite converter from the SavedModel exported earlier.
# Reuse `export_dir` instead of repeating the path literal, so the export
# step and the conversion step cannot drift apart.
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)

# Set the supported operations for TensorFlow Lite.
# SELECT_TF_OPS lets the converter fall back to regular TensorFlow ops for
# anything that has no TFLite builtin equivalent.
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
  tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
]


# Set the optimizations chosen in the `optimization` cell
converter.optimizations = [optimization]

# Invoke the converter to finally generate the TFLite model
tflite_model = converter.convert()



In [None]:
# Report whether the converter produced a model object.
print(
    "Model conversion to TFLite successful."
    if tflite_model is not None
    else "Model conversion to TFLite failed."
)

Model conversion to TFLite successful.


In [None]:
from google.colab import files
# Write the converted model to 'converted_model.tflite' in the current
# working directory, in binary mode.
with open('converted_model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Trigger a browser download of the TFLite file written by the previous cell
# (Colab-only helper from google.colab.files).
files.download('converted_model.tflite')