# BERT2BERT Model for Text Summarization

## Dependencies and Load Data

In [None]:
# Mount Google Drive at /content/drive; the dataset CSVs and the saved model
# paths used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install evaluate
!pip install rouge_score

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0

In [None]:
from transformers import BertTokenizer, GPT2Tokenizer, EncoderDecoderModel, GenerationConfig
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, Trainer, EarlyStoppingCallback
from datasets import Dataset, DatasetDict
import evaluate
import torch
import os
from os import listdir
from os.path import isfile, join
import json
import re
import numpy as np
import pandas as pd

In [None]:
# Pick the GPU when one is present and fall back to the CPU otherwise, so the
# notebook does not hand out a CUDA device on a runtime that has none.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Left as the last expression so the cell still displays GPU availability.
torch.cuda.is_available()

True

In [None]:
# Read the prepared train / test / dev CSVs from Google Drive in one pass.
# The names on the left are bound in the same order as the paths below.
data_train, data_test, data_dev = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/ai-portfolio/project2-new/datasets/ready_data_train.csv',
        '/content/drive/MyDrive/ai-portfolio/project2-new/datasets/ready_data_test.csv',
        '/content/drive/MyDrive/ai-portfolio/project2-new/datasets/ready_data_dev.csv',
    )
)

In [None]:
# (rows, columns) of the train, dev and test frames, in that order.
tuple(split.shape for split in (data_train, data_dev, data_test))

In [None]:
# Down-sample each split with one shared seed and renumber the rows from 0.
# NOTE: the frames are overwritten in place, so re-running this cell samples
# from the already reduced frames.
SAMPLE_SEED = 88
data_train = (
    data_train
    .sample(n=3000, random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)
data_test = (
    data_test
    .sample(n=1000, random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)
data_dev = (
    data_dev
    .sample(n=1000, random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)

((3000, 2), (1000, 2), (1000, 2))

In [None]:
# Count fully duplicated rows in the train, dev and test frames (in that order).
tuple(split.duplicated().sum() for split in (data_train, data_dev, data_test))

(0, 0, 0)

In [None]:
# Wrap each pandas frame in a Hugging Face Dataset, then group the three
# under the split names used by the rest of the notebook.
dataset_train, dataset_valid, dataset_test = map(
    Dataset.from_pandas,
    (data_train, data_dev, data_test),
)

dataset_dict = DatasetDict({
    'train': dataset_train,
    'val': dataset_valid,
    'test': dataset_test
})

## Define BERT2BERT Model

In [None]:
# Tokenizer and encoder-decoder weights come from the same hub checkpoint.
PRETRAINED_CHECKPOINT = "cahya/bert2bert-indonesian-summarization"

tokenizer = BertTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)
model = EncoderDecoderModel.from_pretrained(PRETRAINED_CHECKPOINT)

# Reuse BERT's [CLS] / [SEP] as the beginning- and end-of-sequence tokens.
tokenizer.bos_token = tokenizer.cls_token
tokenizer.eos_token = tokenizer.sep_token

tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/230k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.09k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/999M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


In [None]:
# Batch collator for the seq2seq trainer, built from the tokenizer and model
# loaded above.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
)

In [None]:
prefix = ""
def preprocess_function(examples):
    """Tokenize a batch of articles and their reference summaries.

    Args:
        examples: batch mapping with a "clean_article" and a "clean_summary"
            column, each a list of strings.

    Returns:
        The tokenizer output for the (prefixed) articles, truncated to 512
        tokens, with an extra "labels" entry holding the summary token ids
        truncated to 128 tokens.
    """
    articles = [prefix + doc for doc in examples["clean_article"]]

    encoded = tokenizer(articles, max_length=512, truncation=True)
    targets = tokenizer(
        text_target=examples["clean_summary"],
        max_length=128,
        truncation=True,
    )

    encoded["labels"] = targets["input_ids"]
    return encoded

In [None]:
# Tokenize every split; batched=True hands preprocess_function lists of rows.
tokenized_dataset = dataset_dict.map(
    function=preprocess_function,
    batched=True,
)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

## Training

In [None]:
# Training configuration, grouped by concern. Save and eval strategies are
# both "epoch" so that the best checkpoint can be reloaded at the end.
training_args = Seq2SeqTrainingArguments(
    # Checkpointing
    output_dir="./results",
    save_strategy="epoch",
    save_total_limit=3,
    # Evaluation (summaries are generated, not teacher-forced)
    eval_strategy="epoch",
    per_device_eval_batch_size=4,
    predict_with_generate=True,
    # Optimisation
    num_train_epochs=10,
    learning_rate=5e-5,
    weight_decay=0.05,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    # Model selection: keep the checkpoint with the highest ROUGE-2
    load_best_model_at_end=True,
    metric_for_best_model="rouge2",
    greater_is_better=True,
)

In [None]:
rouge = evaluate.load('rouge')


def _one_sentence_per_line(text):
    """Put each sentence of ``text`` on its own line (split after . ! ?)."""
    return "\n".join(re.split(r'(?<=[.!?])\s+', text.strip()))


def compute_metrics(eval_pred):
    """Decode generated and reference token ids and score them with ROUGE.

    Args:
        eval_pred: ``(predictions, labels)`` pair supplied by the trainer.
            ``predictions`` may itself be a tuple, in which case its first
            element is used.

    Returns:
        dict mapping every key returned by ``rouge.compute`` to its score
        rounded to 4 decimals.
    """
    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        # The original read `preds[0]`, an undefined name (NameError).
        predictions = predictions[0]

    # -100 marks ignored/padded positions and is not a vocabulary id, so it
    # has to be replaced before decoding -- in the predictions as well as in
    # the labels.
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    # Drop [PAD]/[CLS]/[SEP] so that special tokens are not scored as words.
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # rougeLsum expects one sentence per line. Joining every word with "\n"
    # (the previous behaviour) made each word its own "sentence".
    decoded_preds = [_one_sentence_per_line(pred) for pred in decoded_preds]
    decoded_labels = [_one_sentence_per_line(label) for label in decoded_labels]

    # NOTE(review): use_stemmer enables rouge_score's stemmer, which is
    # presumably English-oriented -- confirm it is wanted for Indonesian text.
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    return {k: round(v, 4) for k, v in result.items()}

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [None]:
# Stop training once the monitored metric has not improved for 3 evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)

# Wire model, data, collator, metrics and callbacks into the seq2seq trainer.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["val"],
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

In [None]:
# Ask PyTorch to drop its cached CUDA memory before training starts.
torch.cuda.empty_cache()

In [None]:
# Run fine-tuning with the trainer configured above; the returned
# TrainOutput is displayed as the cell result.
trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,2.478442,0.3944,0.2408,0.3542,0.3942
2,0.767500,2.583655,0.4124,0.2596,0.373,0.4126
3,0.376800,2.677687,0.4002,0.2457,0.3616,0.4002
4,0.202600,2.759713,0.3915,0.2375,0.351,0.3915
5,0.202600,2.856006,0.4108,0.2563,0.3719,0.4107


Non-default generation parameters: {'max_length': 40, 'min_length': 20, 'early_stopping': True, 'num_beams': 10, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
Non-default generation parameters: {'max_length': 40, 'min_length': 20, 'early_stopping': True, 'num_beams': 10, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
Non-default generation parameters: {'max_length': 40, 'min_length': 20, 'early_stopping': True, 'num_beams': 10, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
Non-default generation parameters: {'max_length': 40, 'min_length': 20, 'early_stopping': True, 'num_beams': 10, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
Non-default generation parameters: {'max_length': 40, 'min_length': 20, 'early_stopping': True, 'num_beams': 10, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
There were missing keys in the checkpoint model loaded: ['decoder.cls.predictions.decoder.weight', 'decoder.cls.predictions.decoder.bias'].


TrainOutput(global_step=1875, training_loss=0.3802377522786458, metrics={'train_runtime': 3012.6412, 'train_samples_per_second': 9.958, 'train_steps_per_second': 1.245, 'total_flos': 6608396941430784.0, 'train_loss': 0.3802377522786458, 'epoch': 5.0})

In [None]:
# Score the trained model on the held-out test split and print the metrics.
test_split = tokenized_dataset["test"]
eval_results = trainer.evaluate(eval_dataset=test_split)
print(eval_results)



{'eval_loss': 2.2014706134796143, 'eval_rouge1': 0.4555, 'eval_rouge2': 0.2982, 'eval_rougeL': 0.4124, 'eval_rougeLsum': 0.4556, 'eval_runtime': 393.3611, 'eval_samples_per_second': 2.542, 'eval_steps_per_second': 0.636, 'epoch': 5.0}


In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Persist the fine-tuned weights to Google Drive.
model.save_pretrained(
    save_directory="/content/drive/MyDrive/datasets/bert2bert_indo_sum",
)

# Persist the tokenizer files in a "tokenizer" sub-folder of the model
# directory. This call stays last so that the tuple of written file paths is
# displayed as the cell output.
tokenizer.save_pretrained(
    save_directory="/content/drive/MyDrive/datasets/bert2bert_indo_sum/tokenizer",
)

Non-default generation parameters: {'max_length': 40, 'min_length': 20, 'early_stopping': True, 'num_beams': 10, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}


('/kaggle/working/bert2bert_indo_sum/tokenizer/tokenizer_config.json',
 '/kaggle/working/bert2bert_indo_sum/tokenizer/special_tokens_map.json',
 '/kaggle/working/bert2bert_indo_sum/tokenizer/vocab.txt',
 '/kaggle/working/bert2bert_indo_sum/tokenizer/added_tokens.json')

In [None]:
# Archive the saved model directory (recursively) into bert2bert_indo_sum.zip
# in the current working directory.
!zip -r bert2bert_indo_sum.zip /content/drive/MyDrive/datasets/bert2bert_indo_sum

  pid, fd = os.forkpty()


  adding: kaggle/working/bert2bert_indo_sum/ (stored 0%)
  adding: kaggle/working/bert2bert_indo_sum/config.json (deflated 80%)
  adding: kaggle/working/bert2bert_indo_sum/generation_config.json (deflated 39%)
  adding: kaggle/working/bert2bert_indo_sum/model.safetensors (deflated 7%)
  adding: kaggle/working/bert2bert_indo_sum/tokenizer/ (stored 0%)
  adding: kaggle/working/bert2bert_indo_sum/tokenizer/special_tokens_map.json (deflated 54%)
  adding: kaggle/working/bert2bert_indo_sum/tokenizer/tokenizer_config.json (deflated 75%)
  adding: kaggle/working/bert2bert_indo_sum/tokenizer/vocab.txt (deflated 51%)


## Inference

In [None]:
from transformers import BertModel, AutoTokenizer, BertTokenizer, EncoderDecoderModel
import torch

# Load the tokenizer from the "tokenizer" sub-folder that the save step wrote
# inside the model directory.
tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/datasets/bert2bert_indo_sum/tokenizer", use_fast=False)

# Same BOS/EOS mapping as the one applied to the tokenizer before training.
tokenizer.bos_token = tokenizer.cls_token
tokenizer.eos_token = tokenizer.sep_token

# Load the checkpoint as the encoder-decoder model it was saved as. Loading it
# with BertModel (as before) matched none of the saved weights, so the whole
# network was freshly initialised and had no decoder to generate summaries.
model = EncoderDecoderModel.from_pretrained("/content/drive/MyDrive/datasets/bert2bert_indo_sum")

# Use the GPU when present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Trailing ';' keeps the full module repr out of the cell output.
model.to(device);

You are using a model of type encoder-decoder to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertModel were not initialized from the model checkpoint at /content/drive/MyDrive/datasets/bert2bert_indo_sum and are newly initialized: ['embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.de

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(32000, 768, padding_idx=2)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [None]:
def clean_article(text):
    """Strip Liputan6 boilerplate, inline references and URLs from an article.

    Args:
        text: raw article text.

    Returns:
        The text with every matched fragment removed and leading/trailing
        whitespace stripped. Whitespace left inside the text by a removal is
        not collapsed.
    """
    # Dateline such as "Liputan6.com, Jakarta:". `\s*` accepts both the plain
    # "Liputan6.com" spelling and the spaced "Liputan6. com" variant; the
    # previous pattern required the space and missed the plain form.
    text = re.sub(r'Liputan6\.\s*com, [A-Za-z\s]+:', '', text)

    # A word followed by a date in the form (dd/mm) or (d/m).
    text = re.sub(r'\w+\s*\(\d{1,2}/\d{1,2}\)', '', text)

    # Credit in parentheses, e.g. "(UPI/Reporter Name)." with an optional
    # second name joined by "dan"; the trailing full stop is removed too.
    text = re.sub(r'\([A-Z]+/[A-Za-z\s]+(?: dan [A-Za-z\s]+)?\)\.', '', text)

    # Square-bracket cross references that start with "baca:".
    text = re.sub(r'\[baca: .*?\]', '', text)

    # URLs starting with http:// or https://.
    text = re.sub(r'https?://\S+', '', text)

    return text.strip()

In [None]:
%%time
ARTICLE_TO_SUMMARIZE = """Bank Mandiri, sebagaimana bank umumnya, menyediakan layanan kartu debit bagi nasabahnya. Kartu debit Mandiri dapat digunakan oleh nasabah untuk melakukan berbagai transaksi di mesin ATM atau mesin EDC. Fungsi dari Kartu Debit Mandiri ini sangat beragam, mulai dari tarik tunai, setor tunai, transfer uang, cek saldo rekening, hingga membayar berbagai tagihan melalui mesin ATM.
Penting bagi Anda yang ingin membuka rekening tabungan di Bank Mandiri untuk memahami jenis Kartu Debit Mandiri agar tidak salah memilih. Setiap kartu debit Mandiri memiliki kelebihan dan kekurangannya masing-masing, sehingga penting bagi nasabah untuk memilih yang sesuai dengan kebutuhan dan preferensi mereka.

Dalam memilih jenis Kartu Debit Mandiri, nasabah perlu mempertimbangkan kebutuhan dan gaya hidup mereka. Apakah mereka membutuhkan manfaat tambahan seperti asuransi atau akses ke airport lounge, ataukah mereka menginginkan kartu debit yang sederhana namun praktis. Dengan mengetahui jenis Kartu Debit Mandiri yang sesuai, nasabah dapat memaksimalkan manfaat yang mereka dapatkan dari penggunaan kartu debit tersebut.
"""

# generate summary
input_ids = tokenizer.encode(clean_article(ARTICLE_TO_SUMMARIZE), return_tensors='pt')
summary_ids = model.generate(input_ids.to(model.device),
            min_length=20,
            max_length=128,
            num_beams=10,
            repetition_penalty=2.5,
            length_penalty=1.0,
            early_stopping=True,
            no_repeat_ngram_size=2,
            use_cache=True,
            do_sample = True,
            temperature = 0.1,
            top_k = 50,
            top_p = 0.95)
# start time dan end time
summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary_text)

penting bagi anda yang ingin membuka rekening tabungan di bank mandiri untuk melakukan berbagai transaksi di mesin atm atau mesin edc.
CPU times: user 842 ms, sys: 24 ms, total: 866 ms
Wall time: 919 ms


In [None]:
# Load the sample pool for the cells below, which read `test_set` and its
# "final_clean_article" column. Use the copy found in the "clean_data" folder
# on Google Drive and place it under `parent_folder`.
parent_folder = "/content/"
test_set = pd.read_csv(parent_folder+'final_test_set.csv')

In [None]:
# Draw one random row from `test_set` for a qualitative check.
# NOTE(review): no random_state is passed, so a different row is drawn on
# every run.
df_sample = test_set.sample(1)

In [None]:
# Show the article text of the sampled row as a NumPy array.
df_sample["final_clean_article"].values

array(['Kepala Kepolisian Daerah Metro Jaya Inspektur Jenderal Polisi Sofjan Jacoeb menilai para pelaku kejahatan semakin nekat dan tak segan-segan melukai serta membunuh korban. Sebab itu, ia meminta masyarakat menggiatkan pengamanan swakarsa dan menjaga lingkungan masing-masing. Penilaian tersebut Sofjan sampaikan di Jakarta, baru-baru ini. Menurut Sofjan, musibah yang menimpa Brigadir Polisi Mursito, anggota Kepolisian Resor Jakarta Barat yang tewas saat mengawal uang nasabah Bank Central Asia, dapat menjadi cermin kebrutalan penjahat . Di samping itu, Sofjan menegaskan, insiden tersebut juga menjadi tantangan bagi kepolisian untuk memberikan rasa aman kepada masyarakat.'],
      dtype=object)

In [None]:
%%time
ARTICLE_TO_SUMMARIZE = """Kepala Kepolisian Daerah Metro Jaya Inspektur Jenderal Polisi Sofjan Jacoeb menilai para pelaku kejahatan semakin nekat dan tak segan-segan melukai serta membunuh korban. Sebab itu, ia meminta masyarakat menggiatkan pengamanan swakarsa dan menjaga lingkungan masing-masing. Penilaian tersebut Sofjan sampaikan di Jakarta, baru-baru ini. Menurut Sofjan, musibah yang menimpa Brigadir Polisi Mursito, anggota Kepolisian Resor Jakarta Barat yang tewas saat mengawal uang nasabah Bank Central Asia, dapat menjadi cermin kebrutalan penjahat . Di samping itu, Sofjan menegaskan, insiden tersebut juga menjadi tantangan bagi kepolisian untuk memberikan rasa aman kepada masyarakat."""

# generate summary
input_ids = tokenizer.encode(clean_article(ARTICLE_TO_SUMMARIZE), return_tensors='pt')
summary_ids = model.generate(input_ids.to(model.device),
            min_length=20,
            max_length=128,
            num_beams=10,
            repetition_penalty=2.5,
            length_penalty=1.0,
            early_stopping=True,
            no_repeat_ngram_size=2,
            use_cache=True,
            do_sample = True,
            temperature = 0.1,
            top_k = 50,
            top_p = 0.95)
# start time dan end time
summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary_text)

kapolda metro jaya irjen pol. sofjan jacoeb menilai para pelaku kejahatan semakin nekat dan tak segan - segan melukai serta membunuh korban. masyarakat diminta menjaga lingkungan.
CPU times: user 795 ms, sys: 3.99 ms, total: 799 ms
Wall time: 797 ms


## Inference in Gradio UI

### Dependencies for Gradio and load pretrained model

In [None]:
#Installing Gradio for colab if not exist
!pip install gradio

Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.0-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata

In [None]:
# Dependencies
from transformers import BertModel, AutoTokenizer, BertTokenizer, BertForSequenceClassification, EncoderDecoderModel
import torch
import gradio as gr

# Load the fine-tuned model and its tokenizer from the saved paths. The
# tokenizer was saved into a "tokenizer" sub-folder of the model directory,
# so it is loaded from there.
model_path = "/content/drive/MyDrive/datasets/bert2bert_indo_sum"
tokenizer_path = "/content/drive/MyDrive/datasets/bert2bert_indo_sum/tokenizer"
model = EncoderDecoderModel.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(tokenizer_path)

# Reuse BERT's [CLS] / [SEP] as the beginning- and end-of-sequence tokens.
tokenizer.bos_token = tokenizer.cls_token
tokenizer.eos_token = tokenizer.sep_token

# Use the GPU when present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Trailing ';' keeps the full module repr out of the cell output.
model.to(device);

EncoderDecoderModel(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(32000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

### Setting up the Gradio interface

In [None]:
# Function to summarize text
def summarize_text(text):
    """Generate a summary of ``text`` with the loaded BERT2BERT model.

    Args:
        text: article text entered in the Gradio UI.

    Returns:
        The decoded summary string, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    # Nothing to summarize: skip generation instead of decoding noise.
    if not text or not text.strip():
        return ""

    # Encode the article as-is. Training used an empty prefix, so no
    # "summarize: " instruction is prepended here (the previous code added one
    # the model never saw during fine-tuning). Input is truncated to 512 tokens.
    inputs = tokenizer.encode(text, return_tensors="pt", max_length=512, truncation=True)

    summary_ids = model.generate(inputs.to(model.device),
                                 min_length=20,  # lower bound on the generated summary length (tokens)
                                 max_length=512,  # upper bound on the generated summary length (tokens)
                                 num_beams=10,
                                 repetition_penalty=1.1,  # >1 discourages repeating tokens already generated
                                 length_penalty=1.0,
                                 early_stopping=True,
                                 no_repeat_ngram_size=2,
                                 use_cache=True,
                                 do_sample=True,  # sampling: output can differ between calls
                                 temperature=0.1,  # low (0.1) stays close to the source, higher (e.g. 0.7) is more creative
                                 top_k=45,  # sample only among the k most probable next tokens
                                 top_p=0.95)  # nucleus sampling: cumulative-probability cut-off for candidate tokens

    # Decode the first returned sequence and drop special tokens.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Describe the Gradio app: one text box in, one text box out, backed by
# summarize_text.
interface_options = dict(
    fn=summarize_text,
    inputs="text",
    outputs="text",
    title="Indonesian Text Summarizer",
    description="Enter a long piece of Indonesian text to get a concise summary generated by the custom BERT-to-BERT model.",
    examples=["Artificial Intelligence (AI) cepat mengubah berbagai industri..."],
)
interface = gr.Interface(**interface_options)

# Start the app with a public share link; debug=True keeps the cell running
# so that errors and logs stay visible.
interface.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://8a09a209bb46eb8437.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
