In [1]:
import gc
import warnings
from pathlib import Path

import pandas as pd
import torch
from transformers import EncoderDecoderModel, BertTokenizer
warnings.filterwarnings('ignore')

import torch_directml

# Select the DirectML device used for the model and input tensors below.
# NOTE: no availability check or CPU fallback is performed here.
device = torch_directml.device()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load a saved encoder-decoder model together with its tokenizer
def load_bert_model(model_path):
    """Load the model and tokenizer stored under ``model_path``.

    Args:
        model_path: Path or identifier passed unchanged to both
            ``EncoderDecoderModel.from_pretrained`` and
            ``BertTokenizer.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to the
        module-level ``device``.
    """
    seq2seq = EncoderDecoderModel.from_pretrained(model_path)
    bert_tokenizer = BertTokenizer.from_pretrained(model_path)
    return seq2seq.to(device), bert_tokenizer

# Generate a summary for a single text with the BERT encoder-decoder model
def generate_bert_summary(text, model, tokenizer):
    """Summarise ``text`` with beam search and return the decoded string.

    Args:
        text: Source text to summarise. ``None``, a float NaN (how pandas
            represents an empty CSV cell) or a blank string yields ``""``
            without running the model.
        model: Model exposing ``generate`` (as returned by ``load_bert_model``).
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The generated summary with special tokens removed, or ``""`` when
        there is nothing to summarise.
    """
    # Nothing to summarise: skip the model instead of crashing in the
    # tokenizer (None/NaN) or generating text from padding only (blank).
    is_missing = text is None or (isinstance(text, float) and text != text)
    if is_missing or (isinstance(text, str) and not text.strip()):
        return ""

    encoded = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")
    encoded = {k: v.to(device) for k, v in encoded.items()}

    summary_ids = model.generate(
        encoded["input_ids"],
        # The input is padded to 512 tokens; pass the mask explicitly so the
        # encoder ignores padding rather than relying on generate() to infer
        # it from the configured pad token id.
        attention_mask=encoded.get("attention_mask"),
        num_beams=4,
        max_length=256,
        early_stopping=True,
    )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [3]:
# Sections to process; each name selects an input CSV, a saved model
# directory and an output CSV.
sections = ['latarbelakang', 'rumusanmasalah', 'tujuanpenelitian', 'rangkumanpenelitianterkait', 'metodologipenelitian']

input_dir = Path('data/final-data')
output_dir = Path('data/output-bert')

# Create the output folder up front so a missing directory fails (or is
# fixed) before any expensive generation, not after a full section is done.
output_dir.mkdir(parents=True, exist_ok=True)

# Start unbound so the first iteration's cleanup below is a harmless no-op.
model = tokenizer = None

# Process each section
for section in sections:
    print(f"Processing {section}...")

    # Release the previous section's model before loading the next one so
    # two models are never held at the same time.
    model = tokenizer = None
    gc.collect()
    if torch.cuda.is_available():
        # Only meaningful when running on CUDA; skipped on other backends.
        torch.cuda.empty_cache()

    # Load the section's data
    df = pd.read_csv(input_dir / f'{section}.csv')

    # Load the model saved for this section
    model, tokenizer = load_bert_model(f"model/saved_model_{section}")

    # Generate one summary per row of the 'kalimat' column, in row order
    summaries = [
        generate_bert_summary(text, model, tokenizer)
        for text in df['kalimat']
    ]

    # Build the result table: document name alongside its summary
    result_df = pd.DataFrame({
        'nama_dokumen': df['nama_dokumen'],
        'summary_bert': summaries
    })

    # Save the results to CSV
    result_df.to_csv(output_dir / f'{section}.csv', index=False)

    print(f"Completed {section}")

print("All sections processed and saved.")

Processing latarbelakang...
Completed latarbelakang
Processing rumusanmasalah...
Completed rumusanmasalah
Processing tujuanpenelitian...
Completed tujuanpenelitian
Processing rangkumanpenelitianterkait...
Completed rangkumanpenelitianterkait
Processing metodologipenelitian...
Completed metodologipenelitian
All sections processed and saved.
