In [32]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score

import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, SimpleRNN
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.models import load_model, Model

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
data = pd.read_excel('dataBerita.xlsx', usecols=['kategori', 'berita'])

In [3]:
data.head()

Unnamed: 0,kategori,berita
0,valid;,Gunung Agung erupsi untuk pertama kali pada 2...
1,valid;,Jakarta CNN Indonesia -- Menteri BUMN Erick T...
2,valid;,Dosen Fakultas Kedokteran Hewan IPB Yusuf Ridw...
3,valid;,Jakarta - Dua anggota TNI Serda N dan Serda DA...
4,valid;,Akui Tembak Jatuh Pesawat Ukraina Iran Tuai Ge...


In [4]:
print(data.columns)


Index(['kategori', 'berita'], dtype='object')


In [5]:
print(data['kategori'].unique())

['valid;' '\nvalid;' 'valid; ' 'hoax;']


In [6]:
# Membersihkan data dengan menghapus whitespace dan karakter tidak perlu
data['kategori'] = data['kategori'].str.strip()        # Menghapus whitespace di awal dan akhir
data['kategori'] = data['kategori'].str.replace(';', '', regex=False)  # Menghapus tanda semicolon

In [7]:
print(data['kategori'].unique())

['valid' 'hoax']


In [8]:
label_counts = data['kategori'].value_counts()

print("Number of hoax samples:", label_counts.get('hoax', 0))
print("Number of valid samples:", label_counts.get('valid', 0))

Number of hoax samples: 507
Number of valid samples: 507


In [9]:
data['kategori_enc'] = data['kategori'].map({'valid': 0, 'hoax': 1})

In [12]:
data.head()

Unnamed: 0,kategori,berita,kategori_enc
0,valid,Gunung Agung erupsi untuk pertama kali pada 2...,0
1,valid,Jakarta CNN Indonesia -- Menteri BUMN Erick T...,0
2,valid,Dosen Fakultas Kedokteran Hewan IPB Yusuf Ridw...,0
3,valid,Jakarta - Dua anggota TNI Serda N dan Serda DA...,0
4,valid,Akui Tembak Jatuh Pesawat Ukraina Iran Tuai Ge...,0


In [11]:
x = data.drop('kategori_enc',axis=1)
y = data['kategori_enc']

Data Pre-processing and One Hot Represenatation

In [24]:
from collections import Counter
import re

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    words = text.split()
    return words

def analyze_word_frequency(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    
    # Preprocess and count word frequencies
    words = preprocess_text(text)
    word_freq = Counter(words)
    
    # Sort frequencies in descending order
    sorted_frequencies = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
    
    print("Total unique words:", len(word_freq))
    print("\nTop 20 most frequent words:")
    for word, freq in sorted_frequencies[:20]:
        print(f"{word}: {freq}")
    
    return word_freq, sorted_frequencies

# Usage
file_path = 'corpus2.txt'
word_frequencies, sorted_freq = analyze_word_frequency(file_path)

Total unique words: 11878

Top 20 most frequent words:
politik: 1072
partai: 631
indonesia: 505
orang: 498
presiden: 420
jakarta: 369
negara: 357
salah: 345
pilih: 339
perintah: 321
jokowi: 290
masyarakat: 289
temu: 277
milu: 277
nama: 262
ketua: 262
milik: 261
hasil: 260
kait: 252
kota: 247


In [None]:
# Read the corpus from the text file
corpus = []
with open('corpus2.txt', 'r', encoding='utf-8') as f:
    corpus = [line.strip() for line in f.readlines()]


In [23]:
# Hitung panjang setiap kalimat
sentence_lengths = [len(sentence.split()) for sentence in corpus]

# Temukan panjang maksimum dan kalimat yang memiliki panjang maksimum tersebut
max_length = max(sentence_lengths)
max_length_index = sentence_lengths.index(max_length)
longest_sentence = corpus[max_length_index]

# Menghitung panjang setiap kalimat
sentence_lengths = [len(sentence.split()) for sentence in corpus]

# Menemukan panjang pada persentil ke-95
maxlen_recommended = int(np.percentile(sentence_lengths, 95))

print("Panjang kalimat maksimum:", max_length)
print("Kalimat dengan panjang maksimum:", longest_sentence)
print("Panjang maxlen yang direkomendasikan (95 persentil):", maxlen_recommended)

Panjang kalimat maksimum: 625
Kalimat dengan panjang maksimum: liput com jakarta desah kokpit maskapai terbang lion air marak berita tanah air dunia sorot kutip news com au sabtu media australia tulis picu maskapai terbang murah besar indonesia lidi internal kopilot larang terbang tulis media negeri kanguru kutip nyata direktur lion air edward sirait nyata langkah cabut izin terbang kompensasi si kopilot aku cipta desah heboh kejut ulang muat media inggris daily mail situs berita tajuk artikel kait pilot banned from flying after offering divorced air hostess as compensation for a delay on a flight to bal media pakistan tribune pk artikel indonesian pilot grounded for offering hostess as compensation berita heboh lapor tumpang nama lambertus maengkom media pakistan beber pesawat lion air lepas landas lambat pulau jawa kopilot canda tawar pramugari janda tugas minta maaf keras suara situs singapura the straits times tinggal tulis desah kokpit lion air laman judul indonesia s lion air gro

In [15]:
# We need to reset the indices as we have sampled our initial news dataset
msg = x.copy()
msg.reset_index(inplace=True)

In [16]:
# import StemmerFactory class
# create stemmer
factory = StemmerFactory()
stemmer = factory.create_stemmer()

In [20]:
#Data pre-pocessing

corpus=[]
for i in range(0,len(msg)):
    review= re.sub('[^a-zA-Z]',' ',msg['berita'][i])
    review= review.lower()
    review= review.split()

    review= [stemmer.stem(word) for word in review if not word in stopwords.words('indonesian')]
    review= ' '.join(review)
    corpus.append(review)

In [25]:
len(corpus)

1014

In [None]:
from collections import Counter

# Hitung frekuensi kemunculan setiap kata dalam corpus
word_counts = Counter(" ".join(corpus).split())

# Tentukan ambang batas kata dengan frekuensi rendah (kurang dari 5 kali)
threshold = 5
low_freq_words = {word: count for word, count in word_counts.items() if count < threshold}

print(f"Jumlah kata dengan frekuensi < {threshold}: {len(low_freq_words)}")


Jumlah kata dengan frekuensi < 5: 8660


In [175]:
#one-hot frepresentation
voc_size = 5500
one_hot_re = [one_hot(element,voc_size) for element in corpus]

#using pad_sequences to make all the representations of fixed length
sentence_length=203
embedded_docs= pad_sequences(one_hot_re, padding='pre', maxlen=sentence_length)

In [166]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

Creating Model

In [176]:
x_final = np.array(embedded_docs)
y_final= np.array(y)

In [177]:
# Set up k-fold cross-validation
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

### RNN

In [49]:
def create_model(voc_size, embedding_vector_features, sentence_length):
    inputs = tf.keras.Input(shape=(sentence_length,))
    embedding_layer = Embedding(voc_size, embedding_vector_features)(inputs)
    rnn_layer = SimpleRNN(32)(embedding_layer) 
    outputs = Dense(1, activation='sigmoid')(rnn_layer)
    
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [84]:
best_accuracy = 0 
best_precision = 0 
best_recall = 0  
best_f1 = 0

best_model = None 

best_val_loss = None  
best_val_accuracy = None  

for train_index, test_index in kf.split(x_final):
    x_train, x_test = x_final[train_index], x_final[test_index]
    y_train, y_test = y_final[train_index], y_final[test_index]
    
    # Create a new model for each fold
    model = create_model(voc_size=6500, embedding_vector_features=100, sentence_length=203)
    
    # Set up the model checkpoint to save the best model during training
    checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min', verbose=0)
    tensorboard_callback = TensorBoard(log_dir='./logs')

    # Train the model on the current fold with validation data
    history = model.fit(x_train, y_train, epochs=10, batch_size=64, verbose=0, validation_data=(x_test, y_test), callbacks=[checkpoint, tensorboard_callback])  
    
    # Load the best model based on validation loss
    model = load_model('best_model.keras')

    # Predict on the test set
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Track the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_precision = precision
        best_recall = recall
        best_f1 = f1

        best_model = model
        # Store the validation loss and accuracy for the best model
        best_val_loss = history.history['val_loss']
        best_val_accuracy = history.history['val_accuracy']
    
print("Best Model Accuracy:", best_accuracy)
print("Best Model Precision:", best_precision)
print("Best Model Recall:", best_recall)
print("Best Model F1-Score:", best_f1)

# After the K-fold cross-validation loop, print eval losses and accuracies for the best model
print("Best Model Validation Loss:", best_val_loss)
print("Best Model Validation Accuracy:", best_val_accuracy)

# Optionally, save the best model
best_model.save('t-100-6500-rnn.keras')


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step
Best Model Accuracy: 0.9507389162561576
Best Model Precision: 0.9345794392523364
Best Model Recall: 0.970873786407767
Best Model F1-Score: 0.9523809523809523
Best Model Validation Loss: [0.41908061504364014, 0.3513529300689697, 0.30064064264297485, 0.41932252049446106, 0.30479830503463745, 0.25229209661483765, 0.21917681396007538, 0.20514115691184998, 0.19605189561843872, 0.19223108887672424]
Best Model Validation Accuracy: [0.8916256427764893, 0.866995096206665, 0.8817734122276306, 0.8374384045600891, 0.9162561297416687, 0.9211822748184204, 0.9458128213882446, 0.9507389068603516, 0.9507389068603516, 0.9507389068603516]


In [85]:
# Load the best model
best_model = load_model('t-100-6500-rnn.keras')

In [35]:
# Test the model with new data
test = pd.read_excel('../test.xlsx', usecols=['kategori', 'berita'])

test.head()

Unnamed: 0,berita,kategori
0,Hakim Wahyu Iman Santoso Alami Kecelakaan Tung...,1
1,MEGAWATI DAN PUAN BERMAIN SLOT Nenek lampir pe...,1
2,JONATHAN LATUMAHINA SEORANG NASRANI DAN PENYUS...,1
3,"PDI-P Diblacklist dari Peserta Pilpres, Tak Bi...",1
4,Presiden Joe Biden dan Volodymyr Zelenskyy Ber...,1


In [36]:
# Encode the target labels (0 for valid, 1 for hoax)
test['kategori_enc'] = test['kategori']

In [37]:
# Read the corpus from the text file
corpus_test = []
with open('corpus_test.txt', 'r', encoding='utf-8') as f:
    corpus_test = [line.strip() for line in f.readlines()]

In [86]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus_test)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus_test)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

# Get the true labels (if available) for evaluation
new_y_final = test['kategori_enc'].values  # Assuming 'kategori' column contains labels (0 or 1)

# Predict with the model
new_predictions = (best_model.predict(embedded_docs) > 0.5).astype("int32").flatten()

new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

# Print the evaluation results
print(f"Encode with Tokenizer")

print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
Encode with Tokenizer
Accuracy on new data: 0.648
Precision on new data: 0.6778846153846154
Recall on new data: 0.564
F1 Score on new data: 0.6157205240174672


### LSTM

In [169]:
embedding_vector_features = 100
voc_size=6500
print(voc_size)
print(embedding_vector_features)
print(sentence_length)

6500
100
203


In [103]:
def create_model(voc_size, embedding_vector_features, sentence_length):
    inputs = tf.keras.Input(shape=(sentence_length,))
    embedding_layer = Embedding(voc_size, embedding_vector_features)(inputs)
    lstm_layer = LSTM(32)(embedding_layer)
    outputs = Dense(1, activation='sigmoid')(lstm_layer)
    
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [170]:
best_accuracy = 0 
best_precision = 0 
best_recall = 0  
best_f1 = 0

best_model = None 

best_val_loss = None  
best_val_accuracy = None  

for train_index, test_index in kf.split(x_final):
    x_train, x_test = x_final[train_index], x_final[test_index]
    y_train, y_test = y_final[train_index], y_final[test_index]
    
    # Create a new model for each fold
    model = create_model(voc_size=6500, embedding_vector_features=100, sentence_length=203)
    
    # Set up the model checkpoint to save the best model during training
    checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min', verbose=0)
    tensorboard_callback = TensorBoard(log_dir='./logs')

    # Train the model on the current fold with validation data
    history = model.fit(x_train, y_train, epochs=10, batch_size=64, verbose=0, validation_data=(x_test, y_test), callbacks=[checkpoint, tensorboard_callback])  
    
    # Load the best model based on validation loss
    model = load_model('best_model.keras')

    # Predict on the test set
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Track the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_precision = precision
        best_recall = recall
        best_f1 = f1

        best_model = model
        # Store the validation loss and accuracy for the best model
        best_val_loss = history.history['val_loss']
        best_val_accuracy = history.history['val_accuracy']
    
print("Best Model Accuracy:", best_accuracy)
print("Best Model Precision:", best_precision)
print("Best Model Recall:", best_recall)
print("Best Model F1-Score:", best_f1)

# After the K-fold cross-validation loop, print eval losses and accuracies for the best model
print("Best Model Validation Loss:", best_val_loss)
print("Best Model Validation Accuracy:", best_val_accuracy)

# Optionally, save the best model
best_model.save('t-100-6500-lstm.keras')


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step
Best Model Accuracy: 0.9702970297029703
Best Model Precision: 0.9789473684210527
Best Model Recall: 0.9587628865979382
Best Model F1-Score: 0.96875
Best Model Validation Loss: [0.6356028914451599, 0.43773016333580017, 0.2691842019557953, 0.2006787210702896, 0.15540505945682526, 0.1496114581823349, 0.13880032300949097, 0.12946797907352448, 0.13094346225261688, 0.13469159603118896]
Best Model Validation Accuracy: [0.896039605140686, 0.8762376308441162, 0.9257425665855408, 0.9405940771102905, 0.9603960514068604, 0.9554455280303955, 0.9603960514068604, 0.9702970385551453, 0.9653465151786804, 0.9603960514068604]


In [171]:
# Load the best model
best_model = load_model('t-100-6500-lstm.keras')

In [117]:
# Test the model with new data
test = pd.read_excel('../test.xlsx', usecols=['kategori', 'berita'])

test.head()

Unnamed: 0,berita,kategori
0,Hakim Wahyu Iman Santoso Alami Kecelakaan Tung...,1
1,MEGAWATI DAN PUAN BERMAIN SLOT Nenek lampir pe...,1
2,JONATHAN LATUMAHINA SEORANG NASRANI DAN PENYUS...,1
3,"PDI-P Diblacklist dari Peserta Pilpres, Tak Bi...",1
4,Presiden Joe Biden dan Volodymyr Zelenskyy Ber...,1


In [118]:
print(test.shape)
print(test['kategori'].unique())

(1000, 2)
[1 0]


In [93]:
# Count occurrences of each label in the 'kategori' column
label_counts = test['kategori'].value_counts()

# Display the counts for "hoax" and "valid" (adjust if labels are different)
print("Number of hoax samples:", label_counts.get(1, 0))
print("Number of valid samples:", label_counts.get(0, 0))

Number of hoax samples: 500
Number of valid samples: 500


In [163]:
# Define necessary preprocessing parameters
voc_size = 5500           # Must match training
sentence_length = 203   # Must match training

In [129]:
# Encode the target labels (0 for valid, 1 for hoax)
test['kategori_enc'] = test['kategori']

In [68]:
# Initialize the Sastrawi stemmer
factory = StemmerFactory()
stemmer = factory.create_stemmer()

# Preprocess text: cleaning, stemming, removing stopwords
corpus_test = []
for i in range(len(test)):
    review = re.sub('[^a-zA-Z]', ' ', test['berita'][i])  # Remove non-letter characters
    review = review.lower()
    review = review.split()
    review = [stemmer.stem(word) for word in review if word not in stopwords.words('indonesian')]
    review = ' '.join(review)
    corpus_test.append(review)

In [74]:
# Save the `corpus` to a text file
with open('corpus_test.txt', 'w') as file:
    for sentence in corpus_test:
        file.write(sentence + '\n')

print("Corpus saved as 'corpus_test.txt'")

Corpus saved as 'corpus_test.txt'


In [120]:
len(corpus_test)

1000

In [119]:
# Read the corpus from the text file
corpus_test = []
with open('corpus_test.txt', 'r', encoding='utf-8') as f:
    corpus_test = [line.strip() for line in f.readlines()]

In [164]:
print(voc_size)
print(sentence_length)

5500
203


In [142]:
# Convert text to sequences using one-hot encoding and pad the sequences
one_hot_re = [one_hot(sentence, voc_size) for sentence in corpus_test]
new_x_final = pad_sequences(one_hot_re, padding='pre', maxlen=sentence_length)

# Get the true labels (if available) for evaluation
new_y_final = test['kategori_enc'].values
# Predict with the new data
new_predictions = (best_model.predict(new_x_final) > 0.5).astype("int32").flatten()

# Evaluate the new predictions (if labels are available)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

# Print the evaluation results
print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Accuracy on new data: 0.91
Precision on new data: 0.91
Recall on new data: 0.91
F1 Score on new data: 0.91


In [172]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus_test)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus_test)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

# Get the true labels (if available) for evaluation
new_y_final = test['kategori_enc'].values  # Assuming 'kategori' column contains labels (0 or 1)

# Predict with the model
new_predictions = (best_model.predict(embedded_docs) > 0.5).astype("int32").flatten()

# Evaluate the new predictions (if labels are available)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

# Print the evaluation results
print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
Accuracy on new data: 0.719
Precision on new data: 0.7494305239179955
Recall on new data: 0.658
F1 Score on new data: 0.7007454739084132


### Using Dropout Layer

In [206]:
voc_size=6500
embedding_vector_features=100

print(voc_size)
print(embedding_vector_features)

6500
100


In [207]:
#one-hot frepresentation

one_hot_re = [one_hot(element,voc_size) for element in corpus]

#using pad_sequences to make all the representations of fixed length
sentence_length=203
embedded_docs= pad_sequences(one_hot_re, padding='pre', maxlen=sentence_length)

In [220]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

In [221]:
x_final = np.array(embedded_docs)
y_final= np.array(y)

In [180]:
def create_model(voc_size, embedding_vector_features, sentence_length):
    inputs = tf.keras.Input(shape=(sentence_length,))
    embedding_layer = Embedding(voc_size, embedding_vector_features)(inputs)
    dropout_layer_1 = Dropout(0.2)(embedding_layer)

    lstm_layer = LSTM(32)(dropout_layer_1)
    dropout_layer_2 = Dropout(0.2)(lstm_layer)

    outputs = Dense(1, activation='sigmoid')(dropout_layer_2)
    
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [225]:
best_accuracy = 0 
best_precision = 0 
best_recall = 0  
best_f1 = 0

best_model = None 

best_val_loss = None  
best_val_accuracy = None  

for train_index, test_index in kf.split(x_final):
    x_train, x_test = x_final[train_index], x_final[test_index]
    y_train, y_test = y_final[train_index], y_final[test_index]
    
    # Create a new model for each fold
    model = create_model(voc_size=6500, embedding_vector_features=100, sentence_length=203)
    
    # Set up the model checkpoint to save the best model during training
    checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min', verbose=0)
    tensorboard_callback = TensorBoard(log_dir='./logs')

    # Train the model on the current fold with validation data
    history = model.fit(x_train, y_train, epochs=10, batch_size=64, verbose=0, validation_data=(x_test, y_test), callbacks=[checkpoint, tensorboard_callback])  
    
    # Load the best model based on validation loss
    model = load_model('best_model.keras')

    # Predict on the test set
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Track the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_precision = precision
        best_recall = recall
        best_f1 = f1

        best_model = model
        # Store the validation loss and accuracy for the best model
        best_val_loss = history.history['val_loss']
        best_val_accuracy = history.history['val_accuracy']
    
print("Best Model Accuracy:", best_accuracy)
print("Best Model Precision:", best_precision)
print("Best Model Recall:", best_recall)
print("Best Model F1-Score:", best_f1)

# After the K-fold cross-validation loop, print eval losses and accuracies for the best model
print("Best Model Validation Loss:", best_val_loss)
print("Best Model Validation Accuracy:", best_val_accuracy)

# Optionally, save the best model
best_model.save('t-100-6500-lstm_w_d.keras')


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 294ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 104ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step
Best Model Accuracy: 0.9702970297029703
Best Model Precision: 0.989247311827957
Best Model Recall: 0.9484536082474226
Best Model F1-Score: 0.968421052631579
Best Model Validation Loss: [0.6517898440361023, 0.48777469992637634, 0.29363057017326355, 0.18749037384986877, 0.15375757217407227, 0.14658133685588837, 0.13308806717395782, 0.15168888866901398, 0.15970005095005035, 0.16943763196468353]
Best Model Validation Accuracy: [0.7970296740531921, 0.7920792102813721, 0.9257425665855408, 0.9504950642585754, 0.9554455280303955, 0.9653465151786804, 0.9702970385551453, 0.9554455280303955, 0.9455445408821106, 0.9455445408821106]


In [226]:
# Load the best model 828387478
best_model = load_model('t-100-6500-lstm_w_d.keras')

In [191]:
# Read the corpus from the text file
corpus_test = []
with open('corpus_test.txt', 'r', encoding='utf-8') as f:
    corpus_test = [line.strip() for line in f.readlines()]

len(corpus_test)

1000

In [192]:
len(corpus_test)

1000

In [259]:
print(voc_size)

5500


In [227]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus_test)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus_test)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

# Get the true labels (if available) for evaluation
new_y_final = test['kategori_enc'].values  # Assuming 'kategori' column contains labels (0 or 1)

# Predict with the model
new_predictions = (best_model.predict(embedded_docs) > 0.5).astype("int32").flatten()

# Evaluate the new predictions (if labels are available)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

# Print the evaluation results
print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step
Accuracy on new data: 0.727
Precision on new data: 0.7830423940149626
Recall on new data: 0.628
F1 Score on new data: 0.6970033296337403


In [211]:
voc_size=6500
sentence_length=203

# Convert text to sequences using one-hot encoding and pad the sequences
one_hot_re = [one_hot(sentence, voc_size) for sentence in corpus_test]
new_x_final = pad_sequences(one_hot_re, padding='pre', maxlen=sentence_length)

# Prepare the labels
new_y_final = test['kategori_enc'].values

# Predict with the new data
new_predictions = (best_model.predict(new_x_final) > 0.5).astype("int32").flatten()

# Evaluate the new predictions
new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
Accuracy on new data: 0.919
Precision on new data: 0.9165009940357853
Recall on new data: 0.922
F1 Score on new data: 0.9192422731804586


### Bidirectional LSTM

In [248]:
# Read the corpus from the text file
corpus = []
with open('corpus.txt', 'r', encoding='utf-8') as f:
    corpus = [line.strip() for line in f.readlines()]

In [242]:
voc_size = 6500
embedding_vector_features = 50
print(len(corpus))
print(voc_size)
print(sentence_length)

1014
6500
203


In [243]:
#one-hot frepresentation

one_hot_re = [one_hot(element,voc_size) for element in corpus]

#using pad_sequences to make all the representations of fixed length
sentence_length=203
embedded_docs= pad_sequences(one_hot_re, padding='pre', maxlen=sentence_length)

In [259]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

In [260]:
x_final = np.array(embedded_docs)
y_final= np.array(y)

In [235]:
def create_model(voc_size, embedding_vector_features, sentence_length):
    inputs = tf.keras.Input(shape=(sentence_length,))
    embedding_layer = Embedding(voc_size, embedding_vector_features)(inputs)
    bi_layer = Bidirectional(LSTM(32))(embedding_layer)

    outputs = Dense(1, activation='sigmoid')(bi_layer)
    
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [264]:
best_accuracy = 0 
best_precision = 0 
best_recall = 0  
best_f1 = 0

best_model = None 

best_val_loss = None  
best_val_accuracy = None  

for train_index, test_index in kf.split(x_final):
    x_train, x_test = x_final[train_index], x_final[test_index]
    y_train, y_test = y_final[train_index], y_final[test_index]
    
    # Create a new model for each fold
    model = create_model(voc_size=6500, embedding_vector_features=100, sentence_length=203)
    
    # Set up the model checkpoint to save the best model during training
    checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min', verbose=0)
    tensorboard_callback = TensorBoard(log_dir='./logs')

    # Train the model on the current fold with validation data
    history = model.fit(x_train, y_train, epochs=10, batch_size=64, verbose=0, validation_data=(x_test, y_test), callbacks=[checkpoint, tensorboard_callback])  
    
    # Load the best model based on validation loss
    model = load_model('best_model.keras')

    # Predict on the test set
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Track the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_precision = precision
        best_recall = recall
        best_f1 = f1

        best_model = model
        # Store the validation loss and accuracy for the best model
        best_val_loss = history.history['val_loss']
        best_val_accuracy = history.history['val_accuracy']
    
print("Best Model Accuracy:", best_accuracy)
print("Best Model Precision:", best_precision)
print("Best Model Recall:", best_recall)
print("Best Model F1-Score:", best_f1)

# After the K-fold cross-validation loop, print eval losses and accuracies for the best model
print("Best Model Validation Loss:", best_val_loss)
print("Best Model Validation Accuracy:", best_val_accuracy)

# Optionally, save the best model
best_model.save('t-100-6500-bilstm.keras')

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
Best Model Accuracy: 0.9653465346534653
Best Model Precision: 0.96875
Best Model Recall: 0.9587628865979382
Best Model F1-Score: 0.9637305699481865
Best Model Validation Loss: [0.6695782542228699, 0.5239341259002686, 0.3358866274356842, 0.20280885696411133, 0.16426613926887512, 0.17846271395683289, 0.13157925009727478, 0.15787553787231445, 0.34640681743621826, 0.24027611315250397]
Best Model Validation Accuracy: [0.5346534848213196, 0.8118811845779419, 0.896039605140686, 0.9554455280303955, 0.9702970385551453, 0.9554455280303955, 0.9653465151786804, 0.9653465151786804, 0.8861386179924011, 0.9207921028137207]


In [265]:
best_model = load_model('t-100-6500-bilstm.keras')

In [23]:
# Read the corpus from the text file
corpus_test = []
with open('corpus_test.txt', 'r', encoding='utf-8') as f:
    corpus_test = [line.strip() for line in f.readlines()]

In [96]:
len(corpus_test)

1000

In [266]:
# Tokenize the corpus
tokenizer = Tokenizer(num_words=6500)  # Use the same vocab size as during training
tokenizer.fit_on_texts(corpus_test)  # Fit tokenizer on the new corpus (could use the same tokenizer as before)
sequences = tokenizer.texts_to_sequences(corpus_test)

# Pad the sequences to ensure the same length as the training data
sentence_length = 203  # Ensure this matches the training sentence length
embedded_docs = pad_sequences(sequences, padding='pre', maxlen=sentence_length)

# Get the true labels (if available) for evaluation
new_y_final = test['kategori_enc'].values  # Assuming 'kategori' column contains labels (0 or 1)

# Predict with the model
new_predictions = (best_model.predict(embedded_docs) > 0.5).astype("int32").flatten()

# Evaluate the new predictions (if labels are available)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

# Print the evaluation results
print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
Accuracy on new data: 0.749
Precision on new data: 0.782312925170068
Recall on new data: 0.69
F1 Score on new data: 0.7332624867162593


In [250]:
voc_size=6500
sentence_length=203

# Convert text to sequences using one-hot encoding and pad the sequences
one_hot_re = [one_hot(sentence, voc_size) for sentence in corpus_test]
new_x_final = pad_sequences(one_hot_re, padding='pre', maxlen=sentence_length)

# Prepare the labels
new_y_final = test['kategori_enc'].values

# Predict with the new data
new_predictions = (best_model.predict(new_x_final) > 0.5).astype("int32").flatten()

# Evaluate the new predictions
new_accuracy = accuracy_score(new_y_final, new_predictions)
new_precision = precision_score(new_y_final, new_predictions)
new_recall = recall_score(new_y_final, new_predictions)
new_f1 = f1_score(new_y_final, new_predictions)

print(f"Accuracy on new data: {new_accuracy}")
print(f"Precision on new data: {new_precision}")
print(f"Recall on new data: {new_recall}")
print(f"F1 Score on new data: {new_f1}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
Accuracy on new data: 0.902
Precision on new data: 0.9222689075630253
Recall on new data: 0.878
F1 Score on new data: 0.8995901639344263


### Transformer

In [87]:
import pandas as pd
import numpy as np
from transformers import RobertaTokenizer

# Load dataset
data = pd.read_excel('dataBerita.xlsx', usecols=['kategori', 'berita'])

# Membersihkan data dengan menghapus whitespace dan karakter tidak perlu
data['kategori'] = data['kategori'].str.strip()        # Menghapus whitespace di awal dan akhir
data['kategori'] = data['kategori'].str.replace(';', '', regex=False)  # Menghapus tanda semicolon
data['kategori_enc'] = data['kategori'].map({'valid': 0, 'hoax': 1})
texts = data['berita'].tolist()

# Initialize RoBERTa tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Tokenisasi dan hitung panjang masing-masing teks
lengths = [len(tokenizer.encode(text)) for text in texts]

# Hitung panjang pada persentil ke-95
percentile_95 = np.percentile(lengths, 95)

# Menampilkan hasil
print(f"Nilai panjang teks pada persentil ke-95: {percentile_95}")


  from .autonotebook import tqdm as notebook_tqdm
Token indices sequence length is longer than the specified maximum sequence length for this model (1424 > 512). Running this sequence through the model will result in indexing errors


Nilai panjang teks pada persentil ke-95: 944.849999999999


In [88]:
import pandas as pd
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset

# Load dataset
data = pd.read_excel('dataBerita.xlsx', usecols=['kategori', 'berita'])
# Membersihkan data dengan menghapus whitespace dan karakter tidak perlu
data['kategori'] = data['kategori'].str.strip()        # Menghapus whitespace di awal dan akhir
data['kategori'] = data['kategori'].str.replace(';', '', regex=False)  # Menghapus tanda semicolon





In [89]:
data['kategori_enc'] = data['kategori'].map({'valid': 0, 'hoax': 1})
texts = data['berita'].tolist()
labels = data['kategori_enc'].tolist()

# Initialize RoBERTa tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

In [90]:
data['kategori_enc'].unique()

array([0, 1])

In [91]:
label_counts = data['kategori'].value_counts()

print("Number of hoax samples:", label_counts.get('hoax', 0))
print("Number of valid samples:", label_counts.get('valid', 0))

Number of hoax samples: 507
Number of valid samples: 507


In [92]:
# Custom Dataset class for RoBERTa
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=216):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        item = {key: val.squeeze() for key, val in encoding.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

In [94]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
# Define K-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
best_model = None
best_fold = -1
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []

In [95]:
for fold, (train_index, test_index) in enumerate(kf.split(texts)):
    print(f"Training fold {fold + 1}...")
    
    # Split the data for this fold
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]
    
    # Create datasets for this fold
    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    test_dataset = CustomDataset(test_texts, test_labels, tokenizer)
    
    # Load model for each fold
    model = RobertaForSequenceClassification.from_pretrained('distilroberta-base', num_labels=2)
    
    # Define training arguments for each fold
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=10,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        logging_dir='./logs',
        evaluation_strategy="epoch"
    )

    # Define metrics function
    def compute_metrics(pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        accuracy = accuracy_score(labels, preds)
        precision = precision_score(labels, preds)
        recall = recall_score(labels, preds)
        f1 = f1_score(labels, preds)
        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1
        }

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    # Train and evaluate
    trainer.train()
    eval_results = trainer.evaluate()

    # Extract and store metrics for this fold
    accuracy = eval_results['eval_accuracy']
    precision = eval_results['eval_precision']
    recall = eval_results['eval_recall']
    f1 = eval_results['eval_f1']

    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    
    # Track the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        best_fold = fold+1

Training fold 1...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
                                                   
 10%|█         | 102/1020 [05:45<40:47,  2.67s/it]

{'eval_loss': 0.2568579614162445, 'eval_accuracy': 0.9211822660098522, 'eval_precision': 0.967741935483871, 'eval_recall': 0.8737864077669902, 'eval_f1': 0.9183673469387755, 'eval_runtime': 22.2728, 'eval_samples_per_second': 9.114, 'eval_steps_per_second': 1.167, 'epoch': 1.0}


                                                    
 20%|██        | 204/1020 [11:37<36:39,  2.70s/it]

{'eval_loss': 0.14208431541919708, 'eval_accuracy': 0.9655172413793104, 'eval_precision': 0.9528301886792453, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9665071770334929, 'eval_runtime': 22.0945, 'eval_samples_per_second': 9.188, 'eval_steps_per_second': 1.177, 'epoch': 2.0}


                                                    
 30%|███       | 306/1020 [17:25<31:39,  2.66s/it]

{'eval_loss': 0.16715845465660095, 'eval_accuracy': 0.9704433497536946, 'eval_precision': 0.970873786407767, 'eval_recall': 0.970873786407767, 'eval_f1': 0.970873786407767, 'eval_runtime': 22.7804, 'eval_samples_per_second': 8.911, 'eval_steps_per_second': 1.141, 'epoch': 3.0}


                                                    
 40%|████      | 408/1020 [23:12<27:20,  2.68s/it]

{'eval_loss': 0.07900776714086533, 'eval_accuracy': 0.9802955665024631, 'eval_precision': 0.9805825242718447, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9805825242718447, 'eval_runtime': 22.4044, 'eval_samples_per_second': 9.061, 'eval_steps_per_second': 1.16, 'epoch': 4.0}


 49%|████▉     | 500/1020 [28:07<27:36,  3.18s/it]  

{'loss': 0.1818, 'grad_norm': 0.009881209582090378, 'learning_rate': 2.5490196078431373e-05, 'epoch': 4.9}


                                                  
 50%|█████     | 510/1020 [29:03<23:48,  2.80s/it]

{'eval_loss': 0.1718345582485199, 'eval_accuracy': 0.9753694581280788, 'eval_precision': 1.0, 'eval_recall': 0.9514563106796117, 'eval_f1': 0.9751243781094527, 'eval_runtime': 22.5159, 'eval_samples_per_second': 9.016, 'eval_steps_per_second': 1.155, 'epoch': 5.0}


                                                    
 60%|██████    | 612/1020 [35:08<19:19,  2.84s/it]

{'eval_loss': 0.06587190926074982, 'eval_accuracy': 0.9901477832512315, 'eval_precision': 1.0, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9901960784313726, 'eval_runtime': 22.3389, 'eval_samples_per_second': 9.087, 'eval_steps_per_second': 1.164, 'epoch': 6.0}


                                                    
 70%|███████   | 714/1020 [41:09<13:47,  2.71s/it]

{'eval_loss': 0.07284264266490936, 'eval_accuracy': 0.9852216748768473, 'eval_precision': 0.9901960784313726, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9853658536585366, 'eval_runtime': 22.0933, 'eval_samples_per_second': 9.188, 'eval_steps_per_second': 1.177, 'epoch': 7.0}


                                                  
 80%|████████  | 816/1020 [47:15<09:39,  2.84s/it]

{'eval_loss': 0.08038230240345001, 'eval_accuracy': 0.9802955665024631, 'eval_precision': 0.9805825242718447, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9805825242718447, 'eval_runtime': 22.1512, 'eval_samples_per_second': 9.164, 'eval_steps_per_second': 1.174, 'epoch': 8.0}


                                                  
 90%|█████████ | 918/1020 [53:15<04:35,  2.70s/it]

{'eval_loss': 0.10634855180978775, 'eval_accuracy': 0.9802955665024631, 'eval_precision': 0.9805825242718447, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9805825242718447, 'eval_runtime': 21.5703, 'eval_samples_per_second': 9.411, 'eval_steps_per_second': 1.205, 'epoch': 9.0}


 98%|█████████▊| 1000/1020 [57:48<01:07,  3.38s/it]

{'loss': 0.0132, 'grad_norm': 0.009946553036570549, 'learning_rate': 9.80392156862745e-07, 'epoch': 9.8}


                                                   
100%|██████████| 1020/1020 [59:15<00:00,  3.49s/it]


{'eval_loss': 0.10549121350049973, 'eval_accuracy': 0.9802955665024631, 'eval_precision': 0.9805825242718447, 'eval_recall': 0.9805825242718447, 'eval_f1': 0.9805825242718447, 'eval_runtime': 21.4962, 'eval_samples_per_second': 9.444, 'eval_steps_per_second': 1.21, 'epoch': 10.0}
{'train_runtime': 3555.924, 'train_samples_per_second': 2.281, 'train_steps_per_second': 0.287, 'train_loss': 0.09560388245062866, 'epoch': 10.0}


100%|██████████| 26/26 [00:21<00:00,  1.24it/s]


Training fold 2...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
 10%|█         | 102/1020 [05:34<42:17,  2.76s/it]
 10%|█         | 102/1020 [05:56<42:17,  2.76s/it]

{'eval_loss': 0.21501658856868744, 'eval_accuracy': 0.9458128078817734, 'eval_precision': 0.9285714285714286, 'eval_recall': 0.9578947368421052, 'eval_f1': 0.9430051813471503, 'eval_runtime': 21.8448, 'eval_samples_per_second': 9.293, 'eval_steps_per_second': 1.19, 'epoch': 1.0}


 20%|██        | 204/1020 [11:30<36:28,  2.68s/it]  
 20%|██        | 204/1020 [11:52<36:28,  2.68s/it]

{'eval_loss': 0.22537001967430115, 'eval_accuracy': 0.9458128078817734, 'eval_precision': 0.9285714285714286, 'eval_recall': 0.9578947368421052, 'eval_f1': 0.9430051813471503, 'eval_runtime': 21.9186, 'eval_samples_per_second': 9.262, 'eval_steps_per_second': 1.186, 'epoch': 2.0}


 30%|███       | 306/1020 [17:26<32:30,  2.73s/it]  
 30%|███       | 306/1020 [17:48<32:30,  2.73s/it]

{'eval_loss': 0.1941279023885727, 'eval_accuracy': 0.9458128078817734, 'eval_precision': 0.9666666666666667, 'eval_recall': 0.9157894736842105, 'eval_f1': 0.9405405405405406, 'eval_runtime': 21.5473, 'eval_samples_per_second': 9.421, 'eval_steps_per_second': 1.207, 'epoch': 3.0}


 40%|████      | 408/1020 [23:18<27:31,  2.70s/it]  
 40%|████      | 408/1020 [23:40<27:31,  2.70s/it]

{'eval_loss': 0.2780507206916809, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.9381443298969072, 'eval_recall': 0.9578947368421052, 'eval_f1': 0.9479166666666666, 'eval_runtime': 21.8191, 'eval_samples_per_second': 9.304, 'eval_steps_per_second': 1.192, 'epoch': 4.0}


 49%|████▉     | 500/1020 [28:47<28:45,  3.32s/it]  

{'loss': 0.1474, 'grad_norm': 0.03145629167556763, 'learning_rate': 2.5490196078431373e-05, 'epoch': 4.9}


 50%|█████     | 510/1020 [29:20<24:09,  2.84s/it]
 50%|█████     | 510/1020 [29:42<24:09,  2.84s/it]

{'eval_loss': 0.3379236161708832, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.9381443298969072, 'eval_recall': 0.9578947368421052, 'eval_f1': 0.9479166666666666, 'eval_runtime': 21.7509, 'eval_samples_per_second': 9.333, 'eval_steps_per_second': 1.195, 'epoch': 5.0}


 60%|██████    | 612/1020 [35:08<18:28,  2.72s/it]  
 60%|██████    | 612/1020 [35:29<18:28,  2.72s/it]

{'eval_loss': 0.4088763892650604, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.956989247311828, 'eval_recall': 0.9368421052631579, 'eval_f1': 0.9468085106382979, 'eval_runtime': 21.5618, 'eval_samples_per_second': 9.415, 'eval_steps_per_second': 1.206, 'epoch': 6.0}


 70%|███████   | 714/1020 [41:07<13:52,  2.72s/it]  
 70%|███████   | 714/1020 [41:29<13:52,  2.72s/it]

{'eval_loss': 0.3960358500480652, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.967032967032967, 'eval_recall': 0.9263157894736842, 'eval_f1': 0.946236559139785, 'eval_runtime': 21.7863, 'eval_samples_per_second': 9.318, 'eval_steps_per_second': 1.193, 'epoch': 7.0}


 80%|████████  | 816/1020 [47:06<09:19,  2.74s/it]
 80%|████████  | 816/1020 [47:28<09:19,  2.74s/it]

{'eval_loss': 0.3858775496482849, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.956989247311828, 'eval_recall': 0.9368421052631579, 'eval_f1': 0.9468085106382979, 'eval_runtime': 21.6051, 'eval_samples_per_second': 9.396, 'eval_steps_per_second': 1.203, 'epoch': 8.0}


 90%|█████████ | 918/1020 [53:05<04:36,  2.72s/it]
 90%|█████████ | 918/1020 [53:27<04:36,  2.72s/it]

{'eval_loss': 0.3974909782409668, 'eval_accuracy': 0.9556650246305419, 'eval_precision': 0.967391304347826, 'eval_recall': 0.9368421052631579, 'eval_f1': 0.9518716577540107, 'eval_runtime': 21.889, 'eval_samples_per_second': 9.274, 'eval_steps_per_second': 1.188, 'epoch': 9.0}


 98%|█████████▊| 1000/1020 [57:57<01:07,  3.37s/it]

{'loss': 0.006, 'grad_norm': 0.0012571369297802448, 'learning_rate': 9.80392156862745e-07, 'epoch': 9.8}


100%|██████████| 1020/1020 [59:03<00:00,  2.65s/it]
100%|██████████| 1020/1020 [59:28<00:00,  3.50s/it]


{'eval_loss': 0.3983173966407776, 'eval_accuracy': 0.9556650246305419, 'eval_precision': 0.967391304347826, 'eval_recall': 0.9368421052631579, 'eval_f1': 0.9518716577540107, 'eval_runtime': 20.4816, 'eval_samples_per_second': 9.911, 'eval_steps_per_second': 1.269, 'epoch': 10.0}
{'train_runtime': 3568.7892, 'train_samples_per_second': 2.272, 'train_steps_per_second': 0.286, 'train_loss': 0.07517132427521717, 'epoch': 10.0}


100%|██████████| 26/26 [00:20<00:00,  1.24it/s]


Training fold 3...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
 10%|█         | 102/1020 [05:35<40:58,  2.68s/it]
 10%|█         | 102/1020 [05:57<40:58,  2.68s/it]

{'eval_loss': 0.21663902699947357, 'eval_accuracy': 0.9310344827586207, 'eval_precision': 0.9666666666666667, 'eval_recall': 0.8877551020408163, 'eval_f1': 0.925531914893617, 'eval_runtime': 21.718, 'eval_samples_per_second': 9.347, 'eval_steps_per_second': 1.197, 'epoch': 1.0}


 20%|██        | 204/1020 [11:30<36:33,  2.69s/it]  
 20%|██        | 204/1020 [11:52<36:33,  2.69s/it]

{'eval_loss': 0.22021448612213135, 'eval_accuracy': 0.9458128078817734, 'eval_precision': 0.978021978021978, 'eval_recall': 0.9081632653061225, 'eval_f1': 0.9417989417989417, 'eval_runtime': 21.8388, 'eval_samples_per_second': 9.295, 'eval_steps_per_second': 1.191, 'epoch': 2.0}


 30%|███       | 306/1020 [17:20<31:30,  2.65s/it]  
 30%|███       | 306/1020 [17:42<31:30,  2.65s/it]

{'eval_loss': 0.19722838699817657, 'eval_accuracy': 0.9605911330049262, 'eval_precision': 0.96875, 'eval_recall': 0.9489795918367347, 'eval_f1': 0.9587628865979382, 'eval_runtime': 21.6464, 'eval_samples_per_second': 9.378, 'eval_steps_per_second': 1.201, 'epoch': 3.0}


 40%|████      | 408/1020 [23:12<28:30,  2.80s/it]  
 40%|████      | 408/1020 [23:33<28:30,  2.80s/it]

{'eval_loss': 0.2916131317615509, 'eval_accuracy': 0.9458128078817734, 'eval_precision': 0.9887640449438202, 'eval_recall': 0.8979591836734694, 'eval_f1': 0.9411764705882353, 'eval_runtime': 21.682, 'eval_samples_per_second': 9.363, 'eval_steps_per_second': 1.199, 'epoch': 4.0}


 49%|████▉     | 500/1020 [28:30<27:31,  3.18s/it]  

{'loss': 0.1446, 'grad_norm': 0.0014424180844798684, 'learning_rate': 2.5490196078431373e-05, 'epoch': 4.9}


 50%|█████     | 510/1020 [29:05<22:57,  2.70s/it]
 50%|█████     | 510/1020 [29:27<22:57,  2.70s/it]

{'eval_loss': 0.4050520658493042, 'eval_accuracy': 0.9408866995073891, 'eval_precision': 0.9777777777777777, 'eval_recall': 0.8979591836734694, 'eval_f1': 0.9361702127659575, 'eval_runtime': 21.6628, 'eval_samples_per_second': 9.371, 'eval_steps_per_second': 1.2, 'epoch': 5.0}


 60%|██████    | 612/1020 [34:46<17:43,  2.61s/it]  
 60%|██████    | 612/1020 [35:08<17:43,  2.61s/it]

{'eval_loss': 0.42552894353866577, 'eval_accuracy': 0.9261083743842364, 'eval_precision': 0.9770114942528736, 'eval_recall': 0.8673469387755102, 'eval_f1': 0.918918918918919, 'eval_runtime': 21.7251, 'eval_samples_per_second': 9.344, 'eval_steps_per_second': 1.197, 'epoch': 6.0}


 70%|███████   | 714/1020 [40:28<13:34,  2.66s/it]  
 70%|███████   | 714/1020 [40:50<13:34,  2.66s/it]

{'eval_loss': 0.39993777871131897, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.9782608695652174, 'eval_recall': 0.9183673469387755, 'eval_f1': 0.9473684210526315, 'eval_runtime': 21.4738, 'eval_samples_per_second': 9.453, 'eval_steps_per_second': 1.211, 'epoch': 7.0}


 80%|████████  | 816/1020 [46:10<09:00,  2.65s/it]
 80%|████████  | 816/1020 [46:32<09:00,  2.65s/it]

{'eval_loss': 0.5196049213409424, 'eval_accuracy': 0.9359605911330049, 'eval_precision': 0.9775280898876404, 'eval_recall': 0.8877551020408163, 'eval_f1': 0.93048128342246, 'eval_runtime': 21.7418, 'eval_samples_per_second': 9.337, 'eval_steps_per_second': 1.196, 'epoch': 8.0}


 90%|█████████ | 918/1020 [51:53<04:33,  2.68s/it]
 90%|█████████ | 918/1020 [52:14<04:33,  2.68s/it]

{'eval_loss': 0.523329496383667, 'eval_accuracy': 0.9408866995073891, 'eval_precision': 0.9777777777777777, 'eval_recall': 0.8979591836734694, 'eval_f1': 0.9361702127659575, 'eval_runtime': 21.4452, 'eval_samples_per_second': 9.466, 'eval_steps_per_second': 1.212, 'epoch': 9.0}


 98%|█████████▊| 1000/1020 [56:34<01:03,  3.17s/it]

{'loss': 0.0043, 'grad_norm': 0.00048229817184619606, 'learning_rate': 9.80392156862745e-07, 'epoch': 9.8}


100%|██████████| 1020/1020 [57:39<00:00,  2.68s/it]
100%|██████████| 1020/1020 [58:03<00:00,  3.41s/it]


{'eval_loss': 0.5181981325149536, 'eval_accuracy': 0.9408866995073891, 'eval_precision': 0.9777777777777777, 'eval_recall': 0.8979591836734694, 'eval_f1': 0.9361702127659575, 'eval_runtime': 21.3669, 'eval_samples_per_second': 9.501, 'eval_steps_per_second': 1.217, 'epoch': 10.0}
{'train_runtime': 3483.2381, 'train_samples_per_second': 2.328, 'train_steps_per_second': 0.293, 'train_loss': 0.07300707561006158, 'epoch': 10.0}


100%|██████████| 26/26 [00:20<00:00,  1.25it/s]


Training fold 4...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
 10%|█         | 102/1020 [05:29<42:05,  2.75s/it]
 10%|█         | 102/1020 [05:52<42:05,  2.75s/it]

{'eval_loss': 0.5127457976341248, 'eval_accuracy': 0.8817733990147784, 'eval_precision': 0.9891304347826086, 'eval_recall': 0.7982456140350878, 'eval_f1': 0.883495145631068, 'eval_runtime': 23.2026, 'eval_samples_per_second': 8.749, 'eval_steps_per_second': 1.121, 'epoch': 1.0}


 20%|██        | 204/1020 [11:26<35:50,  2.64s/it]  
 20%|██        | 204/1020 [11:47<35:50,  2.64s/it]

{'eval_loss': 0.24222233891487122, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.956140350877193, 'eval_recall': 0.956140350877193, 'eval_f1': 0.956140350877193, 'eval_runtime': 21.6603, 'eval_samples_per_second': 9.372, 'eval_steps_per_second': 1.2, 'epoch': 2.0}


 30%|███       | 306/1020 [17:19<32:12,  2.71s/it]  
 30%|███       | 306/1020 [17:41<32:12,  2.71s/it]

{'eval_loss': 0.3322894871234894, 'eval_accuracy': 0.9310344827586207, 'eval_precision': 0.9464285714285714, 'eval_recall': 0.9298245614035088, 'eval_f1': 0.9380530973451328, 'eval_runtime': 21.75, 'eval_samples_per_second': 9.333, 'eval_steps_per_second': 1.195, 'epoch': 3.0}


 40%|████      | 408/1020 [23:12<28:08,  2.76s/it]  
 40%|████      | 408/1020 [23:33<28:08,  2.76s/it]

{'eval_loss': 0.34961169958114624, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.9814814814814815, 'eval_recall': 0.9298245614035088, 'eval_f1': 0.954954954954955, 'eval_runtime': 21.5686, 'eval_samples_per_second': 9.412, 'eval_steps_per_second': 1.205, 'epoch': 4.0}


 49%|████▉     | 500/1020 [28:36<28:35,  3.30s/it]  

{'loss': 0.1454, 'grad_norm': 0.01872587762773037, 'learning_rate': 2.5490196078431373e-05, 'epoch': 4.9}


 50%|█████     | 510/1020 [29:09<22:51,  2.69s/it]
 50%|█████     | 510/1020 [29:31<22:51,  2.69s/it]

{'eval_loss': 0.4384641945362091, 'eval_accuracy': 0.9507389162561576, 'eval_precision': 0.9814814814814815, 'eval_recall': 0.9298245614035088, 'eval_f1': 0.954954954954955, 'eval_runtime': 21.6054, 'eval_samples_per_second': 9.396, 'eval_steps_per_second': 1.203, 'epoch': 5.0}


 60%|██████    | 612/1020 [34:49<18:01,  2.65s/it]  
 60%|██████    | 612/1020 [35:11<18:01,  2.65s/it]

{'eval_loss': 0.6816512942314148, 'eval_accuracy': 0.9310344827586207, 'eval_precision': 0.9807692307692307, 'eval_recall': 0.8947368421052632, 'eval_f1': 0.9357798165137615, 'eval_runtime': 21.6851, 'eval_samples_per_second': 9.361, 'eval_steps_per_second': 1.199, 'epoch': 6.0}


 70%|███████   | 714/1020 [40:28<13:20,  2.61s/it]  
 70%|███████   | 714/1020 [40:50<13:20,  2.61s/it]

{'eval_loss': 0.562383770942688, 'eval_accuracy': 0.9408866995073891, 'eval_precision': 0.9811320754716981, 'eval_recall': 0.9122807017543859, 'eval_f1': 0.9454545454545454, 'eval_runtime': 21.791, 'eval_samples_per_second': 9.316, 'eval_steps_per_second': 1.193, 'epoch': 7.0}


 80%|████████  | 816/1020 [46:08<08:56,  2.63s/it]
 80%|████████  | 816/1020 [46:32<08:56,  2.63s/it]

{'eval_loss': 0.560398519039154, 'eval_accuracy': 0.9458128078817734, 'eval_precision': 0.9813084112149533, 'eval_recall': 0.9210526315789473, 'eval_f1': 0.9502262443438914, 'eval_runtime': 24.0716, 'eval_samples_per_second': 8.433, 'eval_steps_per_second': 1.08, 'epoch': 8.0}


 90%|█████████ | 918/1020 [52:11<04:45,  2.80s/it]
 90%|█████████ | 918/1020 [52:33<04:45,  2.80s/it]

{'eval_loss': 0.5616401433944702, 'eval_accuracy': 0.9408866995073891, 'eval_precision': 0.9811320754716981, 'eval_recall': 0.9122807017543859, 'eval_f1': 0.9454545454545454, 'eval_runtime': 21.8406, 'eval_samples_per_second': 9.295, 'eval_steps_per_second': 1.19, 'epoch': 9.0}


 98%|█████████▊| 1000/1020 [57:01<01:03,  3.18s/it]

{'loss': 0.0075, 'grad_norm': 0.0004252715443726629, 'learning_rate': 9.80392156862745e-07, 'epoch': 9.8}


100%|██████████| 1020/1020 [58:08<00:00,  2.64s/it]
100%|██████████| 1020/1020 [58:32<00:00,  3.44s/it]


{'eval_loss': 0.5630432963371277, 'eval_accuracy': 0.9408866995073891, 'eval_precision': 0.9811320754716981, 'eval_recall': 0.9122807017543859, 'eval_f1': 0.9454545454545454, 'eval_runtime': 21.4929, 'eval_samples_per_second': 9.445, 'eval_steps_per_second': 1.21, 'epoch': 10.0}
{'train_runtime': 3512.4934, 'train_samples_per_second': 2.309, 'train_steps_per_second': 0.29, 'train_loss': 0.07491862639906365, 'epoch': 10.0}


100%|██████████| 26/26 [00:21<00:00,  1.21it/s]


Training fold 5...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
 10%|█         | 102/1020 [05:36<43:57,  2.87s/it]
 10%|█         | 102/1020 [05:58<43:57,  2.87s/it]

{'eval_loss': 0.14695677161216736, 'eval_accuracy': 0.9554455445544554, 'eval_precision': 0.9782608695652174, 'eval_recall': 0.9278350515463918, 'eval_f1': 0.9523809523809523, 'eval_runtime': 22.1539, 'eval_samples_per_second': 9.118, 'eval_steps_per_second': 1.174, 'epoch': 1.0}


 20%|██        | 204/1020 [11:37<40:01,  2.94s/it]  
 20%|██        | 204/1020 [11:59<40:01,  2.94s/it]

{'eval_loss': 0.15474861860275269, 'eval_accuracy': 0.9653465346534653, 'eval_precision': 0.9891304347826086, 'eval_recall': 0.9381443298969072, 'eval_f1': 0.9629629629629629, 'eval_runtime': 21.7433, 'eval_samples_per_second': 9.29, 'eval_steps_per_second': 1.196, 'epoch': 2.0}


 30%|███       | 306/1020 [17:31<34:25,  2.89s/it]  
 30%|███       | 306/1020 [17:53<34:25,  2.89s/it]

{'eval_loss': 0.19017042219638824, 'eval_accuracy': 0.9702970297029703, 'eval_precision': 0.989247311827957, 'eval_recall': 0.9484536082474226, 'eval_f1': 0.968421052631579, 'eval_runtime': 21.9169, 'eval_samples_per_second': 9.217, 'eval_steps_per_second': 1.186, 'epoch': 3.0}


 40%|████      | 408/1020 [23:25<28:54,  2.83s/it]  
 40%|████      | 408/1020 [23:46<28:54,  2.83s/it]

{'eval_loss': 0.2532637119293213, 'eval_accuracy': 0.9603960396039604, 'eval_precision': 0.9587628865979382, 'eval_recall': 0.9587628865979382, 'eval_f1': 0.9587628865979382, 'eval_runtime': 21.4578, 'eval_samples_per_second': 9.414, 'eval_steps_per_second': 1.212, 'epoch': 4.0}


 49%|████▉     | 500/1020 [28:44<28:28,  3.29s/it]  

{'loss': 0.1306, 'grad_norm': 0.0011706247460097075, 'learning_rate': 2.5490196078431373e-05, 'epoch': 4.9}


 50%|█████     | 510/1020 [29:19<23:59,  2.82s/it]
 50%|█████     | 510/1020 [29:41<23:59,  2.82s/it]

{'eval_loss': 0.30518102645874023, 'eval_accuracy': 0.9504950495049505, 'eval_precision': 0.9887640449438202, 'eval_recall': 0.9072164948453608, 'eval_f1': 0.946236559139785, 'eval_runtime': 21.5332, 'eval_samples_per_second': 9.381, 'eval_steps_per_second': 1.207, 'epoch': 5.0}


 60%|██████    | 612/1020 [35:06<18:59,  2.79s/it]  
 60%|██████    | 612/1020 [35:27<18:59,  2.79s/it]

{'eval_loss': 0.28991377353668213, 'eval_accuracy': 0.9504950495049505, 'eval_precision': 0.9887640449438202, 'eval_recall': 0.9072164948453608, 'eval_f1': 0.946236559139785, 'eval_runtime': 21.5166, 'eval_samples_per_second': 9.388, 'eval_steps_per_second': 1.208, 'epoch': 6.0}


 70%|███████   | 714/1020 [40:53<14:20,  2.81s/it]  
 70%|███████   | 714/1020 [41:15<14:20,  2.81s/it]

{'eval_loss': 0.3274470269680023, 'eval_accuracy': 0.9554455445544554, 'eval_precision': 0.9888888888888889, 'eval_recall': 0.9175257731958762, 'eval_f1': 0.9518716577540107, 'eval_runtime': 21.6985, 'eval_samples_per_second': 9.309, 'eval_steps_per_second': 1.198, 'epoch': 7.0}


 80%|████████  | 816/1020 [46:39<09:30,  2.80s/it]
 80%|████████  | 816/1020 [47:00<09:30,  2.80s/it]

{'eval_loss': 0.3132208585739136, 'eval_accuracy': 0.9554455445544554, 'eval_precision': 0.9888888888888889, 'eval_recall': 0.9175257731958762, 'eval_f1': 0.9518716577540107, 'eval_runtime': 21.3986, 'eval_samples_per_second': 9.44, 'eval_steps_per_second': 1.215, 'epoch': 8.0}


 90%|█████████ | 918/1020 [52:24<04:36,  2.71s/it]
 90%|█████████ | 918/1020 [52:46<04:36,  2.71s/it]

{'eval_loss': 0.31521517038345337, 'eval_accuracy': 0.9554455445544554, 'eval_precision': 0.9888888888888889, 'eval_recall': 0.9175257731958762, 'eval_f1': 0.9518716577540107, 'eval_runtime': 21.5873, 'eval_samples_per_second': 9.357, 'eval_steps_per_second': 1.204, 'epoch': 9.0}


 98%|█████████▊| 1000/1020 [57:08<01:04,  3.23s/it]

{'loss': 0.0004, 'grad_norm': 0.003112707519903779, 'learning_rate': 9.80392156862745e-07, 'epoch': 9.8}


100%|██████████| 1020/1020 [58:14<00:00,  2.71s/it]
100%|██████████| 1020/1020 [58:37<00:00,  3.45s/it]


{'eval_loss': 0.31980279088020325, 'eval_accuracy': 0.9554455445544554, 'eval_precision': 0.9888888888888889, 'eval_recall': 0.9175257731958762, 'eval_f1': 0.9518716577540107, 'eval_runtime': 21.1984, 'eval_samples_per_second': 9.529, 'eval_steps_per_second': 1.227, 'epoch': 10.0}
{'train_runtime': 3517.803, 'train_samples_per_second': 2.308, 'train_steps_per_second': 0.29, 'train_loss': 0.06421519715604875, 'epoch': 10.0}


100%|██████████| 26/26 [00:20<00:00,  1.25it/s]


In [96]:
# Save the best model
best_model.save_pretrained("transformer")
tokenizer.save_pretrained("transformer")

# Print average metrics across all folds
print(f"Average Accuracy: {np.mean(accuracy_scores)}")
print(f"Average Precision: {np.mean(precision_scores)}")
print(f"Average Recall: {np.mean(recall_scores)}")
print(f"Average F1 Score: {np.mean(f1_scores)}")
print("Best model saved as 'transformer'")

Average Accuracy: 0.9546359069404478
Average Precision: 0.9791545141516071
Average Recall: 0.929038057631747
Average F1 Score: 0.9531901196000738
Best model saved as 'transformer'


In [97]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch

# Load the pre-trained model and tokenizer from the folder
model = RobertaForSequenceClassification.from_pretrained('transformer')
tokenizer = RobertaTokenizer.from_pretrained('transformer')

# Load the new dataset (e.g., 'test.xlsx')
test_data = pd.read_excel('../test.xlsx')

# Extract the true labels for the test set
true_labels = test_data['kategori'].tolist()  # Assuming 'kategori_enc' has the true labels

# Preprocess the new data (tokenize and prepare the input)
test_texts = test_data['berita'].tolist()

# Tokenize the texts
encoding = tokenizer(test_texts, truncation=True, padding=True, max_length=216, return_tensors="pt")

# Get the input tensors
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# Make predictions
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient calculation during inference
    outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits  # Model outputs raw logits

# Convert logits to predicted labels (using argmax to get the highest probability)
predictions = torch.argmax(logits, dim=-1)

# Convert predictions and true labels to numpy arrays
predicted_labels = predictions.numpy()
true_labels = torch.tensor(true_labels).numpy()

# Calculate metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

# Print the metrics
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Accuracy: 0.951
Precision: 0.9447731755424064
Recall: 0.958
F1 Score: 0.9513406156901688


In [98]:
print(best_fold)

1
