In [1]:
# Mount Google Drive at /content/drive (Google Colab only). The checkpoint
# paths used by the training and testing cells point below this directory.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

# Download Dataset (Shakespeare)

In [None]:
# Fetch the Tiny Shakespeare corpus and store it as shakespeare.txt.
# TLS certificate verification is left enabled: the host serves a valid
# certificate, and disabling the check would allow a tampered download.
!wget \
    https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt \
    -O shakespeare.txt

--2025-06-12 13:55:55--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘shakespeare.txt’


2025-06-12 13:55:55 (21.4 MB/s) - ‘shakespeare.txt’ saved [1115394/1115394]



#Load Dataset

In [None]:
# Read the downloaded corpus into memory as one string.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale.
with open('shakespeare.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Show a short preview so the reader can confirm the file loaded correctly.
print("Download berhasil!")
print("\nBerikut 300 karakter pertama dari naskah Shakespeare:\n")
print(text[:300])

Download berhasil!

Berikut 300 karakter pertama dari naskah Shakespeare:

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us


#Exploratory Data Analysis (EDA)

In [None]:
# `text` holds the full corpus loaded by the previous cell.

# The vocabulary is every distinct character in the corpus, in sorted order
# so that the character <-> index mapping is stable between runs.
chars = sorted(set(text))
vocab_size = len(chars)

print(f'Total karakter dalam naskah: {len(text)}')
print(f'Total karakter unik (kosakata): {vocab_size}')
print('\nBerikut adalah kosakata yang akan dipelajari model:')
print(chars)

Total karakter dalam naskah: 1115394
Total karakter unik (kosakata): 65

Berikut adalah kosakata yang akan dipelajari model:
['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [None]:
# Forward lookup table: character -> integer index (position in `chars`).
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

# Reverse lookup table: integer index -> character.
int_to_char = dict(enumerate(chars))

# Spot-check both directions of the mapping.
print("\nContoh mapping dari char_to_int:")
print(f"'h' -> {char_to_int['h']}")
print(f"'a' -> {char_to_int['a']}")

print("\nContoh mapping dari int_to_char:")
print(f"53 -> '{int_to_char[53]}'")
print(f"46 -> '{int_to_char[46]}'")


Contoh mapping dari char_to_int:
'h' -> 46
'a' -> 39

Contoh mapping dari int_to_char:
53 -> 'o'
46 -> 'h'


## Linguistic EDA Summary

* **Corpus Size:** Naskah yang akan kita gunakan sebagai bahan belajar terdiri dari **1.115.394** total karakter.
* **Vocabulary Size:** "Alfabet" yang akan dipelajari oleh model kita terdiri dari **65** karakter unik, mencakup huruf kecil, huruf besar, spasi, tanda baca, dan karakter spesial lainnya.
* **Mapping:** Kamus untuk translasi dua arah (`char <-> int`) telah berhasil dibuat. Ini adalah langkah fundamental untuk proses *vectorization* data teks kita di tahap selanjutnya.

#Data Preprocessing

In [None]:
# `text` comes from the loading cell, `char_to_int` from the mapping cell.

# Length of every input window (the "question" shown to the model).
seq_length = 100

# Encode the whole corpus exactly once. Encoding inside the sliding-window
# loop would look up every character about `seq_length` times.
encoded_text = [char_to_int[char] for char in text]

# Slide a window of `seq_length` characters over the corpus with stride 1.
# Each window is one training input ...
sequences = [
    encoded_text[start:start + seq_length]
    for start in range(len(encoded_text) - seq_length)
]
# ... and the character that immediately follows the window is its label.
# Window i ends at index i + seq_length - 1, so its label is
# encoded_text[i + seq_length]; that is exactly this slice.
labels = encoded_text[seq_length:]

# Number of (input window, next character) training pairs produced.
n_patterns = len(sequences)
print(f"Total 'kartu latihan' (pola) yang dibuat: {n_patterns}")

Total 'kartu latihan' (pola) yang dibuat: 1115294


In [None]:
import numpy as np
from tensorflow.keras.utils import to_categorical

# Stack the integer windows into a (n_patterns, seq_length, 1) array: one
# feature per time step. Building it directly as float32 avoids holding an
# int64 array and a float64 copy of the same data at once.
X = np.asarray(sequences, dtype=np.float32).reshape(n_patterns, seq_length, 1)

# Scale the character indices into the range [0, 1) (in place, no extra copy).
X /= float(vocab_size)

# One-hot encode the labels. `num_classes` is pinned to the vocabulary size so
# the output width never depends on which characters happen to occur as labels.
y = to_categorical(labels, num_classes=vocab_size)

# Report the final tensor shapes.
print("Bentuk akhir dari X (Input):", X.shape)
print("Bentuk akhir dari y (Output):", y.shape)

Bentuk akhir dari X (Input): (1115294, 100, 1)
Bentuk akhir dari y (Output): (1115294, 65)


#Build Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout

# --- Assemble the Sequential model ---
model = Sequential([
    # Explicit input specification: (time steps, features per step).
    # Declaring the shape with an Input object instead of passing
    # `input_shape=` to the first layer is the form Keras recommends for
    # Sequential models and avoids the deprecation warning.
    Input(shape=(X.shape[1], X.shape[2])),

    # Layer 1: first LSTM. It reads the raw (scaled) character indices.
    # `return_sequences=True` makes it emit its hidden state at EVERY time
    # step, which the stacked LSTM below needs as its input sequence.
    LSTM(256, return_sequences=True),
    Dropout(0.2),

    # Layer 2: second LSTM. It consumes the sequence of hidden states from
    # layer 1 and returns only its final state.
    LSTM(256),
    Dropout(0.2),

    # Output layer: one softmax probability per vocabulary character.
    Dense(y.shape[1], activation='softmax')
])

  super().__init__(**kwargs)


In [None]:
# Configure training: the Adam optimizer minimising categorical cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
# Print a summary of the model architecture.
model.summary()

#Training Model

In [None]:
import os
import warnings

from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint file name template; Keras fills in {epoch} and {loss} at save time.
filepath="/content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_{epoch:02d}_loss_{loss:.4f}.keras"

# The checkpoints are meant to be written to Google Drive. If nothing is
# mounted at /content/drive, the files would presumably land on the runtime's
# local disk instead, so warn loudly instead of failing silently. Training
# still proceeds.
if not os.path.ismount('/content/drive'):
    warnings.warn(
        "Google Drive does not appear to be mounted at /content/drive; "
        "checkpoints will not be saved to Drive."
    )

# ModelCheckpoint callback:
# - monitor='loss': track the training loss
# - verbose=1: log a message whenever a checkpoint is written
# - save_best_only=True: only save when the loss beats the best seen so far
# - mode='min': lower loss is better
checkpoint = ModelCheckpoint(filepath,
                             monitor='loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
callbacks_list = [checkpoint]

# --- Start training ---
# Train for 20 epochs to begin with; increase `epochs` for a longer run.
history = model.fit(
    X, y,
    epochs=20,
    batch_size=128,
    callbacks=callbacks_list
)

Epoch 1/20
[1m8713/8714[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 2.9972
Epoch 1: loss improved from inf to 2.77533, saving model to /content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_01_loss_2.7753.keras
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 30ms/step - loss: 2.9971
Epoch 2/20
[1m8713/8714[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 2.4413
Epoch 2: loss improved from 2.77533 to 2.38148, saving model to /content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_02_loss_2.3815.keras
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 30ms/step - loss: 2.4413
Epoch 3/20
[1m8713/8714[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 2.2356
Epoch 3: loss improved from 2.38148 to 2.20239, saving model to /content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_03_loss_2.2024.keras
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

#Test Model

In [None]:
import numpy as np
import sys

# Restore the weights of the best saved checkpoint into the in-memory model.
# Change the file name below if a different checkpoint should be used.
# NOTE(review): the previous comment cited a loss of 1.69 while the file name
# says 1.6290 — confirm that this is the intended checkpoint.
best_model_filename = '/content/drive/My Drive/Colab_Checkpoints/model_3rd_train_loss_1.6290.keras'
model.load_weights(best_model_filename)

# Compile again with the same loss and optimizer as the original compile call.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# --- Text generation with a temperature knob ---
def generate_text_with_temp(seed_pattern, temp=1.0, chars_to_gen=500):
    """Generate text one character at a time and stream it to stdout.

    Relies on the notebook globals `model`, `int_to_char` and `vocab_size`.

    Args:
        seed_pattern: sequence of integer character indices used as the
            initial context window. It is copied, never modified.
        temp: sampling temperature. Values below 1 sharpen the predicted
            distribution (more conservative text), values above 1 flatten it
            (more surprising text). A value <= 0 selects the most likely
            character deterministically.
        chars_to_gen: number of characters to generate.

    Returns:
        None. The seed and the generated text are written to stdout.
    """
    print(f"\n--- Generating text with temperature: {temp} ---")

    # Work on a private copy so the caller's seed list is left untouched.
    pattern = list(seed_pattern)
    sys.stdout.write("\"" + ''.join([int_to_char[value] for value in pattern]) + "\"")
    sys.stdout.write("\n\nHasil Karya AI:\n")

    for _ in range(chars_to_gen):
        # Shape the window as (batch=1, time steps, 1 feature) and scale it
        # the same way the training data was scaled.
        x = np.reshape(pattern, (1, len(pattern), 1))
        x = x / float(vocab_size)

        # Predicted probability of every vocabulary character.
        preds = np.asarray(model.predict(x, verbose=0)[0]).astype('float64')

        if temp <= 0:
            # Temperature 0 is the greedy limit; dividing by it is undefined.
            index = int(np.argmax(preds))
        else:
            # Re-weight the distribution in log space. Probabilities that are
            # exactly 0 give log = -inf, which correctly maps back to 0, so
            # the divide-by-zero warning is silenced rather than "fixed".
            with np.errstate(divide='ignore'):
                scaled = np.log(preds) / temp
            # Subtracting the maximum keeps exp() from overflowing at small
            # temperatures and does not change the normalised result.
            scaled -= np.max(scaled)
            exp_preds = np.exp(scaled)
            preds = exp_preds / np.sum(exp_preds)

            # Draw one character index from the re-weighted distribution.
            probas = np.random.multinomial(1, preds, 1)
            index = int(np.argmax(probas))

        sys.stdout.write(int_to_char[index])

        # Slide the context window: drop the oldest index, append the new one.
        pattern = pattern[1:] + [index]
    print("\n\nDone.")

# --- Run the experiment ---
# Pick one random seed window. The upper bound of np.random.randint is
# exclusive, so len(sequences) makes every window (including the last) eligible.
start = np.random.randint(0, len(sequences))
initial_pattern = sequences[start]

# Generate from the same seed at several temperatures. Each call receives its
# own copy of the seed.
for temp in (0.2, 0.5, 1.0, 1.2):
    generate_text_with_temp(list(initial_pattern), temp=temp)


--- Generating text with temperature: 0.2 ---
"e. Lend me thy hand,
I'll help thee: come, lend me thy hand.

AUTOLYCUS:
O, good sir, tenderly, O!

"

Hasil Karya AI:
SICINIUS:
H co not see the court

  preds = np.log(preds) / temp


e of the sortow shall be shall be a shall
be to the sea to the company of the were and so war a that I have to
the man of the courte of the house of the hand.

SICINIUS:
I co more sear the prince of the sunrd and the sente
of the fortune of the well of the courte of the sene
to the present of the fortune and men that shall be so
that the company and the man so the wiile of the courte
of the prove before that the prince of the secei
to me and to the common siale of

Done.

--- Generating text with temperature: 0.5 ---
"e. Lend me thy hand,
I'll help thee: come, lend me thy hand.

AUTOLYCUS:
O, good sir, tenderly, O!

"

Hasil Karya AI:
MISANDA:
G shall not come to here to that your for the care. You, met him the
suandsmes his houre to call me. though that
is not to our countel so report the sortow in his soaree
to the word to father as the brown nnt eor the
tiru in the semve and for tseasons than a like perton
to the warrent to the purpose of the srueas hrartised
of the dear suill bouh 

#Lanjut Process Training

In [None]:
# --- Continue training the existing model ---

# `epochs=40` is the index of the LAST epoch, not the number of extra epochs.
# `initial_epoch=21` starts the epoch counter after epoch 21, so this call runs
# epochs 22 through 40 (19 additional epochs); the log begins at "Epoch 22/40".
marathon_history = model.fit(
    X, y,
    epochs=40,
    initial_epoch=21,  # <-- resume after epoch 21; the first epoch logged is 22
    batch_size=128,
    callbacks=callbacks_list
)

Epoch 22/40
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 1.6801
Epoch 22: loss improved from 1.68986 to 1.68258, saving model to /content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_22_loss_1.6826.keras
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 30ms/step - loss: 1.6801
Epoch 23/40
[1m8713/8714[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 1.6728
Epoch 23: loss improved from 1.68258 to 1.67635, saving model to /content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_23_loss_1.6763.keras
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 30ms/step - loss: 1.6728
Epoch 24/40
[1m8713/8714[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 1.6700
Epoch 24: loss improved from 1.67635 to 1.67123, saving model to /content/drive/My Drive/Colab_Checkpoints/model_weights_epoch_24_loss_1.6712.keras
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37