<a href="https://colab.research.google.com/github/salzakartika1802/PROJECT-DEEP-LEARNING/blob/main/versipakeprompt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow



In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# Fungsi untuk membuat generator sequences
def generate_sequences(file_path, tokenizer, max_sequence_len, batch_size=64, total_words=None):
    with open(file_path, "r", encoding="utf-8") as file:
        X, y = [], []
        for line in file:
            line = line.strip()
            if not line:  # Lewati baris kosong
                continue

            token_list = tokenizer.texts_to_sequences([line])[0]
            for i in range(1, len(token_list)):
                n_gram_sequence = token_list[:i+1]
                n_gram_sequence = pad_sequences([n_gram_sequence], maxlen=max_sequence_len, padding='pre')[0]
                X.append(n_gram_sequence[:-1])
                y.append(n_gram_sequence[-1])

                if len(X) == batch_size:
                    yield np.array(X), to_categorical(y, num_classes=total_words)
                    X, y = [], []

        if X:  # Yield sisa data jika ada
            yield np.array(X), to_categorical(y, num_classes=total_words)

# Fungsi untuk mengadaptasi prompt pengguna
def adapt_prompt(prompt, tokenizer, max_sequence_len):
    """
    Mengubah prompt menjadi seed text yang sesuai dengan model.
    Jika prompt terlalu panjang, potong hingga sesuai dengan max_sequence_len.
    """
    token_list = tokenizer.texts_to_sequences([prompt])[0]
    if len(token_list) > max_sequence_len - 1:
        token_list = token_list[-(max_sequence_len - 1):]  # Potong token berlebih
    return ' '.join([word for word, index in tokenizer.word_index.items() if index in token_list])

# Fungsi untuk sampling dengan temperature
def sample_with_temperature(predictions, temperature=1.0):
    predictions = np.log(predictions + 1e-8) / temperature  # Tambahkan epsilon untuk stabilitas numerik
    exp_preds = np.exp(predictions)
    probabilities = exp_preds / np.sum(exp_preds)
    return np.random.choice(len(probabilities), p=probabilities)

# Fungsi untuk menghasilkan cerita berdasarkan seed text
def generate_story(seed_text, next_words, max_sequence_len, temperature=1.0):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predictions = model.predict(token_list, verbose=0)[0]
        predicted = sample_with_temperature(predictions, temperature)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

# Parameter utama
file_path = "datasetnovel.txt"
max_sequence_len = 50
batch_size = 64

# 1. Persiapan Data
with open(file_path, "r", encoding="utf-8") as file:
    data = file.read()

# Inisialisasi tokenizer
tokenizer = Tokenizer(num_words=10000)  # Batasi kosakata hingga 10.000 kata
tokenizer.fit_on_texts([data])
total_words = len(tokenizer.word_index) + 1

# Hitung jumlah sequence yang valid untuk menentukan steps_per_epoch
sequence_count = 0
for line in data.split("\n"):
    line = line.strip()
    if not line:
        continue
    token_list = tokenizer.texts_to_sequences([line])[0]
    sequence_count += max(0, len(token_list) - 1)

steps_per_epoch = (sequence_count + batch_size - 1) // batch_size

# 2. Membangun Model
model = Sequential([
    Embedding(total_words, 64, input_length=max_sequence_len-1),
    LSTM(100),
    Dense(total_words, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 3. Melatih Model dengan Generator dan Early Stopping
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

model.fit(
    x=generate_sequences(file_path, tokenizer, max_sequence_len, batch_size=batch_size, total_words=total_words),
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    verbose=1,
    callbacks=[early_stopping]
)

# 4. Input dari Pengguna
prompt = input("Berikan prompt (contoh: 'give a story about the romance between a man and a woman'): ")
next_words = int(input("Masukkan jumlah kata yang ingin dihasilkan: "))

# Adaptasi prompt untuk memastikan panjangnya sesuai model
seed_text = adapt_prompt(prompt, tokenizer, max_sequence_len)

# Menghasilkan teks berdasarkan prompt
generated_text = generate_story(seed_text, next_words=next_words, max_sequence_len=max_sequence_len, temperature=1.0)

# Menampilkan hasil
print("\nGenerated Story:")
print(generated_text)

# Opsional: Simpan hasil ke file
save_to_file = input("\nApakah Anda ingin menyimpan hasil ke file? (y/n): ").strip().lower()
if save_to_file == "y":
    with open("generated_story.txt", "w", encoding="utf-8") as output_file:
        output_file.write(generated_text)
    print("Hasil teks disimpan di 'generated_story.txt'.")


Epoch 1/10




[1m9620/9620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m864s[0m 89ms/step - accuracy: 0.1943 - loss: 4.8490
Epoch 2/10
[1m9620/9620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10
[1m9620/9620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 4/10
[1m9620/9620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m9620/9620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00


  self.gen.throw(typ, value, traceback)


Berikan prompt (contoh: 'give a story about the romance between a man and a woman'): give a story about triangle romance between a man and 2 women
Masukkan jumlah kata yang ingin dihasilkan: 500

Generated Story:
a and about story give man between 2 in her outdoor song to see what get gone on or miles for his next adventure on his home for being teddy there came to leave deep on popped all the magical powers to spin space another new friend hunt in the fireflies to the sky before playing above the big tree and chirped out of her delicious next magical thing to smell the little fur smiled and gently searched looking to a real pictures in her stripes and keep the cross friends but names beep's making amazing things to having a special detective sparkling outdoor blue in the fire chest ziggy grew so much sidewalks nuts at need her friends and piggy ahoy food and discover it or the flamingos strength to tree to sit in the garden of their colorful picnic splendid fred had toybox lighting wh