<a href="https://colab.research.google.com/github/bjungweapon/mjc.ai.ml/blob/BDU/BDU_algorithm_summary_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[ RNN model ]

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define a simple RNN model.
# An explicit Input layer is used instead of passing `input_shape=` to the first
# layer: that argument is deprecated for Sequential models in recent Keras.
model = models.Sequential([
    layers.Input(shape=(None, 1)),  # (time steps, features); None = variable-length sequences
    layers.SimpleRNN(32),
    layers.Dense(1),
])

model.summary()

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_moons
# Toy two-class "moons" dataset; random_state pins the sampled points.
X, y = make_moons(n_samples=200, noise=0.2, random_state=0)
# Seed the classifier as well: its weight initialisation is random, so without a
# fixed random_state the fitted model differs from run to run.
clf = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000, random_state=0)
clf.fit(X, y)
# TODO: decision boundary visualization (not implemented in this cell)

In [None]:
import random

import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

# 1. Training corpus (extra sample sentences added for more training data)
sentences = [
    'hello world this is a test',
    'this is another test example',
    'one more sequence for modeling',
    'language modeling with recurrent neural networks',
    'simple rnn for text generation',
    'deep learning for natural language processing',
    'understanding sequences with rnn',
    'generating text from a learned model',
    'predicting the next word in a sentence',
    'recurrent networks can learn dependencies',
    'jungbg test',
    'jungbg aaaaa',
    'jungbg test2',
    'jungbg test3',  # was 'junbg' — a typo that added a stray one-off vocabulary token
    'jungbg test4',
    'jungbg test5',
    'jungbg test6',
    'jungbg test7',
    'jungbg test8',
    'jungbg test9',
    'jungbg test10'
]

# 2. Text data loading helper (optional)
def load_text_data(filepath):
    """Read a UTF-8 text file and return its non-blank lines, lower-cased.

    Args:
        filepath: Path of the text file to read.

    Returns:
        list[str]: One entry per non-blank line, lower-cased and stripped of
        leading/trailing whitespace.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        text = f.read().lower()
    # splitlines() copes with any line-ending convention. Blank lines (including
    # the one a trailing newline would produce with split('\n')) are dropped so
    # they do not end up as empty sentences in the corpus.
    return [line.strip() for line in text.splitlines() if line.strip()]

# To also train on a real text file, uncomment the lines below and set the path.
# (`os` is not imported in this notebook, hence the extra import line.)
# import os
# file_path = 'your_text_file.txt'
# if os.path.exists(file_path):
#     sentences.extend(load_text_data(file_path))

# 3. Fit the tokenizer and turn every sentence into a list of word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
# +1: Keras word indices start at 1, leaving 0 as the value pad_sequences fills in.
vocab_size = 1 + len(tokenizer.word_index)

# 4. Build (input, target) pairs n-gram style: each proper prefix of a sentence
#    is an input and the word immediately after it is the target.
prefix_target_pairs = [
    (seq[:cut], seq[cut])
    for seq in sequences
    for cut in range(1, len(seq))
]
# Left-pad the prefixes so they all share the length of the longest one.
X = pad_sequences([prefix for prefix, _ in prefix_target_pairs], padding='pre')
y = np.array([target for _, target in prefix_target_pairs])

# 5. Model definition
# Seed Python's `random`, NumPy and TensorFlow in one call so weight
# initialisation and batch shuffling repeat between runs. (Some GPU kernels may
# still introduce small run-to-run differences.)
set_random_seed(42)

embedding_dim = 10
rnn_units = 32  # number of units in the SimpleRNN layer
model = Sequential([
    # An explicit Input layer replaces Embedding's deprecated `input_length`
    # argument and lets summary() report output shapes before training.
    Input(shape=(X.shape[1],), dtype='int32'),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    SimpleRNN(rnn_units),
    Dense(vocab_size, activation='softmax'),  # one probability per vocabulary index
])
# Targets in `y` are integer word indices, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()  # inspect the model structure

# 6. Training
epochs = 20
model.fit(X, y, epochs=epochs, verbose=1)

# 7. Prediction helper
def predict_next_word(model, tokenizer, seed_text, max_len):
    """Predict the most likely word to follow ``seed_text``.

    Args:
        model: Trained model; ``model.predict`` is called on one padded row.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        seed_text: Text whose continuation should be predicted.
        max_len: Length the encoded seed is left-padded to (longer seeds are
            truncated by ``pad_sequences``). Callers in this notebook pass the
            training input length.

    Returns:
        The predicted word, or ``None`` when the tokenizer recognises none of
        the seed words, or when the top-scoring index has no entry in
        ``tokenizer.index_word``.
    """
    sequence = tokenizer.texts_to_sequences([seed_text])[0]
    if len(sequence) == 0:
        # No seed word is in the vocabulary: the model would only see padding,
        # so any "prediction" would be unrelated to the seed. Report no result.
        return None
    padded = pad_sequences([sequence], maxlen=max_len, padding='pre')
    prediction = model.predict(padded, verbose=0)
    # A single row was submitted; take the arg-max of that row and convert it to
    # a plain int for the index_word dictionary lookup.
    predicted_index = int(np.argmax(prediction[0]))
    return tokenizer.index_word.get(predicted_index)

# 8. Try a handful of different seed texts
seed_texts = ['hello world', 'this is', 'one more', 'predict', 'jungbg']
max_sequence_len = X.shape[1]  # seeds are padded to the width of the training inputs

# Single-step prediction: one next word per seed.
print("\n--- 예측 결과 ---")
for seed_text in seed_texts:
    next_word = predict_next_word(model, tokenizer, seed_text, max_sequence_len)
    print(f"Seed text: '{seed_text}', Next word prediction: '{next_word}'")

# Multi-step generation: feed each prediction back in, up to num_predictions
# words, stopping early as soon as no word can be predicted.
print("\n--- 연속적인 예측 시도 ---")
num_predictions = 5
for seed_text in seed_texts:
    current_text = seed_text
    for _ in range(num_predictions):
        next_word = predict_next_word(model, tokenizer, current_text, max_sequence_len)
        if next_word is None:
            break
        current_text = current_text + ' ' + next_word
    print(f"Seed text: '{seed_text}', Generated sequence: '{current_text}'")