<a href="https://colab.research.google.com/github/saktiworkstation/reinforcement-learning-as-a-character/blob/main/Dynamic_Character_Evolution_v_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Pastikan dependensi terinstall
# ok

In [11]:
# Pastikan menjalankan di Google Colab dengan runtime GPU jika memungkinkan
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Categorical
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, TensorDataset
import random
import numpy as np

# Set seed untuk reproduktifitas
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

#########################
# 1. Adaptive Personality Engine (RL)
#########################

class PolicyNetwork(nn.Module):
    """
    Jaringan untuk mempelajari strategi karakter berdasarkan state
    Menggunakan Fully Connected Layers untuk menghasilkan distribusi probabilitas atas aksi.
    """
    def __init__(self, state_size, action_size, hidden_size=128):
        super(PolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.action_head = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        action_probs = F.softmax(self.action_head(x), dim=-1)
        return action_probs

def train_rl_policy(policy_net, optimizer, episodes=1000, gamma=0.99):
    """
    Simulasi pelatihan RL dengan data dummy:
    - Menghasilkan state acak
    - Memilih aksi berdasarkan policy network
    - Mendapatkan reward dummy (misalnya reward positif jika aksi tertentu diambil)
    - Melakukan update policy dengan REINFORCE algorithm
    """
    policy_net.train()
    for episode in range(episodes):
        # Dummy state: vektor acak
        state = torch.FloatTensor(np.random.rand(1, 10))
        probs = policy_net(state)
        m = Categorical(probs)
        action = m.sample()

        # Dummy reward: misal, reward positif jika aksi 0 diambil
        reward = 1.0 if action.item() == 0 else -1.0

        # Hitung loss berdasarkan REINFORCE: -log(prob) * reward
        loss = -m.log_prob(action) * reward

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if episode % 200 == 0:
            print(f"[RL] Episode {episode}/{episodes}, Loss: {loss.item():.4f}")

#########################
# 2. Conversational Intelligence (NLP)
#########################

class ConversationalModule(nn.Module):
    """
    Modul NLP sederhana menggunakan embedding dan LSTM untuk menghasilkan respons percakapan.
    Data input berupa indeks token, dan model menghasilkan distribusi kata berikutnya.
    """
    def __init__(self, vocab_size, embed_size=64, hidden_size=128, num_layers=1):
        super(ConversationalModule, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_seq, hidden=None):
        embeds = self.embedding(input_seq)
        lstm_out, hidden = self.lstm(embeds, hidden)
        output = self.fc(lstm_out)
        return output, hidden

def train_conversational_module(conv_module, optimizer, vocab_size, epochs=5):
    """
    Simulasi pelatihan modul percakapan dengan data dummy:
    - Input berupa urutan token acak
    - Target adalah pergeseran urutan token (next word prediction)
    """
    conv_module.train()
    loss_fn = nn.CrossEntropyLoss()
    # Dummy dataset: 100 sequence, panjang 10
    num_sequences = 100
    seq_length = 10
    inputs = torch.randint(0, vocab_size, (num_sequences, seq_length))
    targets = torch.roll(inputs, shifts=-1, dims=1)  # target adalah pergeseran 1 posisi

    dataset = TensorDataset(inputs, targets)
    loader = DataLoader(dataset, batch_size=16, shuffle=True)

    for epoch in range(epochs):
        total_loss = 0.0
        for input_seq, target_seq in loader:
            optimizer.zero_grad()
            outputs, _ = conv_module(input_seq)
            # reshape outputs dan target untuk CrossEntropyLoss: (batch*seq, vocab_size) vs (batch*seq)
            loss = loss_fn(outputs.view(-1, vocab_size), target_seq.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"[NLP] Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(loader):.4f}")

#########################
# 3. Visual Perception and Learning (CNN)
#########################

class VisualPerceptionCNN(nn.Module):
    """
    CNN untuk pengenalan elemen visual. Contoh arsitektur sederhana untuk klasifikasi.
    """
    def __init__(self, num_classes=10):
        super(VisualPerceptionCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Output: [batch, 16, 16, 16] (misalnya input 32x32)
        x = self.pool(F.relu(self.conv2(x)))  # Output: [batch, 32, 8, 8]
        x = x.view(-1, 32 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def train_visual_cnn(cnn_module, optimizer, epochs=3):
    """
    Pelatihan CNN menggunakan dataset CIFAR-10 sebagai contoh.
    Dataset diunduh secara otomatis oleh torchvision.
    """
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
    trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

    loss_fn = nn.CrossEntropyLoss()
    cnn_module.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = cnn_module(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 100 == 0:
                print(f"[CNN] Epoch {epoch+1}/{epochs}, Batch {i}, Loss: {loss.item():.4f}")
    print("Selesai melatih CNN.")

#########################
# 4. Real-Time Evolution Engine (Integrasi Semua Modul)
#########################

class DynamicCharacter(nn.Module):
    """
    Integrasi dari ketiga modul utama.
    - Menggunakan fitur state dari PolicyNetwork untuk adaptasi perilaku
    - Fitur NLP untuk dialog kontekstual
    - Fitur visual dari CNN untuk analisis lingkungan
    Output akhir berupa vektor representasi kepribadian yang dapat digunakan untuk menentukan respon/aksi.
    """
    def __init__(self, state_size, action_size, vocab_size, num_classes):
        super(DynamicCharacter, self).__init__()
        self.policy_net = PolicyNetwork(state_size, action_size)
        self.conv_module = ConversationalModule(vocab_size)
        self.visual_module = VisualPerceptionCNN(num_classes)
        # Layer integrasi (fusion) dari fitur-fitur yang diperoleh
        self.fc_fusion = nn.Linear(128 + 128 + 128, 256)
        self.fc_out = nn.Linear(256, action_size)  # Output dapat diinterpretasikan sebagai sinyal adaptasi strategi

    def forward(self, state, text_seq, image):
        # Dapatkan fitur dari masing-masing modul
        # 1. RL: Ambil fitur dari hidden layer sebelum output akhir
        rl_feat = F.relu(self.policy_net.fc1(state))

        # 2. NLP: Ambil hidden state dari LSTM (gunakan token pertama sebagai representasi)
        embeds = self.conv_module.embedding(text_seq)
        lstm_out, (hn, cn) = self.conv_module.lstm(embeds)
        nlp_feat = hn[-1]  # ambil hidden state dari layer terakhir

        # 3. Visual: Ambil fitur dari CNN (dari fc1)
        x = F.relu(self.visual_module.conv1(image))
        x = self.visual_module.pool(x)
        x = F.relu(self.visual_module.conv2(x))
        x = self.visual_module.pool(x)
        x = x.view(x.size(0), -1)
        visual_feat = F.relu(self.visual_module.fc1(x))

        # Fusion: Gabungkan ketiga fitur (asumsi dimensi disesuaikan)
        combined = torch.cat([rl_feat, nlp_feat, visual_feat], dim=1)
        fusion = F.relu(self.fc_fusion(combined))
        output = self.fc_out(fusion)
        return output

#########################
# 5. Simulasi Pelatihan dan Export Model
#########################

def train_dynamic_character(model, epochs=3):
    """
    Simulasi pelatihan gabungan dengan dummy data untuk masing-masing input:
    - state: vektor acak
    - text_seq: urutan token acak
    - image: citra acak (sesuaikan dengan ukuran yang diperlukan oleh CNN)
    """
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()  # Dummy loss, misal: perbedaan antara output dan target dummy

    model.train()
    for epoch in range(epochs):
        # Buat dummy input
        batch_size = 16
        state = torch.FloatTensor(np.random.rand(batch_size, 10))
        text_seq = torch.randint(0, 500, (batch_size, 10))  # vocab_size diasumsikan 500
        image = torch.randn(batch_size, 3, 32, 32)  # ukuran gambar 32x32
        target = torch.FloatTensor(np.random.rand(batch_size, model.fc_out.out_features))

        optimizer.zero_grad()
        output = model(state, text_seq, image)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        print(f"[DynamicCharacter] Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

# Inisialisasi parameter
state_size = 10
action_size = 4   # misal, 4 kemungkinan aksi/perilaku
vocab_size = 500  # untuk modul NLP
num_classes = 10  # untuk CNN, misal klasifikasi 10 kelas

# Inisialisasi masing-masing optimizer untuk modul RL, NLP, dan CNN
policy_net = PolicyNetwork(state_size, action_size)
optimizer_rl = optim.Adam(policy_net.parameters(), lr=0.001)

conv_module = ConversationalModule(vocab_size)
optimizer_nlp = optim.Adam(conv_module.parameters(), lr=0.001)

visual_module = VisualPerceptionCNN(num_classes)
optimizer_cnn = optim.Adam(visual_module.parameters(), lr=0.001)

# Latih masing-masing modul secara terpisah
print("Mulai pelatihan modul RL...")
train_rl_policy(policy_net, optimizer_rl, episodes=500)

print("\nMulai pelatihan modul NLP...")
train_conversational_module(conv_module, optimizer_nlp, vocab_size, epochs=3)

print("\nMulai pelatihan modul CNN...")
train_visual_cnn(visual_module, optimizer_cnn, epochs=1)  # gunakan 1 epoch untuk simulasi cepat

# Integrasi ke dalam model DynamicCharacter
dynamic_character = DynamicCharacter(state_size, action_size, vocab_size, num_classes)
# Salin bobot masing-masing modul yang sudah dilatih ke dalam model integrasi
dynamic_character.policy_net.load_state_dict(policy_net.state_dict())
dynamic_character.conv_module.load_state_dict(conv_module.state_dict())
dynamic_character.visual_module.load_state_dict(visual_module.state_dict())

print("\nMulai pelatihan Dynamic Character (integrasi)...")
train_dynamic_character(dynamic_character, epochs=3)

# Simpan model akhir ke file dengan ekstensi .nn
model_filename = "dynamic_character_model.nn"
torch.save(dynamic_character.state_dict(), model_filename)
print(f"\nModel telah disimpan sebagai {model_filename}")

Mulai pelatihan modul RL...
[RL] Episode 0/500, Loss: -1.4577
[RL] Episode 200/500, Loss: 0.0013
[RL] Episode 400/500, Loss: 0.0001

Mulai pelatihan modul NLP...
[NLP] Epoch 1/3, Loss: 6.2187
[NLP] Epoch 2/3, Loss: 6.1673
[NLP] Epoch 3/3, Loss: 6.1264

Mulai pelatihan modul CNN...
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:04<00:00, 41.3MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
[CNN] Epoch 1/1, Batch 0, Loss: 2.3018
[CNN] Epoch 1/1, Batch 100, Loss: 1.7802
[CNN] Epoch 1/1, Batch 200, Loss: 1.5083
[CNN] Epoch 1/1, Batch 300, Loss: 1.4672
[CNN] Epoch 1/1, Batch 400, Loss: 1.4815
[CNN] Epoch 1/1, Batch 500, Loss: 1.1563
[CNN] Epoch 1/1, Batch 600, Loss: 1.2887
[CNN] Epoch 1/1, Batch 700, Loss: 1.3157
Selesai melatih CNN.

Mulai pelatihan Dynamic Character (integrasi)...
[DynamicCharacter] Epoch 1/3, Loss: 0.9204
[DynamicCharacter] Epoch 2/3, Loss: 0.1407
[DynamicCharacter] Epoch 3/3, Loss: 0.1287

Model telah disimpan sebagai dynamic_character_model.nn
