# RNN From Scratch Implementation

This notebook demonstrates an RNN implemented from scratch using custom layers and compares it with the equivalent Keras implementation.

## 1. Setup and Imports

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import pickle
import json
import os
import sys
from collections import Counter
import warnings
# NOTE(review): this installs a blanket "ignore" filter for every warning
# category, so deprecation and numerical RuntimeWarnings are hidden too.
# Narrow or remove it when debugging.
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility.
# A single constant keeps the TensorFlow and NumPy seeds from drifting apart.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Record the runtime environment next to the results.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

## 2. Import From-Scratch Implementation

In [None]:
import sys
import os

# The `from_scratch` package is looked up in the parent of the working
# directory, so that parent is put at the front of the module search path
# (only once, so re-running the cell does not add duplicates).
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
if sys.path.count(parent_dir) == 0:
    sys.path.insert(0, parent_dir)

print(f"Current directory: {current_dir}")
print(f"Parent directory: {parent_dir}")

# Bring in the hand-written layers and the model wrapper.
# A failed import is reported but not re-raised: in that case the names
# below remain unbound and execution continues past this cell.
try:
    from from_scratch.layers import (
        Embedding, SimpleRNN, Bidirectional, Dropout,
        Dense, Softmax, RNNCell
    )
    from from_scratch.model import RNNModelFromScratch
except ImportError as e:
    print(f"Error importing from-scratch modules: {e}")
    print("Make sure the directory structure is correct")
else:
    print("✅ Successfully imported from-scratch modules!")

## 3. Data Loading and Preprocessing

In [None]:
def load_nusax_sentiment_data(data_dir="../../../data/nusax_sentiment"):
    """
    Load the NusaX-Sentiment dataset from local CSV files.

    Reads ``train.csv``, ``valid.csv`` and ``test.csv`` from ``data_dir`` and
    replaces each frame's ``label`` column with integer codes. The categories
    are derived from the training labels only and reused for the other two
    splits, so all three splits share one encoding.

    Args:
        data_dir: Directory containing the three CSV files. Defaults to the
            location the notebook originally hard-coded.

    Returns:
        ``(train_df, valid_df, test_df, label_mapping)`` where
        ``label_mapping`` maps each integer code to its original label.
        If a CSV file cannot be found, the result of
        ``create_synthetic_data()`` is returned instead.

    Raises:
        ValueError: If a split contains a label that is missing or that does
            not occur in the training split. Such labels would otherwise be
            encoded silently as ``-1``.
    """
    try:
        # Read order (train, valid, test) matches the file order on disk checks.
        frames = {
            split_name: pd.read_csv(os.path.join(data_dir, f"{split_name}.csv"))
            for split_name in ("train", "valid", "test")
        }
    except FileNotFoundError as e:
        print(f"Error: Could not find dataset files. {e}")
        print("Creating synthetic dataset for demonstration...")
        return create_synthetic_data()

    # Categories come from the training labels; every split is encoded
    # against them so the integer codes mean the same thing everywhere.
    categories = pd.Categorical(frames["train"]["label"].values).categories
    label_mapping = dict(enumerate(categories))

    for split_name, frame in frames.items():
        codes = pd.Categorical(frame["label"].values, categories=categories).codes
        # pandas encodes values outside `categories` (and NaN) as -1.
        unknown_mask = codes == -1
        if unknown_mask.any():
            unknown = sorted(set(frame.loc[unknown_mask, "label"].astype(str)))
            raise ValueError(
                f"{split_name} split has labels that are missing or absent "
                f"from the training set: {unknown}"
            )
        frame["label"] = codes

    print("Dataset loaded successfully from local files.")
    print(f"Label mapping: {label_mapping}")

    return frames["train"], frames["valid"], frames["test"], label_mapping


def create_synthetic_data(seed=42):
    """
    Create synthetic sentiment data for demonstration.

    Builds 800 short English phrases per sentiment (8 phrases repeated 100
    times), shuffles them, and cuts the result into 1600 / 400 / 400 rows for
    train / validation / test.

    Args:
        seed: Seed for the shuffle. A local ``RandomState`` is used, so the
            global NumPy random state is left untouched. The default
            reproduces the order obtained from ``np.random.seed(42)`` followed
            by ``np.random.permutation``.

    Returns:
        ``(train_df, valid_df, test_df, label_mapping)``. Each frame has
        ``text`` and ``label`` columns and its own 0-based index.
    """
    # Sample texts for each sentiment
    positive_texts = [
        "I love this product", "Great quality", "Excellent service", "Amazing experience",
        "Perfect solution", "Highly recommend", "Outstanding quality", "Very satisfied"
    ] * 100

    negative_texts = [
        "Poor quality", "Terrible service", "Disappointing product", "Waste of money",
        "Not recommended", "Bad experience", "Poor design", "Unsatisfied"
    ] * 100

    neutral_texts = [
        "Average product", "Okay service", "Nothing special", "Decent quality",
        "Fair price", "Standard features", "Regular experience", "Normal product"
    ] * 100

    texts = positive_texts + negative_texts + neutral_texts
    labels = [2] * len(positive_texts) + [0] * len(negative_texts) + [1] * len(neutral_texts)

    # Shuffle with a private generator instead of reseeding the global one.
    rng = np.random.RandomState(seed)
    indices = rng.permutation(len(texts))

    df = pd.DataFrame({
        'text': [texts[i] for i in indices],
        'label': [labels[i] for i in indices],
    })

    # Split into train, valid, test. Resetting the index gives each split an
    # independent frame indexed from 0, as frames read from CSV would be.
    train_df = df.iloc[:1600].reset_index(drop=True)
    valid_df = df.iloc[1600:2000].reset_index(drop=True)
    test_df = df.iloc[2000:].reset_index(drop=True)

    label_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}

    print("Synthetic dataset created.")
    print(f"Label mapping: {label_mapping}")

    return train_df, valid_df, test_df, label_mapping


# Load the dataset
train_df, val_df, test_df, label_mapping = load_nusax_sentiment_data()

print(f"Train set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")
print(f"Test set size: {len(test_df)}")
print("\nLabel distribution in training set:")
print(train_df['label'].value_counts().sort_index())