In [37]:
import os
import json
import numpy as np
import tensorflow as tf
from keras import layers, Model, losses, optimizers
from langchain import LLMChain, PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import pandas as pd

load_dotenv()

True

In [119]:
# IMPORTANT: CHANGE THIS FOR NEW TASK
# TASK_ID names the on-disk example memory: save_memory/load_memory use f"{TASK_ID}.npy".
TASK_ID = "classification_movie_sentiment_1"
# OBJECTIVE is the task description passed to curriculum_loop in the execute cell.
OBJECTIVE = "binary sentiment classification of movie reviews"

In [124]:
def save_memory(df_memory: pd.DataFrame, filepath: "str | None" = None) -> None:
    """
    Persist the DataFrame to a .npy file as a structured (record) array.

    Args:
      df_memory: The DataFrame to persist (e.g. columns ['text', 'label']).
                 The index is not written.
      filepath:  Optional path of the .npy file to write. When omitted, the
                 task-specific default f"{TASK_ID}.npy" is used. Note that
                 np.save appends ".npy" if the name does not end with it.

    Returns:
      None. A confirmation line is printed after the file is written.
    """
    if filepath is None:
        # Default location is derived from the notebook-level task id.
        filepath = f"{TASK_ID}.npy"

    # to_records turns each row into one element of a structured array;
    # object (string) columns are pickled by np.save.
    records = df_memory.to_records(index=False)
    np.save(filepath, records)
    print(f"Memory saved to {filepath}")

def load_memory(filepath: "str | None" = None) -> pd.DataFrame:
    """
    Load a previously saved memory .npy file back into a DataFrame.

    Args:
      filepath: Optional path to the .npy file created by save_memory.
                When omitted, the task-specific default f"{TASK_ID}.npy"
                is used.

    Returns:
      A pandas DataFrame reconstructed from the record array. If the file
      does not exist yet (first run for a task), or cannot be read, an empty
      DataFrame with columns ['text', 'label'] is returned instead; read
      failures other than a missing file are reported on stdout so that a
      corrupt memory file is not silently replaced.
    """
    if filepath is None:
        # Resolved outside the try block so a missing TASK_ID surfaces as an
        # error instead of being mistaken for "no memory yet".
        filepath = f"{TASK_ID}.npy"

    try:
        # NOTE(security): allow_pickle=True is required because the text column
        # is stored as pickled Python objects; unpickling can execute arbitrary
        # code, so only load memory files this notebook wrote itself.
        records = np.load(filepath, allow_pickle=True)
        return pd.DataFrame.from_records(records)
    except FileNotFoundError:
        # Expected on the first run of a new task: start with empty memory.
        return pd.DataFrame(columns=["text", "label"])
    except Exception as exc:
        # Stay best-effort like before, but make the failure visible.
        print(f"Could not read memory from {filepath} ({exc!r}); starting with empty memory")
        return pd.DataFrame(columns=["text", "label"])

In [130]:
# Peek at the accumulated example memory: first 20 rows, then (rows, columns).
# df_memory is not defined in this cell; it must already exist in the kernel.
print(df_memory.head(20))
print(df_memory.shape)

                                                 text label
0   I absolutely loved this movie, it was fantasti...     1
1   The film was a complete waste of time, very bo...     0
2   An outstanding performance by the lead actor, ...     1
3   I didn't enjoy the plot; it felt disjointed an...     0
4   A beautiful story with excellent cinematograph...     1
5   Terrible acting and a storyline that made no s...     0
6   The soundtrack was amazing and really helped s...     1
7    I fell asleep halfway through; it was that dull.     0
8   Such a heartwarming film, I would watch it again.     1
9   The dialogues were cheesy and the characters w...     0
10  This movie exceeded all my expectations, highl...     1
11  Poorly directed with many plot holes, not wort...     0
12  A brilliant script and superb acting made this...     1
13  I couldn't connect with the story or the chara...     0
14        Visually stunning and emotionally powerful.     1
15  The pacing was slow and the ending w

In [90]:
# ─── Teacher (LLM) Setup ───────────────────────────────────────────────────────

# Teacher model: the chat LLM that writes labelled examples for the student.
# The API key is read from the environment rather than hard-coded.
llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),  # alias for openai_api_key
    model="gpt-4.1-mini",                        # use 'model' instead of model_name
    temperature=0.7,
    max_retries=2
)

# Prompt asking for a JSON list of {"text", "label"} objects; the two
# placeholders are filled in by generate_examples.
prompt_template = PromptTemplate(
    input_variables=["objective", "n_samples"],
    template=(
        "You are a data generator for the NLP task: {objective}.\n"
        "Generate exactly {n_samples} JSON objects in a list, each with:\n"
        "  - \"text\": a short example input\n"
        "  - \"label\": the correct output for that input. For classification task, the label is a number defined in the objective \n"
        "Return only valid JSON."
    )
)
# Chain that binds the prompt to the teacher LLM.
generator_chain = LLMChain(llm=llm, prompt=prompt_template)

def cleanup_json(input_jsonstring: str) -> str:
    """
    Strip markdown code-fence decoration from an LLM reply so that the
    remaining text can be handed to json.loads.

    Only the wrapper is removed: surrounding whitespace, leading/trailing
    backticks, and a leading "json" language tag. Backticks or the word
    "json" that occur inside the payload (e.g. in an example's text) are
    left untouched.

    Args:
      input_jsonstring: Raw reply text, optionally wrapped as ```json ... ```.

    Returns:
      The reply without its fence/tag wrapper, whitespace-trimmed.
    """
    text = input_jsonstring.strip().strip("`").strip()
    # A JSON document can never start with the letters "json", so a leading
    # "json" here can only be the fence's language tag.
    if text[:4].lower() == "json":
        text = text[4:]
    return text.strip()

def generate_labels(resp, task_type: str):
    """
    Parse the teacher's JSON reply into parallel lists of texts and labels.

    Args:
      resp:      JSON string holding a list of objects with "text" and
                 "label" entries.
      task_type: Kind of task; only "text-classification" is handled.

    Returns:
      (texts, labels) where labels are converted with int(). For any other
      task_type both lists are empty. The JSON is parsed in either case, so
      malformed input always raises.
    """
    records = json.loads(resp)

    # Unknown task types yield no examples.
    if task_type != "text-classification":
        return [], []

    texts = [record["text"] for record in records]
    labels = [int(record["label"]) for record in records]
    return texts, labels


# ─── Refactored generate_examples ──────────────────────────────────────────────
def generate_examples(objective: str, n_samples: int):
    """
    Request fresh labelled examples from the teacher LLM and record them.

    Steps:
      1) run the generator chain for `n_samples` examples of `objective`,
      2) clean and parse the reply as text-classification data,
      3) append the new rows to the global `df_memory`,
      4) write the updated memory to disk via save_memory.

    Returns:
      (texts, labels) of the newly generated examples only.
    """
    global df_memory

    raw_reply = generator_chain.run({"objective": objective, "n_samples": n_samples})
    texts, labels = generate_labels(cleanup_json(raw_reply), "text-classification")

    # Grow the in-memory frame, then mirror it to disk.
    fresh_rows = pd.DataFrame({"text": texts, "label": labels})
    df_memory = pd.concat([df_memory, fresh_rows], ignore_index=True)
    save_memory(df_memory)
    print("Memory updated")

    return texts, labels

# ─── New train/test split function ─────────────────────────────────────────────
def get_train_test(test_size: float = 0.2,
                   shuffle: bool = True,
                   random_state: int = None):
    """
    Read the persisted memory from disk and cut it into train/test frames.

    Args:
      test_size:    Fraction of rows that go to the test frame; the row
                    count is int(len(memory) * test_size).
      shuffle:      When True, rows are permuted before the cut.
      random_state: Seed forwarded to DataFrame.sample for the permutation.

    Returns:
      (df_train, df_test), each with a fresh 0..n-1 index. The test frame is
      taken from the front of the (shuffled) memory, the train frame is the
      remainder.
    """
    memory = load_memory()
    if shuffle:
        memory = memory.sample(frac=1.0, random_state=random_state).reset_index(drop=True)

    cut = int(len(memory) * test_size)
    test_part = memory.iloc[:cut]
    train_part = memory.iloc[cut:]
    return train_part.reset_index(drop=True), test_part.reset_index(drop=True)


In [120]:
# ─── Student (RNN) & Preprocessing ─────────────────────────────────────────────

# Hyper-parameters shared by the vectorizer and the StudentModel layers.
max_len    = 40       # output_sequence_length of the vectorizer (tokens per text)
vocab_size = 10000    # max_tokens of the vectorizer and input_dim of the embedding
embed_dim  = 128      # output_dim of the embedding layer
lstm_units = 128      # units of the LSTM wrapped by the Bidirectional layer

# Module-level text vectorizer: maps raw strings to fixed-length integer
# sequences. It has to be adapted (see curriculum_loop) before it is used.
vectorize_layer = layers.TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=max_len
)

@tf.keras.utils.register_keras_serializable(
    package="Custom", name="StudentModel"
)
class StudentModel(Model):
    """
    Student classifier: raw strings -> vectorizer -> embedding -> BiLSTM ->
    dense -> softmax over `num_classes`.

    The text vectorizer is the shared module-level `vectorize_layer`, held as
    an attribute of the model so that Keras tracks it as a sub-layer instead
    of the model silently depending on a global inside `call`.
    """
    # TODO: need to make this architecture custom so that it can utilize reinforcement learning supervised by the teacher model
    def __init__(self, num_classes, **kwargs):
        """
        Args:
          num_classes: number of output classes
          **kwargs: passed to keras.Model (e.g. trainable, dtype)
        """
        super().__init__(**kwargs)
        self.num_classes = num_classes

        # Same layer object that curriculum_loop adapts. Tracking it here is
        # intended to let its vocabulary be saved/restored together with the
        # model (NOTE(review): confirm with the installed Keras version that
        # the .keras archive now contains the vectorizer's vocabulary).
        self.vectorizer = vectorize_layer

        self.embedding  = layers.Embedding(input_dim=vocab_size,
                                           output_dim=embed_dim)
        self.bilstm     = layers.Bidirectional(
                            layers.LSTM(lstm_units)
                        )
        self.dense1     = layers.Dense(64, activation='relu')
        self.classifier = layers.Dense(num_classes,
                                       activation='softmax')

    def call(self, inputs, training=False):
        """
        Forward pass.

        Args:
          inputs: batch of raw text strings.
          training: accepted for the Keras call convention; none of the
                    layers used here is given it explicitly.

        Returns:
          Softmax output of the classifier layer, one row per input text.
        """
        x = self.vectorizer(inputs)
        x = self.embedding(x)
        x = self.bilstm(x)
        x = self.dense1(x)
        return self.classifier(x)

    def get_config(self):
        """
        Return a serializable config dict for this model.
        Must include all __init__ args.
        """
        base_config = super().get_config()
        return {**base_config, "num_classes": self.num_classes}

    @classmethod
    def from_config(cls, config):
        """
        Reconstruct the model from its config.
        """
        return cls(**config)

In [121]:
# ─── Curriculum Loop ──────────────────────────────────────────────────────────

def curriculum_loop(
    objective: str,
    num_cycles: int = 5,
    N_train: int = 100,
    N_test: int = 50,
    batch_size: int = 16,
    lambda_punish: float = 0.5
):
    """
    Teacher/student training loop.

    A bootstrap batch is generated first, the vectorizer is adapted and the
    student model is created. Then, for each cycle: generate `N_train` more
    examples, split the persisted memory into train/test, fit the student for
    5 epochs, evaluate, and report a punishment value when accuracy dropped
    compared with the previous cycle.

    Args:
      objective:     Task description forwarded to the teacher prompt.
      num_cycles:    Number of generate/train/evaluate cycles.
      N_train:       Examples requested from the teacher per generation call.
      N_test:        Target number of held-out examples per cycle.
      batch_size:    Batch size for adapt/fit/evaluate datasets.
      lambda_punish: Weight applied to the accuracy drop; currently only used
                     in the printed punishment value.

    Returns:
      The trained StudentModel.
    """
    # Bootstrap: generate initial data (appended to the global df_memory).
    generate_examples(objective, N_train)
    print("[INIT BOOTSTRAP]: Examples generated")

    # Adapt the vocabulary on everything in memory, not just the bootstrap
    # batch: training below draws from the whole persisted memory, so texts
    # from earlier runs would otherwise be mostly out-of-vocabulary.
    memory_texts = df_memory["text"].astype(str).tolist()
    vectorize_layer.adapt(
        tf.data.Dataset.from_tensor_slices(memory_texts).batch(batch_size)
    )

    # Sparse categorical cross-entropy expects label ids in [0, num_classes),
    # so size the output by the largest id seen in memory rather than by the
    # number of distinct labels in one (possibly unbalanced) batch.
    memory_labels = df_memory["label"].astype(np.int32)
    num_classes = int(memory_labels.max()) + 1
    print("[INIT BOOTSTRAP]: num_classes = ",num_classes)
    # Initialize Student model
    student = StudentModel(num_classes)
    student.compile(
        optimizer=optimizers.Adam(1e-3),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )
    print("[INIT BOOTSTRAP]: student model initialized")

    prev_score = 0.0

    for cycle in range(1, num_cycles + 1):
        print(f"\n=== Cycle {cycle} ===")

        # a) Augment memory with N_train new examples
        generate_examples(objective, N_train)
        print(f"[CYCLE {cycle}]: Generated {N_train} new examples; total memory = {len(df_memory)}")

        # b) Randomly split the full memory into train & test
        #    Here we aim for N_test examples, so compute fraction.
        # NOTE(review): memory is reshuffled every cycle while the same model
        # keeps training, so rows in this cycle's test split may have been
        # trained on in an earlier cycle — test accuracy can be optimistic.
        test_frac = N_test / len(df_memory)
        df_train, df_test = get_train_test(test_size=test_frac, random_state=42)

        train_texts = df_train["text"].to_numpy()
        train_labels = df_train["label"].astype(np.int32).to_numpy()
        test_texts  = df_test["text"].to_numpy()
        test_labels  = df_test["label"].astype(np.int32).to_numpy()

        # c) Build tf.data pipelines from the DataFrames
        train_ds = tf.data.Dataset.from_tensor_slices((
            tf.constant(train_texts, dtype=tf.string),
            tf.constant(train_labels, dtype=tf.int32),
        )).batch(batch_size)

        test_ds = tf.data.Dataset.from_tensor_slices((
            tf.constant(test_texts, dtype=tf.string),
            tf.constant(test_labels, dtype=tf.int32),
        )).batch(batch_size)

        # d) Train for a few epochs
        student.fit(train_ds, epochs=5, verbose=1)

        # e) Evaluate on the held-out split
        loss, acc = student.evaluate(test_ds, verbose=0)
        score = float(acc)
        print(f"Test accuracy: {score:.4f}")

        # f) Punishment term (if score dropped); reported only, it does not
        #    feed back into training.
        punish = max(0.0, prev_score - score)
        if punish > 0:
            print(f"Punishment λ·Δ = {lambda_punish * punish:.4f}")

        prev_score = score

    return student

### Training the curriculum loop

In [127]:
# ─── Execute Loop ─────────────────────────────────────────────────────────────
# Start from whatever memory was persisted for this task (empty on first run).
df_memory = load_memory()
if __name__ == "__main__":
    # Single source for the output path so the log line cannot drift from
    # the file that is actually written.
    model_path = "student_saved_model_tf2.keras"
    final_student = curriculum_loop(
        objective=OBJECTIVE,
        num_cycles=3,
        N_train=20,
        N_test=10
    )
    final_student.save(model_path)
    print(f"Done. Model saved to {model_path}")

Memory saved to classification_movie_sentiment_1.npy
Memory updated
[INIT BOOTSTRAP]: Examples generated
[INIT BOOTSTRAP]: num_classes =  2
[INIT BOOTSTRAP]: student model initialized

=== Cycle 1 ===
Memory saved to classification_movie_sentiment_1.npy
Memory updated
[CYCLE 1]: Generated 20 new examples; total memory = 40
Epoch 1/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 55ms/step - accuracy: 0.3042 - loss: 0.7011
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.5625 - loss: 0.6864
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.7597 - loss: 0.6809
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.8028 - loss: 0.6751
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.9125 - loss: 0.6655
Test accuracy: 0.5000

=== Cycle 2 ===
Memory saved to classification_movie_sentiment_1.npy
Mem

### Inference

In [129]:
# Load the student saved by the execute cell. The StudentModel class cell must
# have been run first so the registered custom class can be resolved.
model = tf.keras.models.load_model("student_saved_model_tf2.keras")

# single example as tf.constant of dtype string
sample_text = tf.constant(["It was awesome"])

# now predict
probs = model.predict(sample_text)    # shape (1, num_classes)
# index of the highest-probability class for the single input row
pred_idx = tf.argmax(probs, axis=1).numpy()[0]

print("Probabilities:", probs[0])
print("Predicted label:", pred_idx)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step
Probabilities: [0.7978721  0.20212786]
Predicted label: 0
