In [1]:
import pandas as pd

# Load test dataset
df = pd.read_excel("kaggletest.csv.xlsx")

# Display first few rows
df.head()

Unnamed: 0,id,topic
0,1001,The impact of AI on society
1,1002,The future of renewable energy
2,1003,The role of space exploration in modern science
3,1005,",Will robots replace human jobs?"


In [2]:
import gym
import numpy as np
from gym import spaces

class EssayGenerationEnv(gym.Env):
    def __init__(self, topics):
        super(EssayGenerationEnv, self).__init__()
        
        self.topics = topics  # List of topics
        self.current_idx = 0  # Track current essay index
        
        # Define action space (generated essay, encoded as tokens)
        self.action_space = spaces.Discrete(50257)  # GPT-2 token count
        
        # Define state space (essay topic)
        self.observation_space = spaces.Discrete(len(self.topics))

    def reset(self):
        """Reset environment and return the next topic"""
        self.current_idx = 0
        return self.topics[self.current_idx]
    
    def step(self, action):
        """Take an action (generate essay) and compute reward"""
        
        # Decode token IDs back into text (simulated essay)
        essay = tokenizer.decode(action)
        
        # Simulate LLM judge scores (replace this with real API later)
        score_variance = np.random.uniform(0.5, 3.0)  # Simulating disagreement
        
        # Compute reward (maximize variance)
        reward = score_variance  

        # Move to next topic
        self.current_idx += 1
        done = self.current_idx >= len(self.topics)
        
        next_state = self.topics[self.current_idx] if not done else None
        
        return next_state, reward, done, {}

# Load topics
topics = df["topic"].tolist()

# Create environment
env = EssayGenerationEnv(topics)


In [3]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")




In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

class PPOAgent(nn.Module):
    def __init__(self, model):
        super(PPOAgent, self).__init__()
        self.model = model
        self.optimizer = optim.AdamW(self.model.parameters(), lr=5e-5)

    def forward(self, input_ids):
        """Generate text using the model"""
        output = self.model.generate(input_ids, max_length=100)
        return output

    def compute_loss(self, old_probs, new_probs, rewards):
        """PPO loss function"""
        ratio = torch.exp(new_probs - old_probs)
        clip_adv = torch.clamp(ratio, 0.8, 1.2) * rewards
        loss = -torch.min(ratio * rewards, clip_adv).mean()
        return loss

    def train_step(self, old_probs, new_probs, rewards):
        """Optimize the policy"""
        loss = self.compute_loss(old_probs, new_probs, rewards)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

# Initialize agent
agent = PPOAgent(model)


In [None]:
num_epochs = 3
batch_size = 8

for epoch in range(num_epochs):
    state = env.reset()
    done = False
    total_reward = 0
    
    while not done:
        # Convert topic to tokenized input
        input_ids = tokenizer.encode(state, return_tensors="pt")
        
        # Generate essay
        with torch.no_grad():
            action = agent(input_ids).squeeze().tolist()  # Ensure action is a list
        
        # Get new probabilities (for loss function)
        new_probs = torch.log(torch.tensor(1.0))  # Placeholder
        
        # Simulate reward (variance)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        
        # Compute PPO loss and update model
        old_probs = new_probs.clone()
        reward_tensor = torch.tensor(float(reward), requires_grad=True)  # ✅ Fix applied here
        loss = agent.train_step(old_probs, new_probs, reward_tensor)
        
        print(f"Epoch {epoch}, Loss: {loss:.4f}, Reward: {reward:.2f}")
        
        state = next_state if not done else None

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Epoch 0, Loss: -1.4371, Reward: 1.44


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Epoch 0, Loss: -2.6970, Reward: 2.70


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Epoch 0, Loss: -1.3262, Reward: 1.33


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Epoch 0, Loss: -1.1783, Reward: 1.18


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Epoch 1, Loss: -2.7487, Reward: 2.75


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Epoch 1, Loss: -1.6760, Reward: 1.68


In [None]:
essays = []
for topic in topics:
    input_ids = tokenizer.encode(topic, return_tensors="pt")
    output = agent(input_ids)
    essay = tokenizer.decode(output[0], skip_special_tokens=True)
    essays.append(essay)

# Save results
df["essay"] = essays
df[["id", "essay"]].to_csv("submission.csv", index=False)

print("Submission file created: submission.csv")
