In [1]:
import pandas as pd

# Read the synthetic scam-token dataset from disk into a DataFrame
df = pd.read_parquet('synthetic_scam_token_dataset.parquet')

# Preview the leading rows to sanity-check the columns
print(df.head())

# Report (rows, columns)
print(f"\nDataset shape: {df.shape}")

# Class balance: share of each value of the `label` column
print("\nLabel Distribution:")
label_share = df['label'].value_counts(normalize=True)
print(label_share)


   liquidity_lock  top5_holder_pct  top10_holder_pct  volume_spike_ratio  \
0               0         0.329202          0.450914            1.052517   
1               1         0.254715          0.590134            0.888817   
2               1         0.393910          0.360236            0.683118   
3               0         0.808068          0.789527            1.053090   
4               0         0.822637          0.602806            1.194389   

   liquidity_drain_pct  comment_velocity  comment_spam_ratio  hype_score  \
0                 0.00                15            0.635036    0.861809   
1                 0.00                 2            0.225824    0.473315   
2                 0.00                 2            0.077117    0.556033   
3                 0.95                 7            0.645193    0.627983   
4                 0.95                11            0.829876    0.889861   

   price_spike_magnitude  price_crash_depth  time_to_crash_min  \
0               3.77

In [4]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class ScamTokenEnv(gym.Env):
    """Sequential scam-vs-legit token classification over a tabular dataset.

    Every row of the Parquet file is one observation (all columns except
    ``label``); the ``label`` column is the ground truth (1 = scam, 0 = legit).

    Actions:
        0 -- Wait:           reward -0.01, move on to the next row
        1 -- Flag:           reward -0.005, move on to the next row
        2 -- Classify Legit: reward +1 if the row is legit, else -1; ends the episode
        3 -- Classify Scam:  reward +1 if the row is a scam, else -1; ends the episode

    The episode also ends once the last row has been consumed.

    The class follows the classic ``gym`` API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.

    NOTE(review): ``reset()`` always rewinds to row 0 and a classification
    ends the episode, so every episode starts on the same row and later rows
    are reached only through Wait/Flag actions. Confirm this is intended
    before training on this environment.
    """

    def __init__(self, parquet_file):
        """Load the dataset and derive the observation and action spaces.

        Args:
            parquet_file: Path to a Parquet file with numeric feature columns
                and a ``label`` column.

        Raises:
            ValueError: If the dataset contains no rows (the observation
                bounds cannot be computed from an empty table).
        """
        super().__init__()

        # Load dataset
        self.data = pd.read_parquet(parquet_file).reset_index(drop=True)
        if len(self.data) == 0:
            raise ValueError("ScamTokenEnv requires a dataset with at least one row")

        # Extract features (exclude label)
        self.features = self.data.drop(columns=['label']).values
        self.labels = self.data['label'].values  # 1=scam, 0=legit

        # Observation space: per-feature bounds taken from the data itself
        self.observation_space = spaces.Box(
            low=np.min(self.features, axis=0).astype(np.float32),
            high=np.max(self.features, axis=0).astype(np.float32),
            dtype=np.float32,
        )

        # Action space: 0 = Wait, 1 = Flag, 2 = Classify Legit, 3 = Classify Scam
        self.action_space = spaces.Discrete(4)

        # Index of the row the agent is currently looking at
        self.current_step = 0

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_step = 0
        return self._get_observation()

    def step(self, action):
        """Apply ``action`` to the current row and advance to the next one.

        Args:
            action: Integer in ``[0, 3]`` (see the class docstring). Any other
                value yields a reward of 0 and does not end the episode.

        Returns:
            Tuple ``(next_obs, reward, done, info)``. When ``done`` is True,
            ``next_obs`` is an all-zero float32 placeholder and ``info`` is
            always an empty dict.

        Raises:
            IndexError: If called after the dataset has been exhausted
                without calling ``reset()`` first.
        """
        if self.current_step >= len(self.data):
            raise IndexError(
                "step() called after the dataset was exhausted; call reset() first"
            )

        true_label = self.labels[self.current_step]

        reward = 0
        done = False

        # Reward logic
        if action == 0:  # Wait
            reward = -0.01  # Small penalty for delay
        elif action == 1:  # Flag
            reward = -0.005  # Slightly lower penalty
        elif action == 2:  # Classify Legit
            reward = 1 if true_label == 0 else -1
            done = True
        elif action == 3:  # Classify Scam
            reward = 1 if true_label == 1 else -1
            done = True

        # Move to next sample
        self.current_step += 1
        if self.current_step >= len(self.data):
            done = True

        if done:
            # Terminal placeholder; float32 so it matches observation_space.dtype
            # (np.zeros defaults to float64).
            next_obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            next_obs = self._get_observation()
        return next_obs, reward, done, {}

    def _get_observation(self):
        """Return the feature vector of the current row as float32."""
        return self.features[self.current_step].astype(np.float32)

    def render(self, mode='human'):
        """Print the current step index and observation to stdout."""
        if self.current_step < len(self.data):
            print(f"Step: {self.current_step}, Observation: {self._get_observation()}")
        else:
            # Past the last row there is no observation to show; avoid indexing
            # out of bounds.
            print(f"Step: {self.current_step}, Observation: <end of dataset>")



In [5]:
# Build the environment on top of the dataset file
env = ScamTokenEnv('synthetic_scam_token_dataset.parquet')

# Smoke test: drive one episode with uniformly random actions,
# rendering the state before each step and logging the outcome after it.
obs, done = env.reset(), False

while not done:
    env.render()
    action = env.action_space.sample()  # random pick, no policy involved
    obs, reward, done, _ = env.step(action)
    print(f"Action: {action}, Reward: {reward}\n")


Step: 0, Observation: [ 0.          0.32920238  0.450914    1.0525174   0.         15.
  0.6350358   0.86180943  3.772848    0.90793365 17.509174    1.
  5.          0.          0.62509644]
Action: 2, Reward: 1



In [None]:
import gym
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

from scam_token_env import ScamTokenEnv  # Assuming your custom env is saved here
!pip install shimmy

# Initialize environment
def make_env():
    return ScamTokenEnv('synthetic_scam_token_dataset.parquet')

env = DummyVecEnv([make_env])

# Define DQN model
model = DQN(
    "MlpPolicy",
    env,
    learning_rate=1e-4,
    buffer_size=100_000,
    learning_starts=1_000,
    batch_size=64,
    tau=1.0,
    gamma=0.99,
    train_freq=4,
    target_update_interval=1_000,
    exploration_fraction=0.1,
    exploration_final_eps=0.02,
    verbose=1
)

# Train the agent
model.learn(total_timesteps=100_000)

# Evaluate the agent
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean Reward: {mean_reward}, Std Reward: {std_reward}")

# Save model
model.save("dqn_scam_token_detector")
