# 🤖 AI for Ethereum - DQN Training Notebook
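This notebook trains a Deep Q-Network (DQN) agent on a custom, Ethereum-themed Gym environment. The environment is a toy placeholder: its observations and most of its rewards are drawn at random rather than taken from real chain data.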

In [None]:
# Install dependencies
!pip install gym==0.26.2 torch numpy

In [None]:
# Environment definition
import gym
from gym import spaces
import numpy as np

class EthereumEnv(gym.Env):
    """Toy Gym environment with random observations and action-based rewards.

    Observations are 3-element vectors sampled uniformly from [0, 1) and are
    independent of the action taken. There are three discrete actions; the
    reward depends only on which one is chosen (see ``_calculate_reward``).
    An episode ends after ``max_steps`` calls to ``step``.

    NOTE: ``step`` returns the legacy 4-tuple ``(obs, reward, done, info)``
    and ``reset`` returns only the observation. This differs from the
    5-tuple / ``(obs, info)`` API of gym 0.26, but the training loop in this
    notebook unpacks the legacy shapes, so they are kept as-is.
    """

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)
        self.step_count = 0
        self.max_steps = 100
        self.state = self._next_observation()

    def _next_observation(self):
        """Sample a fresh random observation matching ``observation_space``."""
        # np.random.rand yields float64; cast so the observation actually has
        # the dtype (float32) that observation_space declares.
        shape = self.observation_space.shape
        return np.random.rand(*shape).astype(self.observation_space.dtype)

    def step(self, action):
        """Advance one step.

        Args:
            action: Index of the chosen action (0, 1 or 2).

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``done`` becomes
            True once ``max_steps`` steps have been taken since the last
            reset, and ``info`` is always an empty dict.
        """
        reward = self._calculate_reward(action)
        self.state = self._next_observation()
        self.step_count += 1
        done = self.step_count >= self.max_steps
        return self.state, reward, done, {}

    def reset(self):
        """Restart the episode and return the initial observation."""
        self.step_count = 0
        self.state = self._next_observation()
        return self.state

    def _calculate_reward(self, action):
        """Return the reward for ``action`` as a plain Python float.

        Action 0 pays uniformly in [0.5, 1.0), action 1 pays uniformly in
        [0.0, 0.5), and any other action costs a fixed -0.1.
        """
        if action == 0:
            return float(np.random.uniform(0.5, 1.0))
        if action == 1:
            return float(np.random.uniform(0.0, 0.5))
        return -0.1

In [None]:
# DQN Agent
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

class DQN(nn.Module):
    """Two-layer fully connected Q-network.

    Maps an ``input_dim``-sized state vector to ``output_dim`` Q-values
    through a single 64-unit ReLU hidden layer.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_units = 64
        # Layers are instantiated in forward order, so parameter
        # initialisation consumes the RNG in the same sequence as the network
        # is evaluated.
        layers = [
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_dim),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the Q-values predicted for the batch of states ``x``."""
        q_values = self.model(x)
        return q_values

class DQNAgent:
    """Epsilon-greedy DQN agent with a uniformly sampled replay buffer.

    A single network (no separate target network) provides both the current
    Q-value estimates and the bootstrap targets.

    Args:
        state_dim: Length of the state vector fed to the network.
        action_dim: Number of discrete actions; also bounds random exploration.
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, state_dim, action_dim, lr=1e-3):
        # Kept so exploration samples from the real action range instead of a
        # hard-coded one.
        self.action_dim = action_dim
        self.model = DQN(state_dim, action_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()
        self.memory = deque(maxlen=10000)  # oldest transitions are evicted first
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

    @staticmethod
    def _to_batch(observations):
        """Stack a sequence of state vectors into one float32 ``(N, dim)`` tensor."""
        # Converting each item and stacking avoids the slow path torch takes
        # when handed a Python sequence of numpy arrays directly.
        return torch.stack(
            [torch.as_tensor(obs, dtype=torch.float32) for obs in observations]
        )

    def act(self, state):
        """Choose an action for ``state`` using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action in
        ``[0, action_dim)`` is returned; otherwise the action with the highest
        predicted Q-value.

        Returns:
            The chosen action index as a Python int.
        """
        if random.random() < self.epsilon:
            return random.randrange(self.action_dim)
        obs = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(obs)
        return int(q_values.argmax(dim=1).item())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def train_step(self, batch_size=32):
        """Run one gradient step on a random minibatch and decay epsilon.

        Args:
            batch_size: Number of transitions to sample from the buffer.

        Returns:
            The minibatch MSE loss as a float, or ``None`` when the buffer
            holds fewer than ``batch_size`` transitions (no update is made).
        """
        if len(self.memory) < batch_size:
            return None
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        state_batch = self._to_batch(states)
        next_state_batch = self._to_batch(next_states)
        reward_batch = torch.tensor(rewards, dtype=torch.float32)
        done_batch = torch.tensor(dones, dtype=torch.bool)
        action_batch = torch.tensor(actions, dtype=torch.long)

        # The target is a constant for the loss, so build it without autograd.
        with torch.no_grad():
            next_q = self.model(next_state_batch).max(dim=1).values
            # (~done) zeroes the bootstrap term for terminal transitions.
            target = reward_batch + self.gamma * next_q * (~done_batch)

        # squeeze(1), not squeeze(): a batch of one must stay shape (1,) so it
        # lines up with the target instead of collapsing to a scalar.
        current = self.model(state_batch).gather(1, action_batch.unsqueeze(1)).squeeze(1)

        loss = self.loss_fn(current, target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Clamp so the decay can never undershoot the configured floor.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        return loss.item()

In [None]:
# Training loop
env = EthereumEnv()
# Size the agent from the environment's own spaces so the two cannot drift
# apart if the observation or action space is changed later.
agent = DQNAgent(
    state_dim=env.observation_space.shape[0],
    action_dim=int(env.action_space.n),
)

EPISODES = 200

for ep in range(EPISODES):
    # reset() returns just the observation and step() a 4-tuple, as defined
    # by EthereumEnv in this notebook.
    state = env.reset()
    total_reward = 0.0
    done = False

    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        # One optimisation step per environment step; this is a no-op until
        # the replay buffer holds a full minibatch.
        agent.train_step()
        state = next_state
        total_reward += reward

    print(f"Episode {ep+1}, Total Reward: {total_reward:.2f}")