# Bare Bones Environment Implementation
    import gym
    import gym_omnipath

    env = gym.make('omnipath-v0')
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        env.render()

In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F


from random import sample 

import gym
import gym_omnipath

In [2]:
class DQN(nn.Module):
    """Small fully connected network producing one score per action.

    Args:
        observation_size: Length of the input observation vector.
        action_size: Number of output scores.
        hidden_size: Width of the single hidden layer.

    All arguments default to the sizes that were previously hard-coded, so
    ``DQN()`` builds the same layer shapes as before.
    """

    def __init__(self, observation_size=4, action_size=4, hidden_size=100):
        super().__init__()
        self.fc1 = nn.Linear(observation_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        """Return the ``action_size`` output scores for observation ``x``."""
        # Two Linear layers with nothing in between collapse into a single
        # affine map; the ReLU is what makes the hidden layer useful.
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [3]:
class State:
    """Simple holder for an ``x`` value and a ``y`` value."""

    def __init__(self, x, y):
        self.x, self.y = x, y

class Transition:
    """One environment step.

    Args:
        s_state: State before the action was taken.
        f_state: State after the action was taken.
        action: Action that was taken.
        reward: Reward returned for the step.
        done: Whether the step ended the episode.
    """

    def __init__(self, s_state, f_state, action, reward, done):
        self.s_state = s_state
        self.f_state = f_state
        # Previously accepted but silently dropped.
        self.action = action
        self.reward = reward
        self.done = done

class Memory:
    """Bounded first-in-first-out buffer of stored items.

    Args:
        size: Maximum number of items kept. ``None`` or a non-positive value
            disables eviction, so the buffer grows without limit.
    """

    def __init__(self, size):
        self.size = size
        self.transitions = []

    def append(self, transition):
        """Store ``transition``, dropping the oldest item if the buffer is full."""
        if self.size is not None and 0 < self.size <= len(self.transitions):
            # O(n) on a list, but ``transitions`` stays a plain list for callers.
            del self.transitions[0]
        self.transitions.append(transition)

    def sample(self, num):
        """Return ``num`` distinct stored items chosen at random.

        Raises:
            ValueError: If ``num`` is larger than the number of stored items.
        """
        # Inside a method the bare name ``sample`` resolves to the module-level
        # ``random.sample`` import, not to this method.
        return sample(self.transitions, num)

    def __len__(self):
        return len(self.transitions)
    
class Agent:
    """Holds a Q-network plus a target copy and picks the highest-scoring action."""

    def __init__(self):
        # Use DQN's default layer sizes (4 observation inputs, 4 action outputs).
        self.q_network = DQN()
        self.target_network = DQN()

        # Start the target network with exactly the same weights as the Q-network.
        self.target_network.load_state_dict(self.q_network.state_dict())

    def act(self, state):
        """Return the index (as a tensor) of the largest Q-network output.

        Args:
            state: Tensor passed straight to ``self.q_network``.
        """
        # Choosing an action needs no gradients.
        with torch.no_grad():
            scores = self.q_network(state)
        return torch.argmax(scores, dim=0)
    

In [4]:
def train(epochs, model, env, replay_memory=None):
    """Run ``epochs`` episodes of ``env``, rendering every step.

    While the replay buffer holds fewer than ``size`` items, actions are drawn
    from ``env.action_space.sample()`` and each step is stored as a
    ``Transition``. Once the buffer is full, the action is the argmax of
    ``model``'s output for the current state.

    NOTE: no loss is computed and ``optimizer.step()`` is never called, so
    ``model``'s weights are not changed by this function yet.

    Args:
        epochs: Number of episodes to run.
        model: ``nn.Module`` called with a 4-element float tensor.
        env: Environment providing ``reset()``, ``step(action)`` returning
            ``(state, reward, done, info)``, ``render()`` and
            ``action_space.sample()``. Assumes ``reset()`` returns the initial
            state in the same indexable form as ``step`` does - TODO confirm
            for this environment.
        replay_memory: Buffer exposing ``size``, ``append`` and ``__len__``.
            Defaults to the module-level ``memory`` object.

    Returns:
        The ``model`` that was passed in.
    """
    # Compare against None explicitly: an empty buffer defines __len__ and is
    # therefore falsy.
    buffer = memory if replay_memory is None else replay_memory

    # Scaffolding for the update step that is not implemented yet.
    # TODO: compute a loss with ``criterion``, call ``backward()`` and
    # ``optimizer.step()``.
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.0001, weight_decay=0.001)

    for _ in range(epochs):
        # Start every episode from a fresh environment; otherwise ``done``
        # stays True after the first episode and later epochs do nothing.
        state = env.reset()
        done = False
        while not done:
            warming_up = len(buffer) < buffer.size
            if warming_up:
                action = env.action_space.sample()
            else:
                optimizer.zero_grad()
                state_tensor = torch.FloatTensor([state[0], state[1], state[2], state[3]])
                output = model(state_tensor)
                print(output)
                # Choose from the current state and hand the env a plain int,
                # not a tensor.
                action = int(torch.argmax(output))

            next_state, reward, done, _info = env.step(action)
            if warming_up:
                buffer.append(Transition(state, next_state, action, reward, done))
            state = next_state

            env.render()
    return model
    

In [5]:

if __name__ == "__main__":
    # Script entry point: build the environment, run a short training session
    # and always release the environment afterwards.
    env = gym.make('omnipath-v0')
    try:
        env.controllable()

        # train() picks up this module-level ``memory`` object.
        memory = Memory(10)

        model = DQN()
        target = DQN()

        train(10, model, env)
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop a run; fall through to cleanup.
        pass
    finally:
        # Close on every exit path, not only on interrupt, so the environment
        # is not leaked after a normal run or an unexpected error.
        env.close()
        print('Environment Closed')
    
    



tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878, -8.2840], grad_fn=<AddBackward0>)
tensor([56.1348,  7.0435, 15.6878,

tensor([53.1188,  7.0613, 14.1317, -6.7892], grad_fn=<AddBackward0>)
tensor([52.9752,  7.0622, 14.0576, -6.7180], grad_fn=<AddBackward0>)
tensor([52.8316,  7.0630, 13.9835, -6.6469], grad_fn=<AddBackward0>)
tensor([52.6880,  7.0638, 13.9094, -6.5757], grad_fn=<AddBackward0>)
tensor([52.5443,  7.0647, 13.8353, -6.5045], grad_fn=<AddBackward0>)
tensor([52.4007,  7.0655, 13.7612, -6.4333], grad_fn=<AddBackward0>)
tensor([52.2571,  7.0664, 13.6871, -6.3621], grad_fn=<AddBackward0>)
tensor([52.1135,  7.0672, 13.6130, -6.2910], grad_fn=<AddBackward0>)
tensor([51.9699,  7.0681, 13.5389, -6.2198], grad_fn=<AddBackward0>)
tensor([51.8262,  7.0689, 13.4648, -6.1486], grad_fn=<AddBackward0>)
tensor([51.6826,  7.0698, 13.3907, -6.0774], grad_fn=<AddBackward0>)
tensor([51.5390,  7.0706, 13.3166, -6.0062], grad_fn=<AddBackward0>)
tensor([51.3954,  7.0715, 13.2425, -5.9351], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684,

tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684, -5.8639], grad_fn=<AddBackward0>)
tensor([51.2518,  7.0723, 13.1684,