In [138]:
import pandas as pd
import gym
from collections import defaultdict
import numpy as np

class CitizenBankEnv(gym.Env):
    def __init__(self):
        df = pd.read_csv('../logical_links.csv')
        
        self.df_device = df[['DeviceA']].drop_duplicates().reset_index(drop=True).reset_index(drop=False)
        self.df = df.merge(self.df_device, how='inner', on=['DeviceA']).rename(columns={'index':'DeviceA_Code'}). \
            merge(self.df_device,how='inner', left_on='DeviceBName', right_on=['DeviceA']).rename(columns={'index':'DeviceB_Code'}) \
            .drop('DeviceA_y', axis=1)
        
        self.num_devices = self.df_device.shape[0]
        self.df_device.to_csv("devices.csv", index=False)
        self.df.to_csv('links.csv', index=False)
        self.weight = 1.
        self.topology_graph()
        self.reset()
    
    #Assuming the weight are the same
    def topology_graph(self) -> dict[str, list]:
        """Build a graph for the topology."""
        edges = []
        self.graph = defaultdict(list)
        for link in self.df[['DeviceA_Code', 'DeviceB_Code']].values:
            start = link[0]
            end = link[1]
            edges.append([start, end])
            
        for edge in edges:
            first,second = edge[0], edge[1]
            self.graph[first].append(second)
            self.graph[second].append(first)
            
        return
    
    def build_costs(self):
        self.costs = np.zeros((self.num_devices,self.num_devices))
        values = self.df['Bandwidth (Mbps)'].values
        index = self.df[['DeviceA_Code','DeviceB_Code']].values
        self.costs[index[:,0], index[:,1]] = values
     
    def reset(self):
        #state includes the current position and dest
        self.state = np.random.choice(range(self.num_devices), 2)
        return self.state       
    
    def step(self, action):
        done=False
        
        if action == self.state[1]:
            done=True
        
        reward = -self.costs[self.state[0], action]
        state_next = np.array([action, self.state[1]])
        
        return state_next, reward, done       
        
    
    def render(self):
        pass
    

In [140]:
env = CitizenBankEnv()
env.build_costs()
for i in range(10):
    state_next, r, done = env.step(i)
    print(state_next, r, done)

[ 0 13] -0.0 False
[ 1 13] -0.0 False
[ 2 13] -0.0 False
[ 3 13] -0.0 False
[ 4 13] -0.0 False
[ 5 13] -0.0 False
[ 6 13] -0.0 False
[ 7 13] -0.0 False
[ 8 13] -0.0 False
[ 9 13] -1000.0 False


In [132]:
env.topology_graph()