In [1]:
import gym
from time import sleep
from IPython.display import clear_output, display
import matplotlib.pyplot as plt
import importlib
from stable_baselines3.common.env_checker import check_env

import optical_network_game.game_gym
importlib.reload(optical_network_game.game_gym)
from optical_network_game.game_gym import *

from optical_network_game.requests import *
from optical_network_game.topology_generation import *

import optical_network_game.heuristic
importlib.reload(optical_network_game.heuristic)
from optical_network_game.heuristic import *

from stable_baselines3 import DQN
import pandas as pd

pygame 2.0.3 (SDL 2.0.16, Python 3.9.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


### Loading RL Model to use

In [2]:
#Insert model_name here based on file name
#model_name =
TL_5_model = DQN.load("DQN_VSNL_TL_5.zip", device='cuda')
TL_10_model = DQN.load("DQN_VSNL_TL_10_ver2.zip", device='cuda')
TL_15_model = DQN.load("DQN_VSNL_TL_15.zip", device='cuda')

In [3]:
#Parameters for results collection
#Holdtime = 10 to 40 (For a traffic load of 5 to 20)
#Number of connection requests = 20
num_req = 20
#request interval = 2 seconds
req_int = 2
#time limit for each connection request = 10 seconds
time_limit = req_int + 1
#bandwidth per link = 5
link_BW = 5

### Obtaining Results<br/>Heuristic Algorithm

In [4]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

#Creating list to store results
results_heuristic = []

#Outer for loop to set hold time from 10 to 40 with increments of 2 to allow for collection of traffic load from 5 to 20 with step of 1
for holdtime in range(10,41):
    
    #number of episodes flag for the inner while loop
    num_episodes = 0

    results_list = {}

    #generating request lists and game environment
    requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=holdtime, time_limit=time_limit)
    user = User()
    env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
    eveon = env
    check_env(env)
    heuristic = Heuristic(linkList)
    obs = env.reset()
    
    #for each traffic load setting the game runs for 30 episodes
    while num_episodes < 30:

        action = heuristic.next_action(obs)
        obs, rewards, dones, info = env.step(action)

        if dones == True:
            print(info)

            results_list = info
            results_list['traffic_load'] = holdtime/req_int
            
            #appending the final performance results into the results list for further processing
            results_heuristic.append(info)
            env.reset()
            heuristic = Heuristic(linkList)
            num_episodes += 1
            #debug print
            #print("Episode: " + str(num_episodes))

        env.render()

VSNL Topology Selected
Traffic load is: 5.0
No more requests.
Total reward for this episode is 38298.999999999985
{'bp': 0.0, 'avg_length': 2.9, 'blocked_continuous': 0, 'blocked_contiguous': 0}
No more requests.
Total reward for this episode is 38272.99999999998
{'bp': 0.0, 'avg_length': 2.9, 'blocked_continuous': 0, 'blocked_contiguous': 0}
No more requests.
Total reward for this episode is 38218.999999999985
{'bp': 0.0, 'avg_length': 2.9, 'blocked_continuous': 0, 'blocked_contiguous': 0}
No more requests.
Total reward for this episode is 39383.999999999985
{'bp': 0.0, 'avg_length': 2.8, 'blocked_continuous': 0, 'blocked_contiguous': 0}
No more requests.
Total reward for this episode is 38262.999999999985
{'bp': 0.0, 'avg_length': 2.9, 'blocked_continuous': 0, 'blocked_contiguous': 0}
No more requests.
Total reward for this episode is 38263.99999999998
{'bp': 0.0, 'avg_length': 2.9, 'blocked_continuous': 0, 'blocked_contiguous': 0}
No more requests.
Total reward for this episode is 3

In [5]:
#after for loop is completed, save the results list as a csv file
df_heuristic = pd.DataFrame(results_heuristic)
df_heuristic
# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
df_heuristic.to_csv('results_heuristic.csv')

### RL Model Results

## Traffic Load 5

In [4]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

results_RL_TL_5 = []
    
#number of episodes flag for the inner while loop
num_episodes = 0

results_list = {}

#generating request lists and game environment
requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=10, time_limit=time_limit, seed_no=42)
user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
eveon = env
check_env(env)
heuristic = Heuristic(linkList)
obs = env.reset()

#for each traffic load setting the game runs for 30 episodes
while num_episodes < 5:

    action, states_ = TL_5_model.predict(obs, deterministic=True )
    obs, rewards, dones, info = env.step(action)

    if dones == True:
        #print(info)

        #results_list = info
        #results_list['traffic_load'] = holdtime/req_int
        
        #appending the final performance results into the results list for further processing
        results_RL_TL_5.append(info)
        env.reset()
        heuristic = Heuristic(linkList)
        num_episodes += 1
        #debug print
        #print("Episode: " + str(num_episodes))

    env.render()



VSNL Topology Selected
42
Traffic load is: 5.0
No more requests.
Total reward for this episode is 7644.0
Stop hitting yourself
No more requests.
Total reward for this episode is 1253.000000000001
Stop hitting yourself
No more requests.
Total reward for this episode is -460.99999999999426
Stop hitting yourself
No more requests.
Total reward for this episode is 13830.000000000007
No more requests.
Total reward for this episode is 33028.00000000002


In [4]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

results_RL_TL_5 = []

#Outer for loop to set hold time from 10 to 40 with increments of 2 to allow for collection of traffic load from 5 to 20 with step of 1
for holdtime in range(10,41):
    
    #number of episodes flag for the inner while loop
    num_episodes = 0

    results_list = {}
    
    #generating request lists and game environment
    requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=holdtime, time_limit=time_limit)
    user = User()
    env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
    eveon = env
    check_env(env)
    heuristic = Heuristic(linkList)
    obs = env.reset()
    
    #for each traffic load setting the game runs for 30 episodes
    while num_episodes < 30:

        action, states_ = TL_5_model.predict(obs, deterministic=True )
        obs, rewards, dones, info = env.step(action)

        if dones == True:
            #print(info)

            results_list = info
            results_list['traffic_load'] = holdtime/req_int
            
            #appending the final performance results into the results list for further processing
            results_RL_TL_5.append(info)
            env.reset()
            heuristic = Heuristic(linkList)
            num_episodes += 1
            #debug print
            #print("Episode: " + str(num_episodes))

        env.render()

VSNL Topology Selected
Traffic load is: 5.0
Stop hitting yourself
No more requests.
Total reward for this episode is -18833.0
Stop hitting yourself
No more requests.
Total reward for this episode is 11960.000000000007
Stop hitting yourself
No more requests.
Total reward for this episode is 9866.000000000007
Stop hitting yourself
No more requests.
Total reward for this episode is 11926.0
Stop hitting yourself
No more requests.
Total reward for this episode is 633.0000000000013
Stop hitting yourself
No more requests.
Total reward for this episode is 11051.0
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is 13891.000000000002
Stop hitting yourself
No more requests.
Total reward for this episode is 27238.0
No more requests.
Total reward for this episode is 25304.000000000007
Stop hitting yourself
No more requests.
Total reward for this episode is -1090.0000000000005
Stop hitting yourself
No more requests.
Total reward for this episode is 8039.00

In [5]:
#after for loop is completed, save the results list as a csv file
df_TL_5 = pd.DataFrame(results_RL_TL_5)
# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
df_TL_5.to_csv('results_RL_TL_5.csv')

## Traffic Load 10

In [5]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

results_RL_TL_10 = []

    
#number of episodes flag for the inner while loop
num_episodes = 0

results_list = {}

#generating request lists and game environment
requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=20, time_limit=time_limit, seed_no=42)
user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
eveon = env
check_env(env)
heuristic = Heuristic(linkList)
obs = env.reset()

#for each traffic load setting the game runs for 30 episodes
while num_episodes < 5:

    action, states_ = TL_10_model.predict(obs, deterministic=True )
    obs, rewards, dones, info = env.step(action)

    if dones == True:
        #print(info)

        #results_list = info
        #results_list['traffic_load'] = holdtime/req_int
        
        #appending the final performance results into the results list for further processing
        results_RL_TL_10.append(info)
        env.reset()
        heuristic = Heuristic(linkList)
        num_episodes += 1
        #debug print
        #print("Episode: " + str(num_episodes))

    env.render()

VSNL Topology Selected
42
Traffic load is: 10.0
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -24465.999999999996
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -10693.0
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -14035.0
Stop hitting yourself
No more requests.
Total reward for this episode is 539.00000000001
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -24553.0


Code to run results for 30 different random seeds per traffic load

In [4]:
def run_results_diff_seed(seed_no):
    TL_10_model = DQN.load("DQN_VSNL_TL_10_ver2.zip", device='cuda')
    #Parameters for results collection
    #Holdtime = 10 to 40 (For a traffic load of 5 to 20)
    #Number of connection requests = 20
    num_req = 20
    #request interval = 2 seconds
    req_int = 2
    #time limit for each connection request = 10 seconds
    time_limit = req_int + 1
    #bandwidth per link = 5
    link_BW = 5

    #setting up the VSNL topology
    nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

    results_RL_TL_10 = []

    #Outer for loop to set hold time from 10 to 40 with increments of 2 to allow for collection of traffic load from 5 to 20 with step of 1
    for holdtime in range(10,41):
        
        #number of episodes flag for the inner while loop
        num_episodes = 0

        results_list = {}
        
        #generating request lists and game environment
        requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=holdtime, time_limit=time_limit, seed_no=seed_no)
        user = User()
        env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
        eveon = env
        check_env(env)
        heuristic = Heuristic(linkList)
        obs = env.reset()
        
        #for each traffic load setting the game runs for 30 episodes
        while num_episodes < 1:

            action, states_ = TL_10_model.predict(obs, deterministic=True )
            obs, rewards, dones, info = env.step(action)

            if dones == True:
                #print(info)

                results_list = info
                results_list['traffic_load'] = holdtime/req_int
                results_list['seed_no'] = seed_no
                
                print(results_list)
                #appending the final performance results into the results list for further processing
                results_RL_TL_10.append(results_list)
                env.reset()
                heuristic = Heuristic(linkList)
                num_episodes += 1
                #debug print
                #print("Episode: " + str(num_episodes))

            env.render()
    return results_RL_TL_10
    

In [5]:
total_results_TL10 = []

for seed_no in range(0,31):
    results = run_results_diff_seed(seed_no)
    total_results_TL10.append(results)

VSNL Topology Selected
0
Traffic load is: 5.0
No more requests.
Total reward for this episode is -75665.0000000004
{'bp': 0.55, 'avg_length': 5.777777777777778, 'blocked_continuous': 0.0, 'blocked_contiguous': 1.0, 'traffic_load': 5.0, 'seed_no': 0}
0
Traffic load is: 5.5
No more requests.
Total reward for this episode is -35938.99999999999
{'bp': 0.30000000000000004, 'avg_length': 4.571428571428571, 'blocked_continuous': 0.0, 'blocked_contiguous': 1.0, 'traffic_load': 5.5, 'seed_no': 0}
0
Traffic load is: 6.0
No more requests.
Total reward for this episode is -48958.99999999998
{'bp': 0.35, 'avg_length': 4.769230769230769, 'blocked_continuous': 0.0, 'blocked_contiguous': 1.0, 'traffic_load': 6.0, 'seed_no': 0}
0
Traffic load is: 6.5
No more requests.
Total reward for this episode is -26553.999999999975
{'bp': 0.25, 'avg_length': 4.133333333333334, 'blocked_continuous': 0.0, 'blocked_contiguous': 1.0, 'traffic_load': 6.5, 'seed_no': 0}
0
Traffic load is: 7.0
No more requests.
Total rew

In [6]:
#after for loop is completed, save the results list as a csv file
seed_test_TL10 = pd.DataFrame(total_results_TL10)
# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
seed_test_TL10.to_csv('2_3_BW_REQ_Rand_Seed_results_TL_10.csv')

In [4]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

results_RL_TL_10 = []

#Outer for loop to set hold time from 10 to 40 with increments of 2 to allow for collection of traffic load from 5 to 20 with step of 1
for holdtime in range(10,41):
    
    #number of episodes flag for the inner while loop
    num_episodes = 0

    results_list = {}
    
    #generating request lists and game environment
    requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=holdtime, time_limit=time_limit)
    user = User()
    env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
    eveon = env
    check_env(env)
    heuristic = Heuristic(linkList)
    obs = env.reset()
    
    #for each traffic load setting the game runs for 30 episodes
    while num_episodes < 30:

        action, states_ = TL_10_model.predict(obs, deterministic=True )
        obs, rewards, dones, info = env.step(action)

        if dones == True:
            #print(info)

            results_list = info
            results_list['traffic_load'] = holdtime/req_int
            
            #appending the final performance results into the results list for further processing
            results_RL_TL_10.append(info)
            env.reset()
            heuristic = Heuristic(linkList)
            num_episodes += 1
            #debug print
            #print("Episode: " + str(num_episodes))

        env.render()

VSNL Topology Selected
Traffic load is: 5.0
No more requests.
Total reward for this episode is -15791.0
Stop hitting yourself
No more requests.
Total reward for this episode is 18445.0
No more requests.
Total reward for this episode is 32464.000000000004
Too many invalid actions.
Total reward for this episode is -16874.0
Stop hitting yourself
Too many invalid actions.
Total reward for this episode is -9657.0
Too many invalid actions.
Total reward for this episode is -15874.0
Too many invalid actions.
Total reward for this episode is -15874.0
Too many invalid actions.
Total reward for this episode is -16874.0
Stop hitting yourself
No more requests.
Total reward for this episode is 8865.000000000004
No more requests.
Total reward for this episode is 30149.000000000007
Stop hitting yourself
No more requests.
Total reward for this episode is 17404.000000000004
Too many invalid actions.
Total reward for this episode is -16874.0
No more requests.
Total reward for this episode is 18785.0
Too 

Error: Canceled future for execute_request message before replies were done

In [5]:
#after for loop is completed, save the results list as a csv file
df_TL_10 = pd.DataFrame(results_RL_TL_10)
# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
df_TL_10.to_csv('results_RL_TL_10.csv')

## Traffic Load 15

In [6]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

results_RL_TL_15 = []

#number of episodes flag for the inner while loop
num_episodes = 0

results_list = {}

#generating request lists and game environment
requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=30, time_limit=time_limit, seed_no=42)
user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
eveon = env
check_env(env)
heuristic = Heuristic(linkList)
obs = env.reset()

#for each traffic load setting the game runs for 30 episodes
while num_episodes < 5:

    action, states_ = TL_15_model.predict(obs, deterministic=True )
    obs, rewards, dones, info = env.step(action)

    if dones == True:
        #print(info)

        #results_list = info
        #results_list['traffic_load'] = holdtime/req_int
        
        #appending the final performance results into the results list for further processing
        results_RL_TL_15.append(info)
        env.reset()
        heuristic = Heuristic(linkList)
        num_episodes += 1
        #debug print
        #print("Episode: " + str(num_episodes))

    env.render()

VSNL Topology Selected
42
Traffic load is: 15.0
No more requests.
Total reward for this episode is -30000.0
No more requests.
Total reward for this episode is -50000.0
No more requests.
Total reward for this episode is -52800.0
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -57791.00000000001
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -75331.0
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -35831.99999999999
No more requests.
Total reward for this episode is -11725.0


In [8]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

results_RL_TL_15 = []

#Outer for loop to set hold time from 10 to 40 with increments of 2 to allow for collection of traffic load from 5 to 20 with step of 1
for holdtime in range(10,41):
    
    #number of episodes flag for the inner while loop
    num_episodes = 0

    results_list = {}
    
    #generating request lists and game environment
    requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=holdtime, time_limit=time_limit)
    user = User()
    env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
    eveon = env
    check_env(env)
    heuristic = Heuristic(linkList)
    obs = env.reset()
    
    #for each traffic load setting the game runs for 30 episodes
    while num_episodes < 30:

        action, states_ = TL_15_model.predict(obs, deterministic=True )
        obs, rewards, dones, info = env.step(action)

        if dones == True:
            #print(info)

            results_list = info
            results_list['traffic_load'] = holdtime/req_int
            
            #appending the final performance results into the results list for further processing
            results_RL_TL_15.append(info)
            env.reset()
            heuristic = Heuristic(linkList)
            num_episodes += 1
            #debug print
            #print("Episode: " + str(num_episodes))

        env.render()

VSNL Topology Selected
Traffic load is: 5.0
No more requests.
Total reward for this episode is 1376.0000000000061
No more requests.
Total reward for this episode is 24545.000000000004
No more requests.
Total reward for this episode is 25105.000000000007
No more requests.
Total reward for this episode is 24470.999999999996
No more requests.
Total reward for this episode is 10047.000000000002
Stop hitting yourself
No more requests.
Total reward for this episode is 12898.000000000007
No more requests.
Total reward for this episode is 25684.000000000004
Too many invalid actions.
Total reward for this episode is 22428.99999999999
No more requests.
Total reward for this episode is 14023.000000000002
No more requests.
Total reward for this episode is 23243.000000000004
No more requests.
Total reward for this episode is 7651.000000000002
No more requests.
Total reward for this episode is 25298.0
No more requests.
Total reward for this episode is -1429.9999999999911
No more requests.
Total rewa

In [9]:
#after for loop is completed, save the results list as a csv file
df_TL_15 = pd.DataFrame(results_RL_TL_15)
# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
df_TL_15.to_csv('results_RL_TL_15.csv')

## Getting data for Traffic Load 20 for each model and heuristic

In [5]:
#setting up the VSNL topology
nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

#Creating list to store results
results_heuristic = []

#number of episodes flag for the inner while loop
num_episodes = 0

results_list = {}

#generating request lists and game environment
requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=40, time_limit=time_limit)
user = User()
env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
eveon = env
check_env(env)
heuristic = Heuristic(linkList)
obs = env.reset()

#for each traffic load setting the game runs for 30 episodes
while num_episodes < 30:

    action = heuristic.next_action(obs)
    obs, rewards, dones, info = env.step(action)

    if dones == True:
        print(info)

        results_list = info
        results_list['traffic_load'] = 40/req_int
        
        #appending the final performance results into the results list for further processing
        results_heuristic.append(info)
        env.reset()
        heuristic = Heuristic(linkList)
        num_episodes += 1
        #debug print
        #print("Episode: " + str(num_episodes))

    env.render()

VSNL Topology Selected
Traffic load is: 20.0
No more requests.
Total reward for this episode is -30000.0
No more requests.
Total reward for this episode is -50000.0
No more requests.
Total reward for this episode is -98.00000000000502
{'bp': 0.15000000000000002, 'avg_length': 2.823529411764706, 'blocked_continuous': 0.05, 'blocked_contiguous': 0.1}
No more requests.
Total reward for this episode is -1079.000000000002
{'bp': 0.15000000000000002, 'avg_length': 2.9411764705882355, 'blocked_continuous': 0.0, 'blocked_contiguous': 0.15}
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -25818.000000000007
{'bp': 0.25, 'avg_length': 2.8, 'blocked_continuous': 0.0, 'blocked_contiguous': 0.15}
No more requests.
Total reward for this episode is -13480.000000000013
{'bp': 0.19999999999999996, 'avg_length': 2.875, 'blocked_continuous': 0.1, 'blocked_contiguous': 0.1}
No more requests.
Total reward for this episode is -13494.000000000005
{'bp': 0.199999

In [6]:
#after for loop is completed, save the results list as a csv file
df_heuristic = pd.DataFrame(results_heuristic)
df_heuristic
# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
df_heuristic.to_csv('results_heuristic_TL_20.csv')

In [3]:
def results_TL_20(model):

    

    #Parameters for results collection
    #Holdtime = 10 to 40 (For a traffic load of 5 to 20)
    #Number of connection requests = 20
    num_req = 20
    #request interval = 2 seconds
    req_int = 2
    #time limit for each connection request = 10 seconds
    time_limit = req_int + 1
    #bandwidth per link = 5
    link_BW = 5

    #setting up the VSNL topology
    nodeList, linkList = createPresetTopology("VSNL", num_slots=link_BW)

    results = []


        
    #number of episodes flag for the inner while loop
    num_episodes = 0

    results_list = {}

    #generating request lists and game environment
    requestList = generateRequests(nodeList, numberOfRequests=num_req, req_interval=req_int, hold_time=40, time_limit=time_limit)
    user = User()
    env = game_gym(nodeList, linkList, requestList, user, dynamic=True)
    eveon = env
    check_env(env)
    heuristic = Heuristic(linkList)
    obs = env.reset()

    #for each traffic load setting the game runs for 30 episodes
    while num_episodes < 30:

        action, states_ = model.predict(obs, deterministic=True )
        obs, rewards, dones, info = env.step(action)

        if dones == True:
            #print(info)

            results_list = info
            results_list['traffic_load'] = 40/req_int
            
            #appending the final performance results into the results list for further processing
            results.append(info)
            env.reset()
            heuristic = Heuristic(linkList)
            num_episodes += 1
            #debug print
            #print("Episode: " + str(num_episodes))

        env.render()
    
    return results

In [4]:
TL_5_model_results = results_TL_20(TL_5_model)
TL_10_model_results = results_TL_20(TL_10_model)
TL_15_model_results = results_TL_20(TL_15_model)

VSNL Topology Selected
Traffic load is: 20.0
Stop hitting yourself
No more requests.
Total reward for this episode is -54372.0
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -69004.0
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -84457.0
Stop hitting yourself
No more requests.
Total reward for this episode is -32943.99999999999
Stop hitting yourself
No more requests.
Total reward for this episode is -62729.99999999999
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -82383.99999999999
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward for this episode is -55958.00000000001
Stop hitting yourself
Stop hitting yourself
Stop hitting yourself
No more requests.
Total reward fo

In [5]:
TL_5 = pd.DataFrame(TL_5_model_results)
TL_10 = pd.DataFrame(TL_10_model_results)
TL_15 = pd.DataFrame(TL_15_model_results)

# saving the dataframe 
#INSERT PROPER RESULTS NAME HERE
TL_5.to_csv('TL_5_Model_TL_20.csv')
TL_10.to_csv('TL_10_Model_TL_20.csv')
TL_15.to_csv('TL_15_Model_TL_20.csv')