In [26]:
import pandas as pd
import numpy as np
import pyomo.environ as pyo
from pyomo.environ import *
from pyomo.opt import SolverFactory
import matplotlib.dates as mdates
import random
import datetime
import gym
from gym import spaces

In [27]:
class TransportationEnv(gym.Env):
    
    # constants as per the problem getting initialized
    def __init__(self,num_transport,num_warehouses,num_customer_sites,num_routes,incompatible,days,capacity,time_transport,freight,quantity,cost,max_day,max_weight,outflow_warehouse,inflow_customersite,p):
        super(TransportationEnv, self).__init__()
        self.num_transport = num_transport
        self.num_warehouses = num_warehouses
        self.num_customer_sites = num_customer_sites
        self.num_routes = num_routes
        self.days = days
        self.capacity = capacity
        self.time_transport = time_transport
        self.incompatible = incompatible
        self.freight = freight
        self.quantity = quantity
        self.cost = cost
        self.max_day = max_day
        self.max_weight = max_weight
        self.p = p
        self.outflow_warehouse = outflow_warehouse
        self.inflow_customersite = inflow_customersite
        self.observation_space = spaces.MultiDiscrete([self.num_routes-1,self.num_transport-1,self.max_day])
        self.action_space = spaces.MultiDiscrete([self.num_transport-1,self.max_day])

        
    def make_sites_index(self):
        customer_sites=[]
        for i in range(self.num_customer_sites):
            customer_sites.append(i)
        self.customer_sites=customer_sites
        warehouse_sites=[]
        for i in range(self.num_warehouses):
            warehouse_sites.append(i)
        self.warehouse_sites=warehouse_sites
        
    def make_transport_index(self):
        transport=[]
        for i in range(self.num_transport):
            transport.append(i)
        self.transport=transport
        
    def compatible(self):
        compatibility=[]
        for i in self.routes:
            x={}
            for j in self.transport:
                x[j]=1
            for j in self.incompatible[i]:
                x[j]=0
            compatibility.append(x)
        self.compatibility=compatibility
        
    def routes_indexing(self):
        self.routes=[]
        for i in range(self.num_routes):
            self.routes.append(i)
            
    def possible_startdays(self):
        startdays=[]
        for i in range(self.max_day+1):
            startdays.append(i)
        self.startdays=startdays
        
    def possible_weights(self):
        weights=[]
        for i in range(self.max_weight+1):
            weights.append(i)
        self.weights=weights
        
    def supply_and_demand(self,supply,demand):
        self.demand=demand
        self.supply=supply
        
    # resetting to default state
    
    def reset(self):
        self.current_state = np.zeros([self.num_routes,self.num_transport,self.max_day+1])
        self.supply_current = np.zeros([self.max_day+1,self.num_warehouses])
        self.demand_current = np.zeros([self.max_day+1,self.num_customer_sites])
        self.current_route = 0
        self.current_day = 0
        self.reward = 0
        for i in range(self.num_warehouses):
            self.supply_current[0][i]=self.supply[i]
        self.done = False
        self.temp = 0
        
    # range of possible actions
    
    def step(self, action):
        transport = action[0]
        startday = action[1]
        old_state = self.current_state
        print(transport,startday)
        temp = 0
        if (startday + self.time_transport[transport] > self.days[self.current_route][1]):
            temp = -1000
        if (startday <= self.current_day):
            temp = -1000
        if (self.freight[self.current_route] > self.capacity[transport]):
            temp = -1000
        if (self.compatibility[self.current_route][transport] == 0):
            temp = -1000
        self.current_state[self.current_route][transport][startday] = 1
        flag = 0
        for i in range (max(0,startday-self.time_transport[transport]),startday+1):
            lol = 0
            for j in range(i,min(i+self.time_transport[transport]+1,self.max_day+1)):
                for k in range(0,self.num_routes):
                    lol += self.current_state[k][transport][j]
            if (lol > self.quantity[transport]):
                flag = 1
                break
        if (flag == 1):
            temp = -1000
        if (temp == -1000):
            self.reward = -np.inf
            temp = -np.inf
            self.current_state = old_state
            self.done = True
        else:
            temp = self.cost[transport] * self.freight[self.current_route]
            self.reward -= temp
            self.current_day = startday + self.time_transport[transport]
        return old_state,temp,self.done
    
    def epsilon_greedy_action(self,epsilon):
        # if random.random() < epsilon:
            # Choose a random action (exploration)
        action = (random.randint(0,self.num_transport-1),random.randint(0,self.max_day))
        # else:
        #     # Choose the best-known action (exploitation)
        #     action = (-1,-1)
        #     maxi = -np.inf
        #     for i in range(self.num_transport):
        #         for j in range(self.max_day):
        #             if (self.Q_table[self.current_state][i][j] < 0 and self.Q_table[self.current_state][i][j] > maxi):
        #                 maxi = self.Q_table[self.current_state][i][j]
        #                 action = (i,j)
        return action

    def linear_decay(self, epsilon, decay_rate, min_epsilon):
        epsilon -= decay_rate
        return max(epsilon, min_epsilon)
            
        

In [28]:
num_transport=5
num_warehouses=2
num_customer_sites=2
num_routes=num_warehouses*num_customer_sites
max_day=30

# incompatible=[]
# for i in range(num_routes):
#     j=np.random.randint(0,num_transport-1)
#     temp=random.sample(range(num_transport),j)
#     incompatible.append(temp)
incompatible=[[1],[0],[],[0]]
# days=[]
# for i in range(num_routes):
#     j=np.random.randint(0,5)
#     k=np.random.randint(max_day-5,max_day+1)
#     days.append([j,k])
days=[[3,5],[2,4],[4,6],[1,3]]
time_transport=[1,1,2,5,10]
capacity=[10,15,20,50,100]
# freight=[]
# for i in range(num_routes):
#     j=np.random.randint(1,max(capacity)/2)
#     freight.append(j)
freight=[5,12,8,15]
total_sum=sum(freight)
cost=[50,50,40,30,60]
quantity=[50,50,50000,50000,50000]
max_weight=100
p=0.5
M = 10000000
outflow_warehouse=[]
for i in range(num_warehouses):
    temp={}
    for j in range(num_customer_sites):
        temp[num_customer_sites*i+j]=1
    outflow_warehouse.append(temp)
inflow_customersite=[]
for i in range(num_customer_sites):
    temp=[]
    for j in range(num_warehouses):
        temp.append(i+num_customer_sites*j)
    inflow_customersite.append(temp) 
# supply=[]
# for i in range(num_warehouses):
#     supply.append(np.random.randint(((total_sum/num_warehouses)+1),2000))
supply=[20,25]
# demand=[]
# for i in range(num_customer_sites):
#     x=0
#     for j in range(num_warehouses):
#         x=x+freight[i+num_customer_sites*j]
#     demand.append(x)
demand=[10,20]

In [29]:
env=TransportationEnv(num_transport,num_warehouses,num_customer_sites,num_routes,incompatible,days,capacity,time_transport,freight,quantity,cost,max_day,max_weight,outflow_warehouse,inflow_customersite,p)
env.make_sites_index()
env.make_transport_index()
env.routes_indexing()
env.compatible()
env.possible_startdays()
env.possible_weights()
env.supply_and_demand(supply,demand)

env.action_space.sample()

array([ 3, 13], dtype=int64)

In [30]:
env=TransportationEnv(num_transport,num_warehouses,num_customer_sites,num_routes,incompatible,days,capacity,time_transport,freight,quantity,cost,max_day,max_weight,outflow_warehouse,inflow_customersite,p)
env.make_sites_index()
env.make_transport_index()
env.routes_indexing()
env.compatible()
env.possible_startdays()
env.possible_weights()
env.supply_and_demand(supply,demand)

# environment created and called for different choices
total_epochs = 1000
curr_epochs = 0
ans = 0
epsilon = 1
min_epsilon = 0.1
decay_rate = 0.01
alpha = 0.1
gamma = 0.99
while (curr_epochs < total_epochs):
    curr_epochs += 1
    env.reset()
    temp = 0
    while not env.done:
        action = (random.randint(0,env.num_transport-1),random.randint(0,env.max_day))
    
        old_state, reward, env.done = env.step(action)

        if (env.done):
            env.done = False
        else:
            print(f"Reward till route {env.current_route} is {env.reward}")
            # if (reward > -np.inf):
            #     env.Q_table[old_state[0]][old_state[1]][old_state[2]][action[0]][action[1]] = env.Q_table[old_state[0]][old_state[1]][old_state[2]][action[1]][action[2]] + alpha * (reward + gamma * np.max(env.Q_table[env.current_state, :]) - env.Q_table[old_state[0]][old_state[1]][old_state[2]][action[0]][action[1]])
            # else:
            #     env.Q_table[old_state][action[0]][action[1]] = -np.inf
            if (env.current_day >= env.days[env.current_route][0] and env.current_day <= env.days[env.current_route][1]):
                env.current_day = 0
                env.current_route += 1
            if (env.current_route >= env.num_routes):
                env.done = True
                print(env.reward)
    epsilon = env.linear_decay(epsilon, decay_rate, min_epsilon)
    ans = min(ans, env.reward)

print(ans)
env.close()

2 14
3 9
2 4
4 3
3 14
4 29
4 0
3 6
2 17
1 11
3 22
0 4
Reward till route 0 is -inf
4 8
2 26
2 9
3 27
4 29
4 20
1 8
3 9
3 30
4 27
2 8
3 19
1 19
2 28
1 21
3 1
0 22
3 3
3 2
4 23
2 29
0 15
3 27
4 15
2 28
3 29
2 12
3 27
2 27
1 12
3 27
3 12
4 29
4 1
2 16
1 22
4 21
4 24
0 29
0 10
2 1
Reward till route 1 is -inf
0 11
2 8
4 15
4 16
2 17
2 26
3 7
2 27
0 7
4 10
0 19
3 4
2 14
0 11
3 29
1 5
Reward till route 2 is -inf
1 14
0 7
2 18
2 14
4 24
1 13
3 12
2 5
1 21
4 15
2 28
0 5
2 8
0 11
0 25
0 15
0 16
3 16
1 20
1 11
3 27
2 8
0 15
0 7
2 17
3 16
4 18
3 25
4 18
0 20
3 1
0 29
2 2
4 16
1 22
1 28
4 13
4 13
3 2
0 30
0 16
4 15
3 21
1 20
3 29
0 17
3 14
2 7
1 9
1 11
0 10
1 28
4 9
3 15
4 22
1 21
0 30
0 18
1 8
1 15
1 16
1 15
3 30
1 16
2 25
4 4
1 13
0 30
0 1
2 29
4 29
4 17
4 17
4 23
4 15
3 5
3 25
2 11
0 9
0 14
4 19
0 12
4 21
0 14
4 14
2 21
4 10
4 21
3 28
3 29
0 9
4 27
1 5
0 12
2 26
2 19
0 19
4 4
0 4
1 21
0 12
4 6
4 23
4 23
0 2
2 13
1 23
1 29
4 3
2 11
2 15
1 29
4 2
1 9
4 26
1 4
1 14
4 24
0 20
2 30
4 19
4 13
2 5
0 8
4