# Car Rental Problem

## Approach

The state is a tuple of three values:
        - Number of cars in Location 1
        - Number of cars in Location 2
        - Number of cars in Location 3
An action moves cars from one location to another overnight.

In [132]:
import numpy as np
import math, itertools, time, copy

In [209]:
class Environment:
    """Car-rental MDP over three locations, solved with value iteration.

    A state is the tuple (cars at Location 1, cars at Location 2, cars at
    Location 3); location ``i`` holds between 0 and ``size[i] - 1`` cars.
    An action is a list ``[d1, d2, d3]`` of net overnight changes in the car
    count of each location; its entries sum to zero.

    Attributes:
        size: number of distinct car counts per location (three entries).
        V: state-value table of shape ``size``.
        policy: best action found for every state, shape ``size + (3,)``.
        gamma: discount factor applied to the value of the next state.
        requestList: Poisson mean of rental requests, one per location.
        returnList: Poisson mean of returned cars, one per location.
        reward: income earned per rented car.
        P: stacked tables; ``P[i, c, n]`` is the accumulated probability that
            location ``i`` goes from ``c`` to ``n`` cars during a day.
        R: stacked tables; ``R[i, c, n]`` is the probability-weighted rental
            income of location ``i`` for the same transition.
        P1, P2, P3, R1, R2, R3: views of ``P[0]`` .. ``P[2]`` and ``R[0]`` ..
            ``R[2]``; they share memory with the stacked tables.
    """

    # Largest net overnight change in the car count of a single location.
    MAX_MOVE = 5
    # Cost per car of net change at Location 1.
    # NOTE(review): only Location 1's change is charged; changes at Locations
    # 2 and 3 are free. Confirm this matches the problem statement.
    MOVE_COST = 2

    def __init__(self, size, requestList, returnList, gamma, reward):
        self.size = size
        self.V = np.zeros(size)
        self.policy = np.zeros((size[0], size[1], size[2], 3))
        self.gamma = gamma
        self.requestList = requestList
        self.returnList = returnList
        self.reward = reward
        # All tables are square with the largest location size so that the
        # three locations can be stacked into a single array.
        z = max(size[0], size[1], size[2])
        self.P = np.zeros((3, z, z))
        self.R = np.zeros((3, z, z))
        # Per-location aliases are views, so filling P/R fills them as well.
        self.P1, self.P2, self.P3 = self.P[0], self.P[1], self.P[2]
        self.R1, self.R2, self.R3 = self.R[0], self.R[1], self.R[2]

    def poisson(self, k, lam):
        """Return the Poisson probability of ``k`` events given mean ``lam``.

        A negative ``k`` is impossible and yields 0.0 instead of an error.
        """
        if k < 0:
            return 0.0
        # math.factorial replaces the removed ``np.math`` alias.
        return ((lam**k) * np.exp(-lam)) / math.factorial(k)

    def isValid(self, i, j, k, car1, car2, car3):
        """Return True if applying changes (i, j, k) keeps every count in range.

        Each resulting count must satisfy ``0 <= count < size[location]``.
        """
        return (0 <= i + car1 < self.size[0]
                and 0 <= j + car2 < self.size[1]
                and 0 <= k + car3 < self.size[2])

    def totalActions(self, car1, car2, car3):
        """Return every feasible action for the state (car1, car2, car3).

        An action ``[i, j, k]`` has entries within ``[-MAX_MOVE, MAX_MOVE]``,
        sums to zero and leaves every location within its range.  ``[0, 0, 0]``
        is feasible for every in-range state, so the list is never empty for
        such states.  Actions are produced in lexicographic order.
        """
        actions = []
        moves = range(-self.MAX_MOVE, self.MAX_MOVE + 1)
        for i, j in itertools.product(moves, moves):
            k = -(i + j)    # the three changes must cancel out
            if abs(k) > self.MAX_MOVE:
                continue
            if self.isValid(i, j, k, car1, car2, car3):
                actions.append([i, j, k])
        return actions

    #================Pre-Calculations of Probability and Rewards============
    def storeAll(self):
        """Pre-compute the transition and reward tables of all locations.

        The tables are cleared first, so calling this method again rebuilds
        them instead of accumulating on top of the previous values.
        """
        self.P.fill(0.0)
        self.R.fill(0.0)
        for loc in range(3):
            self._fillTables(loc)

    def _fillTables(self, loc):
        """Fill ``P[loc]`` and ``R[loc]`` from the Poisson request/return models.

        For ``car`` cars, ``req`` requests (at most ``car``) and ``ret``
        returns lead to ``car - req + ret`` cars.

        NOTE(review): combinations with more requests than cars, or whose
        result exceeds the location's range, are skipped, so the rows of
        ``P[loc]`` do not sum to 1.
        """
        n = self.size[loc]
        pReq = [self.poisson(k, self.requestList[loc]) for k in range(n)]
        pRet = [self.poisson(k, self.returnList[loc]) for k in range(n)]
        P = self.P[loc]
        R = self.R[loc]
        for car in range(n):
            for req in range(car + 1):
                remaining = car - req
                # Only returns that keep the next count below n are counted.
                for ret in range(n - remaining):
                    nextCar = remaining + ret
                    prob = pReq[req] * pRet[ret]
                    P[car, nextCar] += prob
                    R[car, nextCar] += prob * req * self.reward

    def _actionValue(self, state, action, states):
        """Return the expected value of taking ``action`` in ``state``.

        The value is the movement cost plus, summed over every next state,
        the expected rental income and the discounted current value estimate.
        """
        car1 = state[0] + action[0]
        car2 = state[1] + action[1]
        car3 = state[2] + action[2]
        value = abs(action[0]) * -self.MOVE_COST
        for nextCar1, nextCar2, nextCar3 in states:
            P_car1 = self.P1[car1, nextCar1]
            P_car2 = self.P2[car2, nextCar2]
            P_car3 = self.P3[car3, nextCar3]

            R_car1 = self.R1[car1, nextCar1]
            R_car2 = self.R2[car2, nextCar2]
            R_car3 = self.R3[car3, nextCar3]

            # Income of one location weighted by the transition
            # probabilities of the other two.
            immReward = ((P_car1 * P_car2 * R_car3)
                         + (P_car1 * R_car2 * P_car3)
                         + (R_car1 * P_car2 * P_car3))
            value += immReward + (P_car1 * P_car2 * P_car3 * self.gamma
                                  * self.V[nextCar1, nextCar2, nextCar3])
        return value

    def ValueIteration(self, maxIterations=10, theta=0.1):
        """Run in-place value iteration and record the greedy policy.

        ``storeAll`` must have been called first so the tables are filled.

        Args:
            maxIterations: maximum number of sweeps over the state space.
            theta: a sweep whose largest value change is below this threshold
                ends the iteration.

        Progress is reported with ``print``; nothing is returned.
        """
        states = list(itertools.product(range(self.size[0]),
                                        range(self.size[1]),
                                        range(self.size[2])))
        for iteration in range(1, maxIterations + 1):
            print("======= iter ", iteration, " ========== ")
            delta = 0
            for i, state in enumerate(states, start=1):
                # Start below any reachable value so that the first action is
                # always selected and the policy entry is never left unset.
                bestAction = None
                maxReward = -math.inf
                for action in self.totalActions(state[0], state[1], state[2]):
                    value = self._actionValue(state, action, states)
                    if value > maxReward:
                        maxReward = value
                        bestAction = action

                prevVal = self.V[state]
                # Written immediately, so later states in this sweep already
                # see the updated value.
                self.V[state] = maxReward
                delta = max(delta, abs(maxReward - prevVal))
                self.policy[state] = bestAction
                print("max : ", maxReward)
                if i % 100 == 0:
                    print("i = ", i)
                    print("State : ", state, "V = ", self.V[state])
                    print("Max-reward : ", np.max(self.V))
                    print("delta : ", delta)
                    print("policy = ", self.policy[state])
                    print("self.V : ", self.V)

            if delta < theta:
                print("\n========= Value Iteration Converged =========")
                print("TOTAL ITERATIONS : ", iteration)
                break


In [210]:
# Problem configuration.  An alternative, larger instance is size = [20, 10, 10].
size = [10, 5, 5]
requestList = [3, 2, 2]     # Poisson mean of rental requests per location
returnList = [3, 1, 1]      # Poisson mean of returned cars per location
gamma = 0.9                 # discount factor
reward = 10                 # income per rented car
envObj = Environment(size, requestList, returnList, gamma, reward)

# Time the table pre-computation together with the value iteration.
a = time.perf_counter()
envObj.storeAll()
envObj.ValueIteration()
b = time.perf_counter()
print(int((b-a)) ,"s")

# Flatten to 2-D for savetxt.  The row count is derived from the array shape,
# so changing `size` above does not break the export.
np.savetxt("value.txt", envObj.V.reshape(-1, envObj.V.shape[-1]), fmt = "%5.2f", header = 'Values')
np.savetxt("policy.txt", envObj.policy.reshape(-1, envObj.policy.shape[-1]), fmt = "%d", header = "Policy")
print("value.txt Saved")
print("policy.txt Saved")

max :  0
max :  0.018084429787316194
max :  0.10795073474915659
max :  0.24957498698844213
max :  0.5281372905676744
max :  0.018087803890215443
max :  0.10795748306690359
max :  0.24959454559194108
max :  0.5281750799800567
max :  0.7605719277598993
max :  0.10798159323967342
max :  0.24964550736873928
max :  0.5282446258331294
max :  0.7608389977359914
max :  3.0753088532466877
max :  0.2497307312336496
max :  0.5283994302371054
max :  0.7612450132681269
max :  3.075917970842496
max :  6.33337284251367
max :  0.5286335047842955
max :  0.761881500413094
max :  3.076765416723241
max :  6.334084292245695
max :  10.331362428038556
max :  0.027150648072066175
max :  0.15335111329115989
max :  0.6735949752939587
max :  1.3949790239920479
max :  2.7632155802186675
max :  0.15355352367694494
max :  0.6739998817442416
max :  1.396117337967157
max :  2.7653952136864373
max :  5.0847581658093235
max :  0.6753917168762438
max :  1.3990368368847077
max :  2.7693900559880325
max :  5.0930376139047

max :  22.672373200358077
max :  30.97236299533178
max :  37.90981575111738
max :  41.005382851506894
max :  23.155873960404794
max :  31.48024267656035
max :  38.50765349910728
max :  41.66847062265512
max :  43.23936923144299
max :  32.08211417299828
max :  39.19373845555101
max :  42.25890670555076
max :  43.7524153859368
max :  45.05482322791653
max :  7.347070015630096
max :  14.806128023130164
max :  21.71064723304283
max :  30.517721418565607
max :  37.791457807332556
max :  14.889037507016802
max :  21.798339405915396
max :  30.616546679565317
max :  38.02464705515043
max :  41.310877467776066
max :  22.11084883313677
max :  30.85975440503141
max :  38.40643987930373
max :  41.91488711346034
max :  43.54764655249271
max :  31.292177448373998
max :  39.00523515195055
max :  42.60477227187603
max :  44.20509819975187
max :  45.30899393364077
max :  39.683795145260014
max :  43.35375434957294
max :  44.87419741400235
max :  46.020484796441124
max :  45.66528204498119
i =  200
Stat

max :  16.356806441552518
max :  1.245415237479739
max :  4.555383199426282
max :  8.727357212947407
max :  16.372839512551465
max :  21.950754053233762
max :  4.5649356142660595
max :  8.739400341946027
max :  16.39851062656971
max :  21.986680146970063
max :  29.153629471466218
max :  8.761731610725128
max :  16.437023239437593
max :  22.030922087592604
max :  29.218064649714478
max :  37.87900248857976
max :  16.474629894196266
max :  22.086873292292463
max :  29.29166133612525
max :  37.94151509194184
max :  43.26009698902725
i =  100
State :  (3, 4, 4) V =  43.26009698902725
Max-reward :  47.553337787754984
delta :  12.388959468562923
policy =  [ 2. -1. -1.]
self.V :  [[[4.48894636e-03 3.40867095e-02 1.63815515e-01 3.60110987e-01
   7.41102156e-01]
  [3.40874973e-02 1.63818889e-01 3.60117009e-01 7.41119743e-01
   1.69648417e+00]
  [1.63832162e-01 3.60133470e-01 7.41161957e-01 1.69663436e+00
   5.31120854e+00]
  [3.60179834e-01 7.41277278e-01 1.69695677e+00 5.31169213e+00
   1.0257

max :  43.46847823981764
max :  48.98205602300181
max :  50.57745156683773
max :  51.85268890443782
max :  43.535901149044506
max :  49.07370842889186
max :  50.66173147124698
max :  51.92190951712038
max :  52.71527316039424
max :  49.167867427806854
max :  50.748160105947875
max :  52.00651369323102
max :  52.808691868920654
max :  49.50962580055662
max :  21.812467280288534
max :  31.89623869050727
max :  41.63728091573825
max :  47.22906844042968
max :  48.83102688386928
max :  31.899610931336113
max :  41.64433530447012
max :  47.24481386492114
max :  48.84454563509075
max :  50.10997484464691
max :  41.6636988350465
max :  47.27674565809431
max :  48.865940881089564
max :  50.132553962057834
max :  50.97648044867804
max :  47.32598251481577
max :  48.9047522119239
max :  50.16544704004732
max :  51.02569135137585
max :  47.76448223538461
max :  48.94590849658243
max :  50.20673983850748
max :  51.07909918888975
max :  47.82126287011779
max :  41.171994534315836
max :  0.005707044

max :  24.030427455747855
max :  31.972899750184283
max :  42.12462508970593
max :  47.89199565942244
max :  24.043046135476814
max :  31.989815898504986
max :  42.14828225848578
max :  47.91014002126867
max :  49.69859497165977
max :  0.8152994371775256
max :  5.222171600507157
max :  13.311615094299457
max :  22.052346516173053
max :  30.008349157972088
max :  5.223681444984807
max :  13.314846300699099
max :  22.058330092456032
max :  30.01590980294903
max :  40.18423916841613
max :  13.32434763590818
max :  22.06990584754582
max :  30.02912581687017
max :  40.201812529927224
max :  49.97168906385232
max :  22.08718884131717
max :  30.051318896688574
max :  40.22493449684288
max :  49.99695761029779
max :  51.78223054695379
max :  30.06846468872126
max :  40.252178565154765
max :  50.02105798317067
max :  51.80943948209054
max :  53.299374348800214
max :  3.2437256773919607
max :  11.35185663899271
max :  20.10760881539973
max :  28.083719774699325
max :  38.27419735419794
max :  11

max :  0.05381148223758831
max :  0.24751429230249553
max :  1.0010066662750476
max :  2.0371990084442726
max :  3.9985833108281126
max :  0.24751622427919764
max :  1.001014933233214
max :  2.0372137802742123
max :  3.998626179358234
max :  8.002106614613895
max :  1.0010427354503413
max :  2.0372477650268235
max :  3.9987065319313393
max :  8.002270505528422
max :  13.400655181520818
max :  2.0373204070671194
max :  3.9988579723161743
max :  8.002488318036873
max :  13.40089201236867
max :  19.372510113879603
max :  3.9990319400035297
max :  8.002740136425013
max :  13.401100906837954
max :  19.372922574242278
max :  27.054984987255246
max :  0.17371117353859652
max :  0.7056740742270724
max :  2.6706527149213564
max :  5.234860481614949
max :  10.003020485673334
max :  0.7056870663089375
max :  2.6707081327121323
max :  5.234959560290005
max :  10.003305942252211
max :  15.40193807437431
max :  2.67088328433716
max :  5.235171755745873
max :  10.003789909982908
max :  15.40267273846

max :  36.49032022613626
max :  46.33095386187192
max :  51.98031584316469
max :  53.59105695914447
max :  36.49225599072129
max :  46.33363619355799
max :  51.98333319875313
max :  53.593876526839615
max :  54.88059076983133
max :  46.336412893043104
max :  51.986399248184554
max :  53.59655590108088
max :  54.88332026591203
max :  55.679775916636906
i =  200
State :  (7, 4, 4) V =  55.679775916636906
Max-reward :  56.87038083853393
delta :  0.4550553924448124
policy =  [0. 0. 0.]
self.V :  [[[5.96903920e-03 3.92968365e-02 1.82143187e-01 3.96164603e-01
   8.11981982e-01]
  [3.92968821e-02 1.82143383e-01 3.96164952e-01 8.11983000e-01
   1.99849505e+00]
  [1.82144147e-01 3.96165899e-01 8.11985421e-01 1.99850369e+00
   6.00188198e+00]
  [3.96168500e-01 8.11991810e-01 1.99852147e+00 6.00190864e+00
   1.14001731e+01]
  [8.12000991e-01 1.99854746e+00 6.00194019e+00 1.14001994e+01
   1.73714847e+01]]

 [[5.38114822e-02 2.47514292e-01 1.00100667e+00 2.03719901e+00
   3.99858331e+00]
  [2.4751

max :  32.25587766028509
max :  7.289522297916692
max :  15.436770992818488
max :  24.23928103987659
max :  32.256314845332774
max :  42.50452277243952
max :  15.43720021225229
max :  24.239700498790675
max :  32.25684870152676
max :  42.50520091798724
max :  48.35089214278189
max :  24.240074594013187
max :  32.25734321266137
max :  42.50588969010542
max :  48.35141602792638
max :  50.14465992662798
max :  0.85224999805037
max :  5.289977809749339
max :  13.437726826812279
max :  22.240346564260328
max :  30.257879047770015
max :  5.290023040965205
max :  13.437822998877142
max :  22.2405247356448
max :  30.258102486899087
max :  40.50692443651623
max :  13.438099828626436
max :  22.240859942934883
max :  30.25848326445407
max :  40.50742651287109
max :  50.35317473517494
max :  22.241347884041254
max :  30.259102426411374
max :  40.50806748074351
max :  50.353870783902885
max :  52.146998962682275
max :  30.25956685214794
max :  40.50880208239834
max :  50.354514769830445
max :  52.1

In [184]:
# Largest entry of the stacked table envObj.P; evaluate `envObj.P` to see the whole array.
envObj.P.max()

0.2115950405634218

In [196]:
# Entry of envObj.R at first index 0, second index 1, third index 3.
envObj.R[(0, 1, 3)]

0.3346315438499584

In [194]:
# Only the last expression of a cell is echoed, so this maximum is computed and discarded.
envObj.R.max()
# Echo the complete stacked table.
envObj.R

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
         0.00000000e+00, 0.00000000e+00],
        [7.43625653e-02, 2.23087696e-01, 3.34631544e-01, 3.34631544e-01,
         2.50973658e-01, 1.50584195e-01, 7.52920974e-02, 3.22680417e-02,
         1.21005156e-02, 4.03350522e-03],
        [2.23087696e-01, 7.43625653e-01, 1.22698233e+00, 1.33852618e+00,
         1.08755252e+00, 7.02726242e-01, 3.76460487e-01, 1.72096223e-01,
         6.85695887e-02, 2.42010313e-02],
        [3.34631544e-01, 1.22698233e+00, 2.24946760e+00, 2.73282427e+00,
         2.46790764e+00, 1.76518139e+00, 1.04154068e+00, 5.21666675e-01,
         2.26548543e-01, 8.67203621e-02],
        [3.34631544e-01, 1.33852618e+00, 2.73282427e+00, 3.75530955e+00,
         3.86220574e+00, 3.14553651e+00, 2.10399583e+00, 1.18674687e+00,
         5.76118995e-01, 2.44699316e-01],
        [2.50973658e-01, 1.08755252e+00, 2.46790764e+00, 3.8

In [211]:
# Largest state value in the table envObj.V.
envObj.V.max()

56.90804058903058