# todos

## Environment



- [ ]create `contracts_df` from `create_contracts()`.
   * Columns : 
     * `start_port`
     * `ship_type`
     * `end_port`
     * `start_day` =  start_step + (steps needed for pick up == dm / 14 knots) ? probably 10 knots
     * `end_day` = start_day + max_distance in distance_matrix / 10 knots in steps
     * `value_in_$` = values_dict for each ship type
     * `assigned_ship` = [0,1,2] None, ship_1, ship_2 
- [ ] create `can_reach` function that calculates if a ship can make it on time for the contract
   * maybe implement `can_reach` with 10,12,14 knots? 



## Agent
- [ ] Action Space:
   * Choose contract Discrete 5 [0,1,2,3,4] choose zero, one, two, three or all contracts
   * For each contract: 
     * Choose ship Discrete 3 [0,1,2]
     * Choose speed Discrete 3 [10,12,14]
- [ ] 

In [1]:
import gym
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [5]:
# load data
data_dict = {
    "fleet_path":'data/fleet_small.csv',
    "ports_path":'data/ports_10.csv',
    "dm_path" : 'data/distance_matrix.csv'
}


ports_df = pd.read_csv(data_dict['ports_path'])
fleet_df = pd.read_csv(data_dict['fleet_path'])
distance_matrix = pd.read_csv(data_dict['dm_path'])




In [None]:
# set fleet at random ports
fleet_df['current_port'] =  np.random.randint(1,11,fleet_df.shape[0])

In [None]:
# get distance matrix as numpy array
dist_cols = distance_matrix.columns.to_list()
del dist_cols[0]
dm_array = distance_matrix.loc[:,dist_cols].to_numpy()

In [9]:
dm_simple = distance_matrix.iloc[:,1:]

Unnamed: 0,Shangai,Singapore,Xiamen,Dalian,Guangzhou,Qingdao,Busan,Hong Kong,Tianjin,Kaohsiung
0,0,2237,585,551,928,399,492,845,696,600
1,2237,0,1653,2619,1543,2463,2503,1460,2764,1621
2,585,1653,0,1002,370,846,886,287,1147,124
3,551,2619,1002,0,1335,356,543,1256,198,1029
4,928,1543,370,1335,0,1183,1223,83,1480,425
5,399,2463,846,356,1183,0,502,1100,412,876
6,492,2503,886,543,1223,502,0,1140,688,908
7,845,1460,287,1256,83,1100,1140,0,287,342
8,696,2764,1147,198,1480,412,688,287,0,1174
9,600,1621,124,1029,425,876,908,342,1174,0


In [None]:
ports_df = ports_df.loc[:,['number','name','country']]

In [112]:
def create_contracts(dm=distance_matrix,ports=ports_df,day=1,seed=None):
    """
    A function for creating cargo contracts for a specific day of the year
    """
    con_df = pd.DataFrame(columns=['start_port_number','end_port_number','contract_type','start_day','end_day','cargo_size','contract_duration','port_distance','value'])
    ports = ports.number.to_numpy()
    ship_types =  np.array(['supramax','ultramax','panamax','kamsarmax'])
    con_df['start_port_number'] = np.repeat(ports,4)
    con_df['contract_type'] = np.tile(ship_types,10)
    con_df['end_port_number'] = np.random.randint(low=1, high=11, size=(40,))
    same_ports = con_df['start_port_number'] == con_df['end_port_number']
    while sum(same_ports) != 0 :
        con_df['end_port_number'] = np.where(same_ports, np.random.randint(low=1, high=11, size=same_ports.shape), con_df['end_port_number'])
        same_ports = con_df['start_port_number'] == con_df['end_port_number']
    con_df['start_day'] = day

    # get distance between start and end ports arrays
    start_port_numbers_index = con_df['start_port_number'] - 1
    end_port_numbers_index = con_df['end_port_number']
            
    dist_df = dm.iloc[start_port_numbers_index,end_port_numbers_index]
    # the distance 
    con_df['port_distance'] = pd.Series(np.diag(dist_df)).reindex()

    # Create cargo size based on ship_type
    type_conditions = [con_df['contract_type'] == 'supramax',
                    con_df['contract_type'] == 'ultramax',
                    con_df['contract_type'] == 'panamax',
                    con_df['contract_type'] == 'kamsarmax']


    cargo_size_choices = [np.random.randint(40_000,50_000,type_conditions[0].shape),
                        np.random.randint(50_000,60_000,type_conditions[1].shape),
                        np.random.randint(60_000,70_000,type_conditions[2].shape),
                        np.random.randint(70_000,80_000,type_conditions[3].shape)]

    con_df['cargo_size'] = np.select(type_conditions,cargo_size_choices)


    ship_type_to_ship_code_choices = [np.ones(shape=type_conditions[0].shape),
                              2*np.ones(shape=type_conditions[1].shape),
                              3*np.ones(shape=type_conditions[2].shape),
                              4*np.ones(shape=type_conditions[3].shape)]

    con_df['contract_type'] = np.select(type_conditions,ship_type_to_ship_code_choices)



    # calculate duration

    # pick random speed from possible set of speeds
    u_picked = np.random.choice([10,12,14])

    # pick distance between ports from df
    dx = con_df['port_distance']
    # find duration of trip between ports with picked speed in hours
    dt_hours = ( dx / u_picked).round()
    # find duration of trip between ports in days
    dt_days = (dt_hours / 24).round()

    # get upper triangle entries of distance matrix
    x = dm.iloc[:,1:].to_numpy(dtype=np.int32)
    mask_upper = np.triu_indices_from(x,k=1)
    triu = x[mask_upper]
    # average voyage distance between ports in the distance matrix
    avg_dx = np.round(triu.mean())
    # average voyage duration between ports with picked speed in hours
    avg_dt_hours = np.round(avg_dx/u_picked)
    # # average voyage duration between ports with picked speed in days
    avg_dt_days = np.round(avg_dt_hours / 24)

    # total duration

    con_df['contract_duration'] = dt_days + avg_dt_days


    # end_day ends at 23:59
    con_df['end_day'] = con_df['start_day'] + con_df['contract_duration'] - 1

    # add contract value 
    con_df['value'] = round(con_df['cargo_size'] * (con_df['port_distance'] / (con_df['contract_duration'] * 1_000_000)))
    return con_df


In [138]:
# here I create the tensor

empty = pd.DataFrame(columns=['start_port_number','end_port_number','contract_type','start_day','end_day','cargo_size','contract_duration','port_distance','value'])
contracts_df = empty.copy()
for i in range(1,365+1):
    x = create_contracts(day=i)
    contracts_df = contracts_df.append(x, ignore_index=True)

# convert everything to float for tensorflow compatibility
contracts_df = contracts_df.astype(np.float32)

# create the input tensor
contracts_tensor = tf.convert_to_tensor(contracts_df)

# add a batch size dimension
contracts_tensor = tf.expand_dims(contracts_tensor,axis=0)

# contracts tensor is the input tensor

In [133]:

def find_distance(port_number_1, port_number_2, dist_m=distance_matrix):
    """
    find port distances from port numbers
    port numbers are port are port indices + 1
    port numbers must be in [1-10] range
    """
    dist_m = dist_m.iloc[:, 1:]
    idx_1 = port_number_1-1
    idx_2 = port_number_2-1
    distance = dist_m.iloc[idx_1, idx_2]
    return distance

In [137]:
contracts_tensor

<tf.Tensor: shape=(1, 14600, 9), dtype=float64, numpy=
array([[[1.000e+00, 9.000e+00, 1.000e+00, ..., 7.000e+00, 6.960e+02,
         4.000e+00],
        [1.000e+00, 8.000e+00, 2.000e+00, ..., 8.000e+00, 8.450e+02,
         6.000e+00],
        [1.000e+00, 8.000e+00, 3.000e+00, ..., 8.000e+00, 8.450e+02,
         7.000e+00],
        ...,
        [1.000e+01, 4.000e+00, 2.000e+00, ..., 8.000e+00, 1.029e+03,
         7.000e+00],
        [1.000e+01, 3.000e+00, 3.000e+00, ..., 4.000e+00, 1.240e+02,
         2.000e+00],
        [1.000e+01, 9.000e+00, 4.000e+00, ..., 8.000e+00, 1.174e+03,
         1.100e+01]]])>

In [247]:
# Delay

contracts = create_contrs(seed=56)
agent_speed = 10
agent_ship = 2
ship_current_port = 4
contract = contracts.loc[0]
start_port_number = contract.start_port_number
end_port_number = contract.end_port_number
trip_1_hours = find_distance(ship_current_port,start_port_number) / agent_speed
print(dt_1_hours)
trip_2_hours = find_distance(start_port_number,end_port_number) / agent_speed
print(dt_2_hours)
trip_total_hours = trip_1_hours + trip_2_hours


trip_total_days = round(trip_total_hours / 24)
print(trip_total_days)

contract_duration = contract.contract_duration
delay = trip_total_days - contract_duration
delay_penalty_factor_per_day = 10

reward = 0
penalty = 0 
if delay <= 0:
    # I arrived on time
    # provide a constant reward > 0 
    reward = 10
    # 
else:
    # I arrived late 
    # penalty < 0
    delay_in_days = -delay
    penalty = delay_penalty_factor_per_day * delay_in_days

print(f"The delay is {delay} days")
print(f"The reward is {reward}")
print(f"The penalty is {penalty}")

100.2
37.0
2
The delay is -4.0 days
The reward is 10
The penalty is 0


In [258]:
fleet_df['ship_number'] == 2

0    False
1     True
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
Name: ship_number, dtype: bool

In [261]:
# cargo_size vs ship_shize

contracts = create_contrs(seed=56)

# gia ka8e contract se spawned contracts

contract = contracts.loc[0]

fleet_df
# elegxw ka8e ploio pou einai available dhladh exei availability 1 
mask_of_available_ships = fleet_df['ship_availability'] == 1

available_ships = fleet_df[mask_of_available_ships]

available_ships_numbers = available_ships.ship_number

available_ships_numbers

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
Name: ship_number, dtype: int64

In [None]:


# checkare sto masking an kaneis mask out tous assous h ta mhdenika

# gia ka8e tetoio ploio


# get ship_number

ship_numbers = 
agent_ship_selection = 2

# get ship info
ship_mask = fleet_df['ship_number'] == agent_ship_selection

ship_info = fleet_df[ship_mask]
print(ship_info)

ship_current_port = ship_info.current_port
print(ship_current_port)






start_port_number = contract.start_port_number
end_port_number = contract.end_port_number
trip_1_distance = find_distance(ship_current_port,start_port_number)
print(trip_1_distance)
trip_2_distance = find_distance(start_port_number,end_port_number)
print(trip_2_distance)
trip_total_distance = trip_1_distance + trip_2_distance




trip_total_days = round(trip_total_hours / 24)
print(trip_total_days)

contract_duration = contract.contract_duration
delay = trip_total_days - contract_duration
delay_penalty_factor_per_day = 10

reward = 0
penalty = 0 
if delay <= 0:
    # I arrived on time
    # provide a constant reward > 0 
    reward = 10
    # 
else:
    # I arrived late 
    # penalty < 0
    delay_in_days = -delay
    penalty = delay_penalty_factor_per_day * delay_in_days

print(f"The delay is {delay} days")
print(f"The reward is {reward}")
print(f"The penalty is {penalty}")


In [205]:
import gym
from gym import spaces
from gym.spaces.discrete import Discrete
import pandas as pd
import numpy as np
import tensorflow as tf
from utils.utils import cii_expected

class CarbonEnv(gym.Env):
    """
    Description :

    A custom openai gym environment for the carbon emission problem.

    """

    def __init__(self, data_dict):
        super().__init__()

        self.data = data_dict
        # get fleet info
        self.fleet = pd.read_csv(self.data['fleet_path'])
        # get port info
        self.ports = pd.read_csv(self.data['ports_path'])
        # get distance matrix
        self.dm = pd.read_csv(self.data['dm_path'])
        
        NUM_SHIPS = len(self.fleet)
        NUM_PORTS = len(self.ports)
        NUM_DAILY_CONTRACTS = NUM_SHIPS * NUM_PORTS
        SET_OF_SPEEDS = [10, 12, 14]
        NUM_SPEEDS = len(SET_OF_SPEEDS)


        # the observation space changes daily based on the step==1 day
        observation_space = spaces.Dict({
            "contracts": spaces.Discrete(NUM_DAILY_CONTRACTS),
            "ships": spaces.Discrete(NUM_SHIPS)
        })

        # action_space = spaces.Dict({
        #     # "choose_ship": spaces.Discrete(NUM_SHIPS+1),
        #     # we loop on every ship and take actions on each of the ships
        #     # using +1 to account for the case of not choosing a contract
        #     # for each ship we must choose:
        #     # * which contract to take among the available which are at most NUM_CONTRACTS+1
        #     # * which speed to use for the trip
        #     "choose_contract": spaces.Discrete(NUM_YEARLY_CONTRACTS+1),
        #     "choose_speed": spaces.Discrete(NUM_SPEEDS+1)
        # })

        # The action space should be described in a daily manner as well
        action_space = spaces.Dict({
            # we loop over the ships for the contracts of the day specified by the step
            # the actions we can take for each ship are:
            "choose_contract": spaces.Discrete(NUM_DAILY_CONTRACTS),
            "choose_speed":spaces.Discrete(NUM_SPEEDS)

        })

        self.reset()
        

    def step(self, action):
        """
        `step` takes a step into the environment

        Returns:
        * obs: The observation produced by the action of the agent
        * reward: The reward produced by the action of the agent
        * done: A flag signaling if the game ended
        * info : A dict useful for debugging


        
        """
        
        pass

    def reset(self):
        """
        `reset` sets the environment to its initial state

        Returns:
        * initial_state : the initial state / observation of the environment.

        """
        
        self.info={}
        self.done = False

        # Set the fleet to its initial state
        self.fleet = pd.read_csv(self.data['fleet_path'])

        # Calculate fleet's required cii
        self.fleet['cii_threshold'] = self.fleet['dwt'].map(cii_expected)

        # set fleet at random ports
        self.fleet['current_port'] = np.random.randint(1, 11, self.fleet.shape[0])


        # create a fleet tensor from the fleet df
        self.fleet_tensor = self.create_tensor_fleet()

        # Create the contract tensor for the whole year
        self.con_tensor = self.create_tensor_contracts()

        # These contracts must be all passed to the mlp encoder

        
        # Moreover as part of the initial observation I should get:

        # 1. The 40 first tensor contracts
        # 2. The fleet tensor

        # Getting the 40 first tensor contract
        self.idx_min, self.idx_max = 0, 40
        self.con_first_40 = self.con_tensor[:,self.idx_min:self.idx_max,:]

        # the ships are also part of the initial state / observation

        initial_state = {"contracts":self.con_first_40,
                          "ships":self.fleet_tensor}


        return initial_state




    def create_contracts(self, day=1,seed=None):
        """
        `create_contracts` creats cargo contracts for a specific day of the year
        """
        con_df = pd.DataFrame(columns=['start_port_number','end_port_number','contract_type','start_day','end_day','cargo_size','contract_duration','port_distance','value'])
        ports = self.ports.number.to_numpy()
        ship_types =  np.array(['supramax','ultramax','panamax','kamsarmax'])
        con_df['start_port_number'] = np.repeat(ports,4)
        con_df['contract_type'] = np.tile(ship_types,10)
        num_contracts = len(ship_types) * len(ports)
        con_df['end_port_number'] = np.random.randint(low=1, high=11, size=(num_contracts,))
        same_ports = con_df['start_port_number'] == con_df['end_port_number']
        while sum(same_ports) != 0 :
            con_df['end_port_number'] = np.where(same_ports, np.random.randint(low=1, high=11, size=same_ports.shape),
                                                 con_df['end_port_number'])
            same_ports = con_df['start_port_number'] == con_df['end_port_number']
        
        # setting the start_day to the current day
        con_df['start_day'] = day

        # get distance between start and end ports arrays
        start_port_numbers_index = con_df['start_port_number'] - 1
        end_port_numbers_index = con_df['end_port_number']
        dist_df = self.dm.iloc[start_port_numbers_index,end_port_numbers_index]
        
        # the distance 
        con_df['port_distance'] = pd.Series(np.diag(dist_df)).reindex()

        # Create cargo size based on ship_type
        type_conditions = [con_df['contract_type'] == 'supramax',
                        con_df['contract_type'] == 'ultramax',
                        con_df['contract_type'] == 'panamax',
                        con_df['contract_type'] == 'kamsarmax']


        cargo_size_choices = [np.random.randint(40_000,50_000,type_conditions[0].shape),
                            np.random.randint(50_000,60_000,type_conditions[1].shape),
                            np.random.randint(60_000,70_000,type_conditions[2].shape),
                            np.random.randint(70_000,80_000,type_conditions[3].shape)]

        con_df['cargo_size'] = np.select(type_conditions,cargo_size_choices)


        ship_type_to_ship_code_choices = [np.ones(shape=type_conditions[0].shape),
                                2*np.ones(shape=type_conditions[1].shape),
                                3*np.ones(shape=type_conditions[2].shape),
                                4*np.ones(shape=type_conditions[3].shape)]

        con_df['contract_type'] = np.select(type_conditions,ship_type_to_ship_code_choices)



        # calculate duration

        # pick random speed from possible set of speeds
        u_picked = np.random.choice([10,12,14])

        # pick distance between ports from df
        dx = con_df['port_distance']

        # find duration of trip between ports with picked speed in hours
        dt_hours = ( dx / u_picked).round()

        # find duration of trip between ports in days
        dt_days = (dt_hours / 24).round()

        # get upper triangle entries of distance matrix
        x = self.dm.iloc[:,1:].to_numpy(dtype=np.int32)
        mask_upper = np.triu_indices_from(x,k=1)
        triu = x[mask_upper]

        # average voyage distance between ports in the distance matrix
        avg_dx = np.round(triu.mean())

        # average voyage duration between ports with picked speed in hours
        avg_dt_hours = np.round(avg_dx/u_picked)

        # # average voyage duration between ports with picked speed in days
        avg_dt_days = np.round(avg_dt_hours / 24)

        # total duration
        con_df['contract_duration'] = dt_days + avg_dt_days


        # end_day ends at 23:59
        con_df['end_day'] = con_df['start_day'] + con_df['contract_duration'] - 1

        # add contract value 
        con_df['value'] = round(con_df['cargo_size'] * (con_df['port_distance'] / (con_df['contract_duration'] * 1_000_000)))

        return con_df

    def create_tensor_contracts(self):
        """
        `create_tensor_contracts` creates a tensor out of the contracts dataframe
        """
        empty = pd.DataFrame(columns=['start_port_number','end_port_number','contract_type','start_day','end_day','cargo_size','contract_duration','port_distance','value'])
        contracts_df = empty.copy()
        for i in range(1,365+1):
            x = self.create_contracts(day=i)
            contracts_df = contracts_df.append(x, ignore_index=True)

        # convert everything to float for tensorflow compatibility
        contracts_df = contracts_df.astype(np.float32)

        # create the input tensor
        contracts_tensor = tf.convert_to_tensor(contracts_df)

        # add a batch size dimension
        contracts_tensor = tf.expand_dims(contracts_tensor,axis=0)

        return contracts_tensor

    def create_tensor_fleet(self):
        """
        `create_tensor_fleet` creates a tensor out of the fleets dataframe
        """
        # keeping only these features from the fleet df
        cols_to_keep = ['ship_number','dwt','cii_threshold','cii','current_port','current_speed','ship_availability']
        df = self.fleet[cols_to_keep]

        # converting to float for tensorflow compatibility
        df = df.astype(np.float32)

        # create the tensor
        tensor = tf.convert_to_tensor(df)

        # add a batch size dimension
        tensor = tf.expand_dims(tensor, axis=0) 

        return tensor

In [206]:
envo = CarbonEnv(data_dict)

<tf.Tensor: shape=(1, 14600, 9), dtype=float32, numpy=
array([[[  1.,   6.,   1., ...,   5., 399.,   3.],
        [  1.,   6.,   2., ...,   5., 399.,   5.],
        [  1.,   9.,   3., ...,   6., 696.,   7.],
        ...,
        [ 10.,   5.,   2., ...,   6., 425.,   4.],
        [ 10.,   7.,   3., ...,   8., 908.,   8.],
        [ 10.,   1.,   4., ...,   6., 600.,   8.]]], dtype=float32)>

In [204]:
r = envo.fleet
r.loc[:,cols_to_keep]

Unnamed: 0,ship_number,dwt,cii_threshold,cii,current_port,current_speed,ship_availability
0,1,50000,5.385183,0,3,0,1
1,2,60000,4.807837,0,5,0,1
2,3,70000,4.368264,0,5,0,1
3,4,80000,4.020109,0,4,0,1


In [167]:
k = envo.create_tensor_contracts()
k

<tf.Tensor: shape=(1, 14600, 9), dtype=float32, numpy=
array([[[  1.,   7.,   1., ...,   6., 492.,   3.],
        [  1.,  10.,   2., ...,   6., 600.,   5.],
        [  1.,   9.,   3., ...,   6., 696.,   7.],
        ...,
        [ 10.,   3.,   2., ...,   3., 124.,   2.],
        [ 10.,   7.,   3., ...,   6., 908.,  10.],
        [ 10.,   7.,   4., ...,   6., 908.,  11.]]], dtype=float32)>

In [191]:
idx_min,idx_max = 0,40
print(idx_min)
print(idx_max)


0
39


In [189]:
k[:,l_min:l_max,:]

<tf.Tensor: shape=(1, 40, 9), dtype=float32, numpy=
array([[[1.0000e+00, 7.0000e+00, 1.0000e+00, 1.0000e+00, 6.0000e+00,
         4.1247e+04, 6.0000e+00, 4.9200e+02, 3.0000e+00],
        [1.0000e+00, 1.0000e+01, 2.0000e+00, 1.0000e+00, 6.0000e+00,
         5.1158e+04, 6.0000e+00, 6.0000e+02, 5.0000e+00],
        [1.0000e+00, 9.0000e+00, 3.0000e+00, 1.0000e+00, 6.0000e+00,
         6.3763e+04, 6.0000e+00, 6.9600e+02, 7.0000e+00],
        [1.0000e+00, 4.0000e+00, 4.0000e+00, 1.0000e+00, 6.0000e+00,
         7.5483e+04, 6.0000e+00, 5.5100e+02, 7.0000e+00],
        [2.0000e+00, 3.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+01,
         4.5687e+04, 1.0000e+01, 1.6530e+03, 8.0000e+00],
        [2.0000e+00, 4.0000e+00, 2.0000e+00, 1.0000e+00, 1.3000e+01,
         5.4472e+04, 1.3000e+01, 2.6190e+03, 1.1000e+01],
        [2.0000e+00, 9.0000e+00, 3.0000e+00, 1.0000e+00, 1.4000e+01,
         6.5162e+04, 1.4000e+01, 2.7640e+03, 1.3000e+01],
        [2.0000e+00, 5.0000e+00, 4.0000e+00, 1.0000e+00, 9.

In [None]:
# training loop schema

# agent = GraphAttentionModel


# years = 3000 
# for i in range(years): 
#     init_obs = env.reset()
#     done = False 
#     score = 0
#     day = 1
#     while not done: # while year is not done
#         act = agent.choose_action(obs)
#         new_state, reward, done, info = env.step(act)
#         agent.remember(obs,act, reward, new_state, int(done))
#         agent.learn()
#         score += reward
#         day += 1
#         obs = new_state 

In [105]:
# encoder mlp

# see https://keras.io/examples/structured_data/structured_data_classification_from_scratch/
# see https://www.tensorflow.org/tutorials/generative/autoencoder
# Create an Integer Categorical Feature for contract_type