# Simulation programming project

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import geopy.distance
from scipy.optimize import linprog
import itertools


In [2]:
data = pd.read_csv('DemandAndFulfillmentLog.csv')

In [3]:
df = data.copy(deep=True)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5078 entries, 0 to 5077
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   RequestedDateTime  5078 non-null   object
 1   Rig                5078 non-null   object
 2   Material           5078 non-null   object
 3   QtyRequested       5078 non-null   int64 
 4   ReceivedDateTime   4837 non-null   object
 5   QtyReceived        5078 non-null   int64 
 6   Vessel             4879 non-null   object
 7   OriginPort         4879 non-null   object
dtypes: int64(2), object(6)
memory usage: 317.5+ KB


In [5]:
df.describe(include=['O'])

Unnamed: 0,RequestedDateTime,Rig,Material,ReceivedDateTime,Vessel,OriginPort
count,5078,5078,5078,4837,4879,4879
unique,373,4,6,4817,10,2
top,12/4/2021 0:00,BetaRig,DryBulk,7/24/2021 22:59,150C,HamburgPort
freq,19,1492,900,2,622,2463


In [6]:
df.describe(exclude=['O'])

Unnamed: 0,QtyRequested,QtyReceived
count,5078.0,5078.0
mean,1484.983655,1410.684915
std,1972.36042,1954.255474
min,1.0,0.0
25%,12.0,9.0
50%,60.0,32.0
75%,2500.0,2400.0
max,9800.0,9800.0


In [7]:
# format datetime

for col in ['RequestedDateTime', 'ReceivedDateTime'] :
    datetime = pd.to_datetime(df[col], errors='coerce', infer_datetime_format=True)
    df[col] = datetime
df

Unnamed: 0,RequestedDateTime,Rig,Material,QtyRequested,ReceivedDateTime,QtyReceived,Vessel,OriginPort
0,2021-01-01 08:00:00,AlphaRig,DeckCargo,5,2021-01-06 05:14:00,5,150A,RotterdamPort
1,2021-01-01 08:00:00,BetaRig,DeckCargo,7,2021-01-04 08:49:00,7,150B,RotterdamPort
2,2021-01-01 08:00:00,BetaRig,LiquidBulk,2000,2021-01-04 09:59:00,2000,150B,RotterdamPort
3,2021-01-01 08:00:00,BetaRig,Fuel,7700,2021-01-04 10:39:00,7700,150B,RotterdamPort
4,2021-01-01 08:00:00,BetaRig,DryBulk,2300,2021-01-04 14:30:00,2300,150B,RotterdamPort
...,...,...,...,...,...,...,...,...
5073,2021-12-31 08:00:00,CharlieRig,Fuel,1400,NaT,0,,
5074,2021-12-31 08:00:00,CharlieRig,Casing,9,NaT,0,,
5075,2021-12-31 08:00:00,CharlieRig,DryBulk,3100,NaT,0,,
5076,2021-12-31 08:00:00,DeltaRig,Casing,20,NaT,0,,


# Missing data

In [8]:
df[df['ReceivedDateTime'].isnull() == True]

Unnamed: 0,RequestedDateTime,Rig,Material,QtyRequested,ReceivedDateTime,QtyReceived,Vessel,OriginPort
4569,2021-11-25 00:00:00,DeltaRig,Casing,9,NaT,0,150I,HamburgPort
4570,2021-11-25 00:00:00,DeltaRig,DryBulk,2600,NaT,0,150I,HamburgPort
4580,2021-11-26 00:00:00,CharlieRig,DryBulk,2600,NaT,0,150G,HamburgPort
4581,2021-11-26 00:00:00,CharlieRig,Pipe,12,NaT,0,150G,HamburgPort
4583,2021-11-26 00:00:00,DeltaRig,LiquidBulk,2700,NaT,0,150I,HamburgPort
...,...,...,...,...,...,...,...,...
5073,2021-12-31 08:00:00,CharlieRig,Fuel,1400,NaT,0,,
5074,2021-12-31 08:00:00,CharlieRig,Casing,9,NaT,0,,
5075,2021-12-31 08:00:00,CharlieRig,DryBulk,3100,NaT,0,,
5076,2021-12-31 08:00:00,DeltaRig,Casing,20,NaT,0,,


There are 241 rows without ReceivedDateTime data and most are in the last months of the year  
some rows have Vessel and OriginPort data, which probably means that the order has not been delivered to the destination rig but the order has been assigned  
the other rows do not have Vessel and OriginPort data, which probably means that the order has not been assigned to any delivery attempt  
so, we are going to fill in missing ReceivedDateTime data for 2022-01-01 (assumed data collection date)

In [9]:
df['ReceivedDateTime'].fillna(value=pd.to_datetime('2022-01-01 00:00:00'), inplace=True)
df[-1:-10:-1]

Unnamed: 0,RequestedDateTime,Rig,Material,QtyRequested,ReceivedDateTime,QtyReceived,Vessel,OriginPort
5077,2021-12-31 08:00:00,DeltaRig,DryBulk,2600,2022-01-01,0,,
5076,2021-12-31 08:00:00,DeltaRig,Casing,20,2022-01-01,0,,
5075,2021-12-31 08:00:00,CharlieRig,DryBulk,3100,2022-01-01,0,,
5074,2021-12-31 08:00:00,CharlieRig,Casing,9,2022-01-01,0,,
5073,2021-12-31 08:00:00,CharlieRig,Fuel,1400,2022-01-01,0,,
5072,2021-12-31 08:00:00,CharlieRig,DeckCargo,1,2022-01-01,0,,
5071,2021-12-31 08:00:00,BetaRig,Casing,20,2022-01-01,0,150C,RotterdamPort
5070,2021-12-31 08:00:00,BetaRig,Fuel,2300,2022-01-01,0,150C,RotterdamPort
5069,2021-12-31 08:00:00,AlphaRig,Pipe,12,2022-01-01,0,,


In [10]:
df.to_csv('DemandAndFulfillmentLog_formatted.csv', index=False)

# Analysis
## Vessel type
There is a total of ten vessel  
Only vessel type **150** is currently being used by the company

In [11]:
df['Vessel'].value_counts()

150C    622
150A    621
150D    608
150B    565
150I    450
150F    432
150E    427
150G    403
150J    379
150H    372
Name: Vessel, dtype: int64

# Feature Engineering
In this cell, multiple columns are created to help with the analysis stage  
`TimeDiff` *datetime* : Total time spend between time of request and time of delivery  
`IsLate` *boolean* : True for "late delivery" (at least 5 days adter requested)  
`TimeLate` *datetime* : `TimeDiff` minus 5 days, clipped out negative values for in-time delivery  
`LatePenalty` *float* : Cost penalty for late delivery ($10,000 per hour)  
`QtyLost` *int* : Amount of cargo lost berfore delivery   
`Trip` *string* : Information about each delivery trip, comprise of OriginPort-Vessel-Rig

In [12]:
# function to group by request (orders from the same time in the same day is considered as the same request) and to calculate if the order is late or not
# by comparing the requested date with the minimum received date of the request

def late(df):
    grouped_request = df.groupby(['RequestedDateTime', 'Rig'])
    request = grouped_request.aggregate({'ReceivedDateTime':np.min, 'RequestedDateTime':np.min})
    request['TimeDiff'] = request['ReceivedDateTime'] - request['RequestedDateTime']
    request['IsLate'] = request['TimeDiff'] > pd.Timedelta('5 days')
    request['TimeLate'] = (request['TimeDiff'] - pd.Timedelta('5 days')).clip(lower='00:00:00')
    # late penalty of $10000 per hour if IsLate = true (a fraction of an hour late is calculated as one full hour late)
    request['LatePenalty'] = request['IsLate'] * (request['TimeLate'].dt.total_seconds() // 3600 + 1) * 10000
    return request

late(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,ReceivedDateTime,RequestedDateTime,TimeDiff,IsLate,TimeLate,LatePenalty
RequestedDateTime,Rig,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 08:00:00,AlphaRig,2021-01-06 05:14:00,2021-01-01 08:00:00,4 days 21:14:00,False,0 days 00:00:00,0.0
2021-01-01 08:00:00,BetaRig,2021-01-04 08:49:00,2021-01-01 08:00:00,3 days 00:49:00,False,0 days 00:00:00,0.0
2021-01-01 08:00:00,CharlieRig,2021-01-06 10:46:00,2021-01-01 08:00:00,5 days 02:46:00,True,0 days 02:46:00,30000.0
2021-01-01 08:00:00,DeltaRig,2021-01-07 01:02:00,2021-01-01 08:00:00,5 days 17:02:00,True,0 days 17:02:00,180000.0
2021-01-02 08:00:00,AlphaRig,2021-01-06 06:04:00,2021-01-02 08:00:00,3 days 22:04:00,False,0 days 00:00:00,0.0
...,...,...,...,...,...,...,...
2021-12-30 08:00:00,DeltaRig,2022-01-01 00:00:00,2021-12-30 08:00:00,1 days 16:00:00,False,0 days 00:00:00,0.0
2021-12-31 08:00:00,AlphaRig,2022-01-01 00:00:00,2021-12-31 08:00:00,0 days 16:00:00,False,0 days 00:00:00,0.0
2021-12-31 08:00:00,BetaRig,2022-01-01 00:00:00,2021-12-31 08:00:00,0 days 16:00:00,False,0 days 00:00:00,0.0
2021-12-31 08:00:00,CharlieRig,2022-01-01 00:00:00,2021-12-31 08:00:00,0 days 16:00:00,False,0 days 00:00:00,0.0


In [13]:
# total late penalty
late(df)['LatePenalty'].sum()

1477100000.0

In [14]:
with pd.ExcelWriter('grouped_req.xlsx', datetime_format='YYYY-MM-DD HH:MM:SS') as writer:
    late(df).to_excel(writer, merge_cells=True)

In [15]:
df['QtyLost'] = (df['QtyRequested'] - df['QtyReceived'])

## Boat trips
from crosstab analysis, Vessel 150A, 150B, 150CC, 150D is only being used for delivery between RotterdamPort and AlphaRig, BetaRig  
in contrast, Vessel 150E, 150F, 150G, 150H, 150I, 150J is only being used for delivery between HamburgPort and CharlieRig, DeltaRig

In [16]:
pd.crosstab(index=df['Rig'], columns=df['Vessel'])

Vessel,150A,150B,150C,150D,150E,150F,150G,150H,150I,150J
Rig,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AlphaRig,227,201,270,230,0,0,0,0,0,0
BetaRig,394,364,352,378,0,0,0,0,0,0
CharlieRig,0,0,0,0,240,259,229,123,273,188
DeltaRig,0,0,0,0,187,173,174,249,177,191


In [17]:
pd.crosstab(index=df['OriginPort'], columns=df['Vessel'])

Vessel,150A,150B,150C,150D,150E,150F,150G,150H,150I,150J
OriginPort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
HamburgPort,0,0,0,0,427,432,403,372,450,379
RotterdamPort,621,565,622,608,0,0,0,0,0,0


In [18]:
sorted_receive = df.sort_values(by=['Rig', 'ReceivedDateTime'], ascending=True)

def select_rig_and_vessel(df, rig, vessel):
    return sorted_receive.loc[(df['Rig'].isin(rig)) & (df['Vessel'] == vessel), :].copy(deep=True)

selected_trip = select_rig_and_vessel(sorted_receive, ['CharlieRig', 'DeltaRig'], '150J')
selected_trip['ReceivedTimeDiff'] = selected_trip['ReceivedDateTime'].diff().dt.total_seconds() // 60
selected_trip['ReceivedTimeDiff'].fillna(value=0, inplace=True)
selected_trip['ReceivedTimeDiff'] = selected_trip['ReceivedTimeDiff'].clip(lower=0)
selected_trip


Unnamed: 0,RequestedDateTime,Rig,Material,QtyRequested,ReceivedDateTime,QtyReceived,Vessel,OriginPort,QtyLost,ReceivedTimeDiff
177,2021-01-13 08:00:00,CharlieRig,Fuel,7700,2021-01-18 16:44:00,7700,150J,HamburgPort,0,0.0
178,2021-01-13 08:00:00,CharlieRig,Casing,20,2021-01-18 20:35:00,20,150J,HamburgPort,0,231.0
188,2021-01-14 08:00:00,CharlieRig,DeckCargo,5,2021-01-19 00:55:00,5,150J,HamburgPort,0,260.0
189,2021-01-14 08:00:00,CharlieRig,LiquidBulk,1500,2021-01-19 01:45:00,1500,150J,HamburgPort,0,50.0
190,2021-01-14 08:00:00,CharlieRig,Fuel,1700,2021-01-19 02:15:00,1700,150J,HamburgPort,0,30.0
...,...,...,...,...,...,...,...,...,...,...
4486,2021-11-19 16:00:00,DeltaRig,DryBulk,3100,2021-12-28 08:27:00,3100,150J,HamburgPort,0,260.0
4487,2021-11-19 16:00:00,DeltaRig,Pipe,12,2021-12-28 09:42:00,12,150J,HamburgPort,0,75.0
4496,2021-11-20 00:00:00,DeltaRig,Fuel,7000,2021-12-28 12:42:00,7000,150J,HamburgPort,0,180.0
4977,2021-12-24 08:00:00,DeltaRig,Casing,9,2021-12-28 16:12:00,9,150J,HamburgPort,0,210.0


In [19]:
def loadTime(df) :
    cargo = df['Material']
    if cargo == 'DeckCargo' :
        return df['QtyReceived'] * 10
    elif cargo == 'DryBulk' :
        return round(df['QtyReceived'] / 2500 * 60, 0)
    elif cargo == 'Fuel' :
        return round(df['QtyReceived'] / 2000 * 60, 0)
    elif cargo == 'LiquidBulk' :
        return round(df['QtyReceived'] / 3000 * 60, 0)
    elif cargo == 'Pipe' :
        return df['QtyReceived'] * 15
    elif cargo == 'Casing' :
        return df['QtyReceived'] * 13
    else :
        return 0

selected_trip['LoadTime'] = selected_trip.apply(loadTime, axis=1).shift(1)


In [20]:
def trip_cutter(df) :
    if abs(df['LoadTime'] - df['ReceivedTimeDiff']) > 1 :
        return 1
    else :
        return 0
tripcutter = selected_trip.apply(trip_cutter, axis=1)
selected_trip['Grouping'] = tripcutter.cumsum()
selected_trip.to_csv('selected_trip.csv', index=False)

In [21]:
list_of_request = list()
request_qty = [0,0,0,0,0,0]
dict_of_material = {'DeckCargo':0, 'DryBulk':1, 'Fuel':2, 'LiquidBulk':3, 'Pipe':4, 'Casing':5}
group_number = 0
for row in selected_trip.itertuples() :
    if group_number != row.Grouping :
        list_of_request.append(request_qty)
        group_number = row.Grouping
        request_qty = [0,0,0,0,0,0]
    request_qty[dict_of_material[row.Material]] += row.QtyReceived
list_of_request.append(request_qty)

list_of_request

[[37, 2000, 11500, 1500, 0, 20],
 [24, 0, 0, 4800, 50, 20],
 [8, 2400, 2700, 1900, 58, 12],
 [32, 7800, 2500, 5800, 12, 9],
 [30, 4800, 0, 9400, 22, 9],
 [7, 1700, 9400, 2700, 0, 32],
 [7, 3000, 7000, 3500, 17, 20],
 [0, 10800, 5500, 3400, 0, 12],
 [10, 0, 14800, 0, 36, 0],
 [7, 5200, 9100, 1900, 18, 9],
 [7, 4500, 9200, 0, 30, 9],
 [7, 2900, 0, 3200, 78, 0],
 [1, 3100, 5500, 2700, 17, 29],
 [0, 0, 7100, 3300, 60, 0],
 [37, 5600, 0, 3400, 45, 9],
 [0, 2500, 16200, 0, 5, 12],
 [5, 6800, 4700, 0, 12, 32],
 [0, 4700, 0, 1700, 0, 61],
 [44, 0, 7600, 2400, 50, 0],
 [12, 5200, 1600, 2800, 45, 12],
 [6, 2300, 17000, 0, 0, 12],
 [5, 3200, 6600, 0, 10, 40],
 [7, 5300, 0, 9700, 35, 0],
 [18, 7400, 4800, 0, 30, 12],
 [7, 4700, 2900, 3300, 12, 32],
 [7, 2100, 12000, 1100, 23, 9],
 [32, 2200, 0, 5300, 36, 21],
 [1, 5300, 8600, 2900, 12, 12],
 [14, 2000, 6700, 4300, 17, 20],
 [21, 0, 4700, 3400, 65, 0],
 [5, 5200, 2700, 3800, 10, 32],
 [0, 3100, 0, 3900, 23, 41],
 [21, 3500, 10500, 2300, 0, 20],
 [7

In [22]:
constrain = [0, 0, 0, 0, 0, 0]
lt = np.array(list_of_request)
gt = np.array(list_of_request) * -1
combine = np.concatenate((lt, gt))
combine[0::2] = lt
combine[1::2] = gt
b = np.array([23000, -21700] * (combine.shape[0] // 2)).reshape(1, -1)
b.shape
combine.shape
result = linprog(constrain, combine, b, bounds=(0, None), method='revised simplex')
print(result)


     con: array([], dtype=float64)
     fun: 0.0
 message: 'Optimization terminated successfully.'
     nit: 67
   slack: array([ 1.30000000e+03, -7.27595761e-12,  1.30000000e+03,  0.00000000e+00,
        4.87592788e+02,  8.12407212e+02,  1.12285308e+03,  1.77146925e+02,
        1.05918137e+03,  2.40818627e+02,  9.31077987e+02,  3.68922013e+02,
        9.02686618e+02,  3.97313382e+02,  1.14247963e+03,  1.57520366e+02,
        7.82774815e+02,  5.17225185e+02,  1.04337375e+03,  2.56626252e+02,
        8.24639127e+02,  4.75360873e+02,  4.71227257e+02,  8.28772743e+02,
        8.71121995e+02,  4.28878005e+02,  5.81601988e+02,  7.18398012e+02,
        5.49853767e+02,  7.50146233e+02,  9.30221807e+02,  3.69778193e+02,
        6.02165217e+02,  6.97834783e+02,  4.80072050e+02,  8.19927950e+02,
        5.54811627e+02,  7.45188373e+02,  5.59987141e+02,  7.40012859e+02,
        1.07563713e+03,  2.24362869e+02,  4.79751716e+02,  8.20248284e+02,
        1.30000000e+03, -3.63797881e-12,  8.14960383e

In [23]:
def combine_trip(df):
    total_list = list()

    rig1, rig2 = [[['AlphaRig', 'BetaRig']], [['CharlieRig', 'DeltaRig']]]
    vessel1, vessel2 = [['150A', '150D'], ['150E','150F', '150G', '150J']]
    combination1 = list(itertools.product(rig1, vessel1))
    combination2 = list(itertools.product(rig2, vessel2))
    all_combinations = sum([combination1, combination2], [])

    for rig, vessel in all_combinations:
        sorted_receive = df.sort_values(by=['Rig', 'ReceivedDateTime'], ascending=True)
        selected_trip = select_rig_and_vessel(sorted_receive, rig, vessel)
        selected_trip['ReceivedTimeDiff'] = selected_trip['ReceivedDateTime'].diff().dt.total_seconds() // 60
        selected_trip['ReceivedTimeDiff'].fillna(value=0, inplace=True)
        selected_trip['ReceivedTimeDiff'] = selected_trip['ReceivedTimeDiff'].clip(lower=0)

        selected_trip['LoadTime'] = selected_trip.apply(loadTime, axis=1).shift(1)
        
        tripcutter = selected_trip.apply(trip_cutter, axis=1)
        selected_trip['Grouping'] = tripcutter.cumsum()

        list_of_request = list()
        request_qty = [0,0,0,0,0,0]
        dict_of_material = {'DeckCargo':0, 'DryBulk':1, 'Fuel':2, 'LiquidBulk':3, 'Pipe':4, 'Casing':5}
        group_number = 0
        for row in selected_trip.itertuples() :
            if group_number != row.Grouping :
                list_of_request.append(request_qty)
                group_number = row.Grouping
                request_qty = [0,0,0,0,0,0]
            request_qty[dict_of_material[row.Material]] += row.QtyReceived
        list_of_request.append(request_qty)
        total_list.append(list_of_request)

    return sum(total_list, [])


trips = combine_trip(df)
constrain = [0, 0, 0, 0, 0, 0]
lt = np.array(trips)
gt = np.array(trips) * -1
combine = np.concatenate((lt, gt))
combine[0::2] = lt
combine[1::2] = gt
b = np.array([23000, -21700] * (combine.shape[0] // 2)).reshape(1, -1)
print(combine.shape)
result = linprog(constrain, combine, b, bounds=(0, None), method='revised simplex')
print(result)

(716, 6)
     con: array([], dtype=float64)
     fun: 0.0
 message: 'The problem appears infeasible, as the phase one auxiliary problem terminated successfully with a residual of 1.7e+04, greater than the tolerance 1e-12 required for the solution to be considered feasible. Consider increasing the tolerance to be greater than 1.7e+04. If this tolerance is unnaceptably large, the problem is likely infeasible.'
     nit: 385
   slack: array([ 1.80142714e+03, -5.01427143e+02,  1.86235158e+03, -5.62351578e+02,
        1.63447880e+03, -3.34478803e+02,  2.08560511e+03, -7.85605115e+02,
        5.40363918e+02,  7.59636082e+02,  8.06530907e+01,  1.21934691e+03,
        3.08139468e+02,  9.91860532e+02,  9.83980720e+02,  3.16019280e+02,
        1.91680390e+03, -6.16803902e+02,  1.22008770e+03,  7.99122981e+01,
        2.23056160e+03, -9.30561595e+02,  1.51838525e+02,  1.14816147e+03,
        6.80475544e+02,  6.19524456e+02,  2.14377544e+03, -8.43775436e+02,
        9.31542924e+02,  3.68457076e+02

from the result, we can safely rounded the number of volume per unit (or bundle) of DeckCargo, Pipe, and Casing to be:  
DeckCargo = 55 m<sup>3</sup> / unit  
Pipe = 210 m<sup>3</sup> / bundle  
Casing = 270 m<sup>3</sup> / bundle  

reapply the numbers back into the original data  to check if it is indeed correct :  

In [24]:
result = np.array([55, 1, 1, 1, 210, 270])
original_data = np.array(combine_trip(df))

# if any of the result is out of bound (not within 90% to 100% of 23000 m^3) then the result will be false
print((original_data.dot(result) <= 23000).all() and (original_data.dot(result) >= 23000*0.9).all())

True


# Travel sequence path

In [85]:
nodedata = pd.read_csv('nodedata.csv')
node_df = nodedata.copy(deep=True)
node_df

Unnamed: 0,NodeName,Latitude,Longitude,NodeType,Region
0,HamburgPort,53.55562,9.98745,Port,The South
1,RotterdamPort,51.89566,4.35257,Port,The South
2,AlphaRig,54.45067,2.37275,Rig,The South
3,BetaRig,57.02179,-0.67314,Rig,The Central
4,CharlieRig,57.84155,0.8498,Rig,The Central
5,DeltaRig,59.42662,-5.24196,Rig,The North
6,Echo,54.00555,8.46451,Waypoint,The South
7,Foxtrot,54.45067,5.41863,Waypoint,The South
8,Golf,52.17672,3.51495,Waypoint,The South
9,Hotel,53.55562,3.89569,Waypoint,The South


In [86]:
pathdata = pd.read_csv('PathData.csv')
path_df = pathdata.copy(deep=True)
path_df

Unnamed: 0,StartNode,EndNode,Sequence
0,HamburgPort,RotterdamPort,Echo - Foxtrot - Hotel - Golf
1,HamburgPort,AlphaRig,Echo - Foxtrot
2,HamburgPort,BetaRig,Echo - Foxtrot - India
3,HamburgPort,CharlieRig,Echo - Lima
4,HamburgPort,DeltaRig,Echo - Lima - Juliett - Kilo
5,RotterdamPort,HamburgPort,Golf - Hotel - Foxtrot - Echo
6,RotterdamPort,AlphaRig,Golf - Hotel
7,RotterdamPort,BetaRig,Golf - Hotel - India
8,RotterdamPort,CharlieRig,Golf - Hotel
9,RotterdamPort,DeltaRig,Golf - Hotel - Juliett - Kilo


In [87]:
path_df['SeqList'] = path_df['StartNode'] + ' - ' + path_df['Sequence'] + ' - ' + path_df['EndNode']
path_df['SeqList'] = path_df['SeqList'].apply(lambda x: x.replace('Direct Route - ', ''))
path_df['SeqList'] = path_df['SeqList'].apply(lambda x: x.split(' - '))
path_df['SeqList']

0     [HamburgPort, Echo, Foxtrot, Hotel, Golf, Rott...
1                [HamburgPort, Echo, Foxtrot, AlphaRig]
2          [HamburgPort, Echo, Foxtrot, India, BetaRig]
3                 [HamburgPort, Echo, Lima, CharlieRig]
4     [HamburgPort, Echo, Lima, Juliett, Kilo, Delta...
5     [RotterdamPort, Golf, Hotel, Foxtrot, Echo, Ha...
6                [RotterdamPort, Golf, Hotel, AlphaRig]
7          [RotterdamPort, Golf, Hotel, India, BetaRig]
8              [RotterdamPort, Golf, Hotel, CharlieRig]
9     [RotterdamPort, Golf, Hotel, Juliett, Kilo, De...
10               [AlphaRig, Foxtrot, Echo, HamburgPort]
11               [AlphaRig, Hotel, Golf, RotterdamPort]
12                           [AlphaRig, India, BetaRig]
13                               [AlphaRig, CharlieRig]
14                  [AlphaRig, Juliett, Kilo, DeltaRig]
15         [BetaRig, India, Foxtrot, Echo, HamburgPort]
16         [BetaRig, India, Hotel, Golf, RotterdamPort]
17                           [BetaRig, India, Al

In [88]:
def distance_of_path(df) :
    distance = 0
    for i in range(1, len(df['SeqList'])) :
        lat1 = node_df.loc[node_df['NodeName'] == df['SeqList'][i-1], 'Latitude'].values[0]
        lon1 = node_df.loc[node_df['NodeName'] == df['SeqList'][i-1], 'Longitude'].values[0]
        lat2 = node_df.loc[node_df['NodeName'] == df['SeqList'][i], 'Latitude'].values[0]
        lon2 = node_df.loc[node_df['NodeName'] == df['SeqList'][i], 'Longitude'].values[0]
        distance += geopy.distance.geodesic((lat1, lon1), (lat2, lon2), ellipsoid='WGS-84').km
    # print(distance, df['SeqList'][0], df['SeqList'][i])
    return distance # total distance of path in km

path_df['TotalDistance'] = path_df.apply(distance_of_path, axis=1)
path_df

Unnamed: 0,StartNode,EndNode,Sequence,SeqList,TotalDistance
0,HamburgPort,RotterdamPort,Echo - Foxtrot - Hotel - Golf,"[HamburgPort, Echo, Foxtrot, Hotel, Golf, Rott...",678.957943
1,HamburgPort,AlphaRig,Echo - Foxtrot,"[HamburgPort, Echo, Foxtrot, AlphaRig]",514.452721
2,HamburgPort,BetaRig,Echo - Foxtrot - India,"[HamburgPort, Echo, Foxtrot, India, BetaRig]",797.285822
3,HamburgPort,CharlieRig,Echo - Lima,"[HamburgPort, Echo, Lima, CharlieRig]",756.192252
4,HamburgPort,DeltaRig,Echo - Lima - Juliett - Kilo,"[HamburgPort, Echo, Lima, Juliett, Kilo, Delta...",1245.837601
5,RotterdamPort,HamburgPort,Golf - Hotel - Foxtrot - Echo,"[RotterdamPort, Golf, Hotel, Foxtrot, Echo, Ha...",678.957943
6,RotterdamPort,AlphaRig,Golf - Hotel,"[RotterdamPort, Golf, Hotel, AlphaRig]",362.062182
7,RotterdamPort,BetaRig,Golf - Hotel - India,"[RotterdamPort, Golf, Hotel, India, BetaRig]",704.955207
8,RotterdamPort,CharlieRig,Golf - Hotel,"[RotterdamPort, Golf, Hotel, CharlieRig]",735.059776
9,RotterdamPort,DeltaRig,Golf - Hotel - Juliett - Kilo,"[RotterdamPort, Golf, Hotel, Juliett, Kilo, De...",1244.131716


In [123]:
graph = dict()
for row in path_df.itertuples() :
    # print(row.SeqList)
    for i in range(0, len(row.SeqList)) :
        if row.SeqList[i] not in graph :
            graph[row.SeqList[i]] = set()
        if i == 0 :
            graph[row.SeqList[i]].add(row.SeqList[i+1])
        else :
            self = row.SeqList[i-1:i+2:1][1]
            neighbor = row.SeqList[i-1:i+2:1]
            neighbor.remove(self)
            graph[row.SeqList[i]].update(neighbor)
        # print(graph[row.SeqList[i]])
        # print(row.SeqList[i-1:i], row.SeqList[i+1:i+2])
    
    # add additional challenge problem node
graph['DeltaRig'].update(['Mike'])
graph['Mike'] = set(['DeltaRig', 'BelfastPort'])
graph['BelfastPort'] = set(['Mike'])
print(graph)

{'HamburgPort': {'Echo'}, 'Echo': {'Lima', 'Foxtrot', 'HamburgPort'}, 'Foxtrot': {'Hotel', 'Echo', 'AlphaRig', 'India'}, 'Hotel': {'AlphaRig', 'India', 'Foxtrot', 'CharlieRig', 'Juliett', 'Golf'}, 'Golf': {'RotterdamPort', 'Hotel'}, 'RotterdamPort': {'Golf'}, 'AlphaRig': {'India', 'Foxtrot', 'CharlieRig', 'Juliett', 'Hotel'}, 'India': {'Hotel', 'Foxtrot', 'AlphaRig', 'BetaRig'}, 'BetaRig': {'India', 'CharlieRig', 'Juliett'}, 'Lima': {'Echo', 'CharlieRig', 'Juliett'}, 'CharlieRig': {'AlphaRig', 'BetaRig', 'Juliett', 'Hotel', 'Lima'}, 'Juliett': {'AlphaRig', 'BetaRig', 'CharlieRig', 'Kilo', 'Hotel', 'Lima'}, 'Kilo': {'DeltaRig', 'Juliett'}, 'DeltaRig': {'Mike', 'Kilo'}, 'Mike': {'DeltaRig', 'BelfastPort'}, 'BelfastPort': {'Mike'}}


In [143]:
def generate_edges(graph):
    edges = set()
    for node in graph:
        for neighbour in graph[node]:
            edges.add(frozenset([node, neighbour]))

    return edges

generate_edges(graph)

edgedata = {'StartNode': [], 'EndNode': []}
for edge in generate_edges(graph) :
    edge = list(edge)
    edgedata['StartNode'].append(edge[0])
    edgedata['EndNode'].append(edge[1])

EdgeData = pd.DataFrame(edgedata)

def distance_of_edge(df) :
    distance = 0
    lat1 = node_df.loc[node_df['NodeName'] == df['StartNode'], 'Latitude'].values[0]
    lon1 = node_df.loc[node_df['NodeName'] == df['StartNode'], 'Longitude'].values[0]
    lat2 = node_df.loc[node_df['NodeName'] == df['EndNode'], 'Latitude'].values[0]
    lon2 = node_df.loc[node_df['NodeName'] == df['EndNode'], 'Longitude'].values[0]
    distance += geopy.distance.geodesic((lat1, lon1), (lat2, lon2), ellipsoid='WGS-84').km
    return distance # total distance of edge in km

EdgeData['Distance'] = EdgeData.apply(distance_of_edge, axis=1)
EdgeData


Unnamed: 0,StartNode,EndNode,Distance
0,Golf,Hotel,155.575993
1,Hotel,Juliett,711.340874
2,India,Foxtrot,348.253375
3,CharlieRig,Juliett,197.46661
4,DeltaRig,Kilo,120.758369
5,AlphaRig,CharlieRig,389.215293
6,Mike,DeltaRig,403.948666
7,Lima,CharlieRig,333.256691
8,Kilo,Juliett,191.020726
9,Hotel,AlphaRig,141.050435


In [None]:
EdgeData.to_csv('EdgeData.csv', index=False)