# Hub & Spoke Visuals

This notebook explores the visuals of the Team NYC dashboard after HC and NA process the raw AIS data. The visualizations here will be solely plotly so that we can easily port them over to the Dashboard. NC nicely describes our input dataframe "fullTrips" as a cleaned and processed dataframe where each row is a trip. A trip is defined as:

1. Spoke start port
2. Hub start port
3. Hub end port
4. Spoke end port

We use the available post-processed data to generate visuals in plotly here which we can plug-and-play and __Dash__.

### Dashboard Contents

* Hub and Spoke on a Map
* High level metrics (# trips, total carbon emission, # hubs)
* Trip scatterplot

*Note: We need to be extremely creative here. The nature of our model won't let us render too much on the map and so we can't rely on just a map to be the entire visual. Consider charts like bar charts, pie charts, time series, etc*

In [337]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from time import process_time
from sklearn.cluster import DBSCAN
import plotly.graph_objects as go
import plotly.express as px

In [246]:
"""
HELPERS
"""

def get_lat_lon_range(df):
    """Return the range of lat and lon in the data."""
    return [df['lat'].min(), df['lat'].max()], [df['lon'].min(), df['lon'].max()]

def get_scope(lat_range, lon_range):
    """Assign the proper scope based on range of data's lat/lon."""
    us_lat_rng = [24, 55]
    us_lon_rng = [-127, -50]
    na_lat_rng = [15, 85]
    na_lon_rng = [-170, -50]
    eu_lat_rng = [30, 80]
    eu_lon_rng = [-20, 70]
    sa_lat_rng = [-60, 12]
    sa_lon_rng = [-81, -34]

    if (lat_range[0] >= us_lat_rng[0] and lat_range[1] <= us_lat_rng[1]
            and lon_range[0] >= us_lon_rng[0] and lon_range[1] <= us_lon_rng[1]):
        scope = 'usa'
    elif (lat_range[0] >= na_lat_rng[0] and lat_range[1] <= na_lat_rng[1]
            and lon_range[0] >= na_lon_rng[0] and lon_range[1] <= na_lon_rng[1]):
        scope = 'north america'
    elif (lat_range[0] >= eu_lat_rng[0] and lat_range[1] <= eu_lat_rng[1]
          and lon_range[0] >= eu_lon_rng[0] and lon_range[1] <= eu_lon_rng[1]):
        scope = 'europe'
    elif (lat_range[0] >= sa_lat_rng[0] and lat_range[1] <= sa_lat_rng[1]
          and lon_range[0] >= sa_lon_rng[0] and lon_range[1] <= sa_lon_rng[1]):
        scope = 'south america'
    else:  # can add asia and africa
        scope = 'world'  # default
    return scope

In [349]:
"""
Load in data processed by HC and NA models.
"""

df_ports = pd.read_csv('portRef.csv')
display(df_ports.head())

df_full_trips = pd.read_csv('fullTrips.csv')
df_full_trips.head()

Unnamed: 0,Port_ID,CENTER_LON,CENTER_LAT
0,-1,-64.268406,18.234004
1,0,-63.04445,18.010393
2,1,-64.949977,18.334146
3,2,-64.597297,18.401753
4,3,-64.754876,17.696373


Unnamed: 0.1,Unnamed: 0,MMSI,VesselType,Length,Width,BaseDateTime_Start,LAT_SPOKEStartPort,LON_SPOKEStartPort,StartHUBPORT_PortID,StartHUBPORT_LON,StartHUBPORT_LAT,BaseDateTime_TripEnd,LAT_SPOKEEndPort,LON_SPOKEEndPort,ENDHUBPORT_PortID,ENDHUBPORT_LON,ENDHUBPORT_LAT
0,5728,209141000,1004.0,143.15,22.8,2017-12-04T19:06:12,18.02078,-63.11847,-1.0,-64.268406,18.234004,2017-12-05T10:48:45,18.00902,-63.04453,0,-63.04445,18.010393
1,6875,230185000,1004.0,108.35,17.47,2017-12-01T21:14:09,18.08363,-63.10032,-1.0,-64.268406,18.234004,2017-12-06T21:24:41,18.3357,-64.94765,1,-64.949977,18.334146
2,10939,230185000,1004.0,108.35,17.47,2017-12-06T21:24:41,18.3357,-64.94765,1.0,-64.949977,18.334146,2017-12-20T21:53:33,18.40139,-64.59435,2,-64.597297,18.401753
3,20215,230185000,1004.0,108.35,17.47,2017-12-20T21:53:33,18.40139,-64.59435,2.0,-64.597297,18.401753,2017-12-23T17:30:13,17.69702,-64.75512,3,-64.754876,17.696373
4,27463,230185000,1004.0,108.35,17.47,2017-12-23T17:30:13,17.69702,-64.75512,3.0,-64.754876,17.696373,2017-12-24T23:38:44,18.33567,-64.94773,1,-64.949977,18.334146


In [301]:
"""
Generates the data needed to funnel trip data into a plotly geoscatter map
"""

def gen_df_spokes_start(df_full_trips):
    df = df_full_trips[['MMSI',
                                 'BaseDateTime_Start',
                                 'LAT_SPOKEStartPort', 
                                 'LON_SPOKEStartPort']].copy()
    df['color'] = 'green'
    df.columns = ['mmsi', 'time', 'lat', 'lon', 'color']
    df['size'] = 8
    df['text'] = (df['mmsi'].apply(lambda x: "MMSI: %s" % x))
    df['name'] = 'Spoke Start'
    df.head()
    return df

def gen_df_hub_start(df_full_trips):
    df = df_full_trips[['MMSI', 
                            'StartHUBPORT_PortID', 
                            'StartHUBPORT_LON',
                             'StartHUBPORT_LAT']].copy()

    df['count'] = df['StartHUBPORT_PortID']
    df = df.groupby(['StartHUBPORT_PortID', 
                        'StartHUBPORT_LON',
                       'StartHUBPORT_LAT'], as_index=False).agg({'count': 'count'})
    df.sort_values(by='count', ascending=False)
    df.columns = ['port_id', 'lon', 'lat', 'count']
    df['color'] = 'orange'
    df['text'] = df['port_id'].apply(lambda x: """Port ID: %s""" % x)
    df['name'] = 'Hub Start'
    df['size'] = 8 + df['count']
    df.head()
    return df

def gen_df_hub_end(df_full_trips):
    df = df_full_trips[['MMSI', 
                                  'ENDHUBPORT_PortID', 
                                  'ENDHUBPORT_LON',
                                  'ENDHUBPORT_LAT']].copy()


    df['count'] = df['ENDHUBPORT_PortID']
    df = df.groupby(['ENDHUBPORT_PortID', 
                        'ENDHUBPORT_LON',
                       'ENDHUBPORT_LAT'], as_index=False).agg({'count': 'count'})
    df.sort_values(by='count', ascending=False)
    df['color'] = 'blue'
    df.columns = ['port_id', 'lon', 'lat', 'count', 'color']
    df['text'] = df['port_id'].apply(lambda x: """Port ID: %s""" % x)
    df['name'] = 'Hub End'
    df['size'] = 8 + df['count']
    df.head()
    return df

def gen_df_spoke_end(df_full_trips):
    df = df_full_trips[['MMSI', 
                                  'LAT_SPOKEEndPort',
                                  'LON_SPOKEEndPort',
                                 'BaseDateTime_TripEnd']].copy()
    df.columns = ['mmsi', 'lat', 'lon', 'time']
    df['color'] = 'red'
    df['size'] = 8
    df['text'] = (df['mmsi'].apply(lambda x: "MMSI: %s" % x))
    df['name'] = 'Spoke End'
    df.head()
    return df

In [336]:
"""
HUB AND Spoke Visual
"""

fig = go.Figure()


"""
Add hub and spokes
"""
df_fig_arr = [gen_df_spokes_start(df_full_trips),
             gen_df_hub_start(df_full_trips),
             gen_df_hub_end(df_full_trips),
             gen_df_spoke_end(df_full_trips)]

# Add locations
for df in df_fig_arr:
    df['shape'] = 'circle'
    fig.add_trace(go.Scattergeo(
        lon=df['lon'],
        lat=df['lat'],
        name=df['name'].values[0],
        text=df['text'],
        marker=dict(size=df['size'],
                    symbol=df['shape'],
                    color=df['color'],
                    line=dict(width=3, color='rgba(68, 68, 68, 0)')
                    )))
    
#fig.show()
print(df_full_trips.columns)

"""
Add paths
"""
for row in df_full_trips.itertuples():
    fig.add_trace(go.Scattergeo(
        lon=[row.LON_SPOKEStartPort, row.StartHUBPORT_LON],
        lat=[row.LAT_SPOKEStartPort, row.StartHUBPORT_LAT],
        mode='lines',
        line=dict(width=0.5, color='gray'),
        opacity=0.8,
        showlegend=False,
    ))
    
    fig.add_trace(go.Scattergeo(
        lon=[row.StartHUBPORT_LON, row.ENDHUBPORT_LON],
        lat=[row.StartHUBPORT_LAT, row.ENDHUBPORT_LAT],
        mode='lines',
        line=dict(width=0.5, color='gray'),
        opacity=0.8,
        showlegend=False,
    ))
    
    fig.add_trace(go.Scattergeo(
        lon=[row.ENDHUBPORT_LON, row.LON_SPOKEEndPort],
        lat=[row.ENDHUBPORT_LAT, row.LAT_SPOKEEndPort],
        mode='lines',
        line=dict(width=0.5, color='gray'),
        opacity=0.8,
        showlegend=False,
    ))
    


lat_range, lon_range = get_lat_lon_range(df_spokes_start)
scope = get_scope(lat_range, lon_range)
title = f'{scope.capitalize()} Hub and Spoke Network'
layout = dict(title=title,
              showlegend=True,
              geo=dict(
                  scope=scope,  # this is the only place we changed
                  projection = go.layout.geo.Projection(
                        scale=35,
                      type = 'azimuthal equal area'
                    ),
                  center={'lat': df_full_trips['LAT_SPOKEStartPort'].mean(), 
                          'lon': df_full_trips['LON_SPOKEStartPort'].mean()},
                    showland = False,
                    showlakes = False,
                    showocean = True,
                    landcolor = 'rgb(230, 145, 56)',
                    lakecolor = 'rgb(0, 255, 255)',
                    oceancolor = 'rgb(127,205,255)',
                    coastlinewidth = 5,
              )
             )
fig.update_layout(layout)

Index(['Unnamed: 0', 'MMSI', 'VesselType', 'Length', 'Width',
       'BaseDateTime_Start', 'LAT_SPOKEStartPort', 'LON_SPOKEStartPort',
       'StartHUBPORT_PortID', 'StartHUBPORT_LON', 'StartHUBPORT_LAT',
       'BaseDateTime_TripEnd', 'LAT_SPOKEEndPort', 'LON_SPOKEEndPort',
       'ENDHUBPORT_PortID', 'ENDHUBPORT_LON', 'ENDHUBPORT_LAT'],
      dtype='object')


In [339]:
"""
Individual Trip Emissions Scatterplot Visuals
"""

def generate_scatter_df(df_full_trips):
    df = df_full_trips[['Length', 'Width', 'VesselType']]
    df['CO2 Emitted'] = df['Length'].copy().apply(lambda x: x*np.random.random(1)[0])
    df.head()
    return df

fig = px.scatter_3d(generate_scatter_df(df_full_trips), 
                    x='Length', 
                    y='Width', 
                    z='CO2 Emitted',
                    color='VesselType', 
                    height=500)
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [348]:
"""
TEXT Visuals

These are just high level stats that we show in individual boxes
"""

def agg_metrics(df_full_trips):
    df = df_full_trips 
    data = {
        'number_of_trips': len(df_full_trips),
        'number_of_hubs': len(set(df_full_trips['StartHUBPORT_PortID'].unique()).union(
                            set(df_full_trips['ENDHUBPORT_PortID'].unique()))),
        'actual_co2_emission': 1337,
        'optimized_co2_emission': 337
    }

    return data


agg_metrics(df_full_trips)

{'number_of_trips': 73,
 'number_of_hubs': 14,
 'actual_co2_emission': 1337,
 'optimized_co2_emission': 337}