### Install wheels for Basemap
- install Proj: https://proj.org/install.html#install
- go to the link above >> find Windows: click OSGeo4W >> download the 64-bit installer >> follow the Windows section of that page to install PROJ
- install basemap wheel and pyproj wheel from link: https://www.lfd.uci.edu/~gohlke/pythonlibs/
- find: Basemap: a matplotlib toolkit for plotting 2D data on maps based on GEOS. 
- find: Pyproj: an interface to the PROJ library for cartographic transformations.
- #### Important: pip install numpy --upgrade ###

### Install wheels for geopandas 
Installing geopandas and its dependencies manually
refer to: https://stackoverflow.com/questions/34427788/how-to-successfully-install-pyproj-and-geopandas

Installing geopandas and its dependencies manually

1. First and most important: do not try to directly pip install or conda install any of the dependencies – if you do, they will fail in some way later, often silently or obscurely, making troubleshooting difficult. If any are already installed, uninstall them now.

2. Download the wheels for GDAL, Fiona, pyproj, rtree, and shapely from Gohlke. Make sure you choose the wheel files that match your architecture (64-bit) and Python version (2.7 or 3.5). If Gohlke mentions any prerequisites in his descriptions of those 5 packages, install the prerequisites now (there might be a C++ redistributable or something similar listed there)

3. If OSGeo4W, GDAL, Fiona, pyproj, rtree, or shapely is already installed, uninstall it now. The GDAL wheel contains a complete GDAL installation – don’t use it alongside OSGeo4W or other distributions.

4. Open a command prompt and change directories to the folder where you downloaded these 5 wheels.

5. pip install the GDAL wheel file you downloaded. Your actual command will be something like: pip install
GDAL-1.11.2-cp27-none-win_amd64.whl

6. Add the new GDAL path to the windows PATH environment variable, something like C:\Anaconda\Lib\site-packages\osgeo
pip install your Fiona wheel file, then your pyproj wheel file, then rtree, and then shapely.

7. Now that GDAL and geopandas’s dependencies are all installed, you can just pip install geopandas from the command prompt

# MilkRun Initial Routing Modeling

In [1]:
# import general packages:
from openpyxl import load_workbook
import win32com.client
import numpy as np
import pandas as pd
from pandas import Grouper
from pandas import Timestamp
import os
import io
import datetime as dt
import time 
import feather
import itertools
from math import sqrt
import csv
import dask.dataframe as dd
from datetime import datetime
import timestring
from IPython.core.display import display, HTML
from collections import Counter
from collections import defaultdict

# import modeling packages
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import KMeans
from sklearn import preprocessing, datasets
from sklearn.metrics import pairwise_distances_argmin
from scipy.spatial.distance import cdist,pdist
from scipy import stats
from scipy.sparse import *

# import visualization packages:
from matplotlib import pyplot as plt
# from mpl_toolkits.basemap import Basemap
import seaborn as sns
# import ggplot
%matplotlib inline

# checking path and dir
os.chdir('C:\\Users\\u279014\\Documents\\H_Drive\\7.AA Models\\12.Logistic_Optimization\\data')
os.getcwd()

'C:\\Users\\u279014\\Documents\\H_Drive\\7.AA Models\\12.Logistic_Optimization\\data'

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 500)

In [3]:
from __future__ import print_function
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

In [4]:
def distance_on_sphere_numpy(coordinate_df):
    """
    Compute the pairwise great-circle distance matrix, in miles, between locations.

    The earth is treated as a sphere and the spherical law of cosines is applied to
    every pair of rows at once.

    :param coordinate_df: pandas.DataFrame with numeric 'latitude' and 'longitude'
        columns in decimal degrees, one row per location.
    :returns distance_mat: numpy.ndarray with shape (n, n); entry [i, j] is the distance
        in MILES (km * 0.62137) between row i and row j. Entries that cannot be
        computed (e.g. a missing coordinate) are returned as 0.
    """
    # Radius of the earth in km (GRS 80-Ellipsoid)
    EARTH_RADIUS = 6371.007176
    km2mile_ratio = 0.62137

    # Unpacking coordinates as plain float arrays
    latitudes = np.asarray(coordinate_df.loc[:, 'latitude'], dtype=float)
    longitudes = np.asarray(coordinate_df.loc[:, 'longitude'], dtype=float)

    # Convert latitude and longitude to spherical coordinates in radians
    # (phi is the polar angle measured from the north pole, theta the azimuth).
    phi_values = np.radians(90.0 - latitudes)
    theta_values = np.radians(longitudes)

    # Expand phi_values and theta_values into (n, n) grids so all pairs are handled at once
    theta_1, theta_2 = np.meshgrid(theta_values, theta_values)
    phi_1, phi_2 = np.meshgrid(phi_values, phi_values)

    # Cosine of the central angle between every pair of points
    cos_angle = (np.sin(phi_1) * np.sin(phi_2) * np.cos(theta_1 - theta_2) +
                 np.cos(phi_1) * np.cos(phi_2))

    # Rounding can push the cosine marginally outside [-1, 1] (typically for identical
    # points); clip so arccos returns 0 instead of NaN plus a RuntimeWarning.
    arc = np.arccos(np.clip(cos_angle, -1.0, 1.0))

    # Multiply by earth's radius (km) and convert to miles; remaining NaNs become 0
    return np.nan_to_num(arc * EARTH_RADIUS * km2mile_ratio)

In [5]:
def riding_distance(riding_distance_matrix, geo):
    """
    Build the riding-distance matrix for the locations listed in `geo`.

    :param riding_distance_matrix: pandas.DataFrame of pairwise riding distances whose
        index and column labels are zip codes stored as str.
    :param geo: pandas.DataFrame with a 'zip_code' column; values are cast to str before
        the lookup. Row order and repeated zip codes are preserved in the output.
    :returns distance_mat: numpy.ndarray with shape (n, n); entry [i, j] is the value of
        riding_distance_matrix at (zip code of row i, zip code of row j).
    :raises KeyError: if a zip code of `geo` is absent from the matrix index or columns;
        the message lists every missing zip code.
    """
    zipcodes = geo['zip_code'].astype(str).tolist()

    # Report all unknown zip codes at once instead of failing on the first one
    known_rows = set(riding_distance_matrix.index)
    known_cols = set(riding_distance_matrix.columns)
    missing = sorted({z for z in zipcodes if z not in known_rows or z not in known_cols})
    if missing:
        raise KeyError('zip codes missing from riding_distance_matrix: {}'.format(missing))

    # One label-based selection replaces the nested loop of scalar lookups
    return np.asarray(riding_distance_matrix.loc[zipcodes, zipcodes])

In [6]:
def load_riding_distance_matrix(path,file):
    """
    Read the riding-distance workbook and return it keyed by zip code.

    :param path: directory that holds the workbook.
    :param file: workbook file name inside `path`.
    :returns: pandas.DataFrame indexed by the 'zipcode' column, with both the index
        labels and the column labels cast to str.
    """
    workbook = os.path.join(path, file)
    matrix = pd.read_excel(workbook)
    matrix = matrix.set_index('zipcode')
    # Cast both axes to str so lookups can use string zip codes
    matrix.index = matrix.index.astype('str')
    matrix.columns = matrix.columns.astype('str')
    return matrix

##  Modeling Start >>>>>>
## 1. Data_prep
### 1.1 load saved feather supplier-cluster dataset

### dictionary for osk_hub 

In [7]:
cass_zip_cluster = pd.read_csv('cass_zip_cluster.csv')

In [8]:
cluster_copy = cass_zip_cluster.copy() # make a copy of original dataset
cluster_copy = cluster_copy[cluster_copy.label != -1] # drop label(cluser) = -1, which do not belong to any group
cluster_copy['shipping_date'] = '10-01-2019'

In [9]:
cluster_copy['zip_code'] = cluster_copy.zip_code.astype('str')

In [10]:
cluster_copy.head()

Unnamed: 0,shipper_zip,unique_name,freq,ship_weight_freq_median,ship_weight_annum,shipment_count_annum,billed_amount_annum,zip_code,longitude,latitude,state_abbreviation,label,shipping_date
0,1752,aspensystem,monthly,261.0,3392.0,9.0,1194.81,1752,-71.54753,42.350909,MA,0,10-01-2019
1,1887,staffordmfg,monthly,200.0,200.0,1.0,86.97,1887,-71.17031,42.558576,MA,0,10-01-2019
2,1929,pendletonen,monthly,275.0,1715.0,5.0,672.84,1929,-70.77925,42.631753,MA,0,10-01-2019
3,1966,pendletonen,monthly,275.0,275.0,1.0,129.11,1966,-70.61727,42.659936,MA,0,10-01-2019
4,2062,exidetechnologies,monthly,390.0,780.0,2.0,234.02,2062,-71.20166,42.185974,MA,0,10-01-2019


In [11]:
cluster_copy = cluster_copy[cluster_copy.ship_weight_freq_median < 45000].reset_index(drop=True)

### 1.2 choose supplier-cluster to run milkrun Model

### Select top n supplier-cluster

In [12]:
rank = 2 # 1-based rank (by number of rows) of the supplier-cluster to run the milk run on

freq_selected = 'monthly'
# label_no = Counter(cluster_copy.label).most_common()[rank-1][0]
# Rank the cluster labels among rows of the selected frequency only
label_no = Counter(cluster_copy[cluster_copy.freq == freq_selected]['label']).most_common()[rank-1][0]
cluster = cluster_copy[(cluster_copy.label == label_no) & (cluster_copy.freq == freq_selected)]

# Warehouse rows with zero weight; only the Greenville WH is prepended to the sliced cluster

greenville = pd.DataFrame([['54942', 'GREENVILLE_WH', 'weekly', 0, 0, 0, 0, '54942', -88.53557,44.293820, 'WI',999,'10-01-2019']], columns=cluster.columns)

chanbersburg = pd.DataFrame([['17201', 'CHANBERSBURG_WH', 'weekly', 0, 0, 0, 0, '17201', -77.6614, 39.93112,'PA',999,'01-01-2019']], columns=cluster.columns)

# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result.
# The warehouse comes first so that it sits at row 0 after the index reset.
cass_zip_cluster_copy = pd.concat([greenville, cluster]).reset_index(drop = True)

In [13]:
cass_zip_cluster_copy

Unnamed: 0,shipper_zip,unique_name,freq,ship_weight_freq_median,ship_weight_annum,shipment_count_annum,billed_amount_annum,zip_code,longitude,latitude,state_abbreviation,label,shipping_date
0,54942,GREENVILLE_WH,weekly,0.0,0.0,0.0,0.0,54942,-88.53557,44.29382,WI,999,10-01-2019
1,1752,aspensystem,monthly,261.0,3392.0,9.0,1194.81,1752,-71.54753,42.350909,MA,0,10-01-2019
2,1887,staffordmfg,monthly,200.0,200.0,1.0,86.97,1887,-71.17031,42.558576,MA,0,10-01-2019
3,1929,pendletonen,monthly,275.0,1715.0,5.0,672.84,1929,-70.77925,42.631753,MA,0,10-01-2019
4,1966,pendletonen,monthly,275.0,275.0,1.0,129.11,1966,-70.61727,42.659936,MA,0,10-01-2019
5,2062,exidetechnologies,monthly,390.0,780.0,2.0,234.02,2062,-71.20166,42.185974,MA,0,10-01-2019
6,2135,millerberndsystems,monthly,1410.0,1410.0,1.0,572.97,2135,-71.15349,42.348418,MA,0,10-01-2019
7,5201,jbmsherman,monthly,33258.0,312914.0,9.0,34362.12,5201,-73.17873,42.882231,VT,0,10-01-2019
8,5301,milcut,monthly,7500.0,7500.0,1.0,492.74,5301,-72.62328,42.849957,VT,0,10-01-2019
9,6010,westfaliano,monthly,260.0,3270.0,8.0,2165.13,6010,-72.93365,41.682249,CT,0,10-01-2019


### 1.3 Samples Initialization with small selections: 100 locations

In [14]:
path = r'C:\Users\u279014\Documents\H_Drive\7.AA Models\12.Logistic_Optimization\data'
file = r'riding_distance_matrix.xlsx'
riding_distance_matrix = load_riding_distance_matrix(path,file)

In [15]:
cass_zip_toy = cass_zip_cluster_copy[:100].reset_index(drop = True)

In [16]:
cass_zip_toy.shape

(20, 13)

In [17]:
distance_matrix_toy = riding_distance(riding_distance_matrix, cass_zip_toy)

KeyError: '1887'

In [18]:
distance_matrix_toy.shape

NameError: name 'distance_matrix_toy' is not defined

In [19]:
distance_matrix_toy = distance_on_sphere_numpy(cass_zip_toy)
df_distance_matrix = pd.DataFrame(distance_matrix_toy,index=cass_zip_toy.zip_code,columns=cass_zip_toy.zip_code)



In [20]:
df_distance_matrix.shape

(20, 20)

In [21]:
# re_name column name as previously version
replace_columns = ['shipper_zip', 'shipper_name', 'freq', 'ship_weight',
       'ship_weight_annum', 'shipment_count_annum', 'billed_amount_annum',
       'zip_code', 'longitude', 'latitude', 'state_abbreviation', 'label',
       'shipping_date']

cass_zip_toy.columns = replace_columns

In [22]:
unique_cass_zip_toy = cass_zip_toy.drop_duplicates(subset=['zip_code'])
# unique_distance_matrix_toy = riding_distance(riding_distance_matrix, unique_cass_zip_toy)
unique_distance_matrix_toy = distance_on_sphere_numpy(unique_cass_zip_toy)
df_unique_distance_matrix = pd.DataFrame(unique_distance_matrix_toy,
                                         index=unique_cass_zip_toy.zip_code,
                                         columns=unique_cass_zip_toy.zip_code)

ship_wight_list_toy = cass_zip_toy.ship_weight.tolist()
sum(ship_wight_list_toy)



52596.0

## 2. Model_Prep
### I. Initializing Opt-model

In [None]:
def create_data_model(distance_matrix=0, 
                      ship_weight_list = 0, 
                      each_vehicle_capacity = 45000, 
                      num_vehicles = 30,
                      nrLocations = 9):
    """
    Bundle the routing inputs into the dictionary used by the optimisation cells.

    :param distance_matrix: pairwise distances between locations, stored as given.
    :param ship_weight_list: demand per location, stored under 'demands' as given.
    :param each_vehicle_capacity: capacity assigned to every vehicle.
    :param num_vehicles: number of vehicles; also the length of 'vehicle_capacities'.
    :param nrLocations: stored under 'nrLocations' as given.
    :returns: dict with keys 'distance_matrix', 'demands', 'vehicle_capacities',
        'num_vehicles', 'depot' (always 0) and 'nrLocations'.
    """
    # Same capacity for every truck in the fleet
    capacities = [each_vehicle_capacity for _ in range(num_vehicles)]
    return {
        'distance_matrix': distance_matrix,
        'demands': ship_weight_list,
        'vehicle_capacities': capacities,
        'num_vehicles': num_vehicles,
        'depot': 0,
        'nrLocations': nrLocations,
    }

### II. Customized model output_NCv-2

In [None]:
""" optimize algorithm for accurate route """
def print_solution_3(data, manager, routing, assignment):
    """Print every vehicle's route and return the visited nodes per vehicle.

    Each route is walked from routing.Start(vehicle_id) until routing.IsEnd, summing
    data['demands'] and the arc costs along the way. One end leg is then removed from
    the route distance: the first leg when it is at least as long as the last leg,
    otherwise the last leg (the route is then reported in reverse order).

    :param data: dict with at least 'num_vehicles' and 'demands'.
    :param manager: object providing IndexToNode (routing index -> node number).
    :param routing: object providing Start, IsEnd, NextVar and GetArcCostForVehicle.
    :param assignment: solution object providing Value.
    :returns: dict mapping vehicle_id -> list of node numbers in reported order.
        A vehicle whose route has a single arc ends up with a one-element list.
    """
    total_distance = 0
    total_load = 0
    
    vehicle_routes = dict() # vehicle_id -> node numbers picked up by that truck

    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
        plan_output_backward = 'Route for vehicle {}:\n'.format(vehicle_id) # printed instead when the route is reported in reverse
        route_distance = 0
        route_load = 0
        edge_distance = [] # arc cost of every leg, in travel order
        while not routing.IsEnd(index):
            node_index = manager.IndexToNode(index)
            route_load += data['demands'][node_index]
            plan_output += ' {0} Load({1}) -> '.format(node_index, route_load)
            plan_output_backward += ' {0} Load({1}) <- '.format(node_index, route_load) # same stops, arrows reversed
            
            # step to the next index chosen by the solution
            previous_index = index            
            index = assignment.Value(routing.NextVar(index))
            
            if vehicle_id in vehicle_routes:
                vehicle_routes[vehicle_id].append(node_index)   # adding node to the same truck
            else:
                vehicle_routes[vehicle_id] = [node_index]
            
            route_distance += routing.GetArcCostForVehicle(previous_index, index, vehicle_id)
            edge_distance.append(routing.GetArcCostForVehicle(previous_index, index, vehicle_id))
        
        # adding destination to entire route

        """ this situation is Fudging Headacheeeeeeee"""
        # First leg is at least as long as the last leg: drop the first leg from the
        # distance, remove the leading node from the list and append node 0 at the end,
        # i.e. the truck is reported as starting from the first supplier.
        if edge_distance[0] >= edge_distance[-1]:
            vehicle_routes[vehicle_id].append(0)
            vehicle_routes[vehicle_id].pop(0)
            route_distance = route_distance - edge_distance[0]
            plan_output += ' {0} Load({1})\n'.format(manager.IndexToNode(index),route_load)
            plan_output += 'Distance of the route: {} miles\n'.format(route_distance)
            plan_output += 'Load of the route: {}\n'.format(route_load)
            # print(plan_output)
            # strip the leading "node 0 with zero load" hop from the printed route
            print(plan_output.replace('0 Load(0) ->  ',''))
            total_distance += route_distance
            total_load += route_load
        
        # Otherwise drop the last leg from the distance and report the node list reversed,
        # i.e. the truck is reported as starting from the last supplier.
        else:
            route_distance = route_distance - edge_distance[-1]
            vehicle_routes[vehicle_id] = vehicle_routes[vehicle_id][::-1]
            plan_output_backward += ' {0} Load({1})\n'.format(manager.IndexToNode(index),route_load)
            plan_output_backward += 'Distance of the route: {} miles\n'.format(route_distance)
            plan_output_backward += 'Load of the route: {}\n'.format(route_load)
            print(plan_output_backward)
            total_distance += route_distance
            total_load += route_load
    print('Total distance of all routes: {} miles'.format(total_distance))
    print('Total load of all routes: {}'.format(total_load))
    return vehicle_routes

### III. Running Opt_Model: initialize truck_max_capacity & total truck_available

In [None]:
num_v = 30
num_stops = 7
v_capacity = 45000
n_route_location = 5

In [None]:
# Initiate data problem
_data = create_data_model(distance_matrix=distance_matrix_toy,
                         ship_weight_list=ship_wight_list_toy,
                         each_vehicle_capacity=v_capacity,
                         num_vehicles=num_v,
                        nrLocations=n_route_location)

In [None]:
# Create routing index manager
manager = pywrapcp.RoutingIndexManager(len(_data['distance_matrix']),_data['num_vehicles'],_data['depot'])

In [None]:
# Create Routing Model
routing = pywrapcp.RoutingModel(manager)

In [None]:
# Register transit callback
def distance_callback(from_index, to_index):
    """Return the arc cost between two routing indices as an integer.

    :param from_index: routing index of the arc's origin.
    :param to_index: routing index of the arc's destination.
    :returns: int, the rounded _data['distance_matrix'] entry for the two nodes.
    """
    from_node = manager.IndexToNode(from_index)
    to_node = manager.IndexToNode(to_index)
    # The routing solver computes with integers while the matrix holds floats,
    # so round explicitly instead of handing a float back to the solver.
    return int(round(_data['distance_matrix'][from_node][to_node]))

transit_callback_index = routing.RegisterTransitCallback(distance_callback)

In [None]:
# Define cost of each arch
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

In [None]:
# dimension_name = 'Distance'
# routing.AddDimension(transit_callback_index,
#         0,  # no slack
#         int(np.sum(data['distance_matrix'])),  # vehicle maximum travel distance
#         True,  # start cumul to zero
#         dimension_name)
# distance_dimension = routing.GetDimensionOrDie(dimension_name)
# distance_dimension.SetGlobalSpanCostCoefficient(5)

## <<< try adding dimension for stops limitation >>>

In [None]:
# Add count_stops constraint
# Every index contributes a transit of 1, so this dimension accumulates a per-route
# count of visited indices (presumably including the start index -- confirm against
# the OR-Tools semantics before relying on an exact stop count).
count_stop_callback = routing.RegisterUnaryTransitCallback(lambda index: 1)
dimension_name = 'Counter'
routing.AddDimension(count_stop_callback,
                     0,           # no slack
                     v_capacity,  # upper bound of the counter; NOTE(review): this reuses the weight capacity, not num_stops
                     True,        # start cumul to zero
                     'Counter'    # same value as dimension_name
                    )

In [None]:
counter_dimension = routing.GetDimensionOrDie(dimension_name)

# add sovler to count stop numbers  
for vehicle_id in range(num_v):
    index = routing.End(vehicle_id)
    solver = routing.solver()
    solver.Add(counter_dimension.CumulVar(index) <= num_stops)

#    solver.Add(counter_dimension.CumulVar(index).SetRange(3, 7)) 
#    Above >> [unsuccessful] set a range of stops  

In [None]:
# Add Capacity constraint
def demand_callback(from_index):
    """Return the demand of the node behind a routing index as an integer.

    :param from_index: routing index of the visited location.
    :returns: int, the rounded _data['demands'] entry for that node.
    """
    from_code = manager.IndexToNode(from_index)
    # Demands come from a float column; the solver computes with integers
    return int(round(_data['demands'][from_code]))

demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)

routing.AddDimensionWithVehicleCapacity(demand_callback_index,
        0,  # null capacity slack
        _data['vehicle_capacities'],  # vehicle maximum capacities
        True,  # start cumul to zero
        'Capacity')

# Make every non-depot node optional: each node is placed in its own disjunction with
# the given penalty, so a node may be left out of all routes (e.g. when it cannot be
# served within the constraints) at that cost instead of making the model infeasible.
penalty = 1000
for node in range(1, len(_data['distance_matrix'])):
    routing.AddDisjunction([manager.NodeToIndex(node)], penalty)

In [None]:
# Setting first solution heuristic.
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.first_solution_strategy = (routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

In [None]:
# Solve the problem.
assignment = routing.SolveWithParameters(search_parameters)

In [None]:
if assignment:
    route_dictionary = print_solution_3(_data,manager,routing,assignment)

## 3. Result Visualization to PowerBI

In [None]:
route_dictionary

In [None]:
def route_schedule(route_dictionary):
    """Flatten the per-truck routes into a long truck/stop table.

    :param route_dictionary: dict mapping truck number -> ordered list of node numbers.
    :returns: pandas.DataFrame with columns 'truck_number' and 'pick_node', one row per
        stop in route order and a fresh 0..n-1 index. Trucks whose route holds exactly
        one node (dummy trucks doing nothing) are left out.
    """
    # Collect all rows first and build the frame once: DataFrame.append was removed in
    # pandas 2.0 and growing a frame row by row is quadratic.
    rows = [
        (truck, node)
        for truck, nodes in route_dictionary.items()
        if len(nodes) != 1
        for node in nodes
    ]
    # Naming the columns keeps them present even when no truck is used
    return pd.DataFrame(rows, columns=['truck_number', 'pick_node'])

In [None]:
route_schedule = route_schedule(route_dictionary)

In [None]:
def route_schedule(route_dictionary):
    """Flatten the per-truck routes into a long truck/stop table.

    :param route_dictionary: dict mapping truck number -> ordered list of node numbers.
    :returns: pandas.DataFrame with columns 'truck_number' and 'pick_node', one row per
        stop in route order and a fresh 0..n-1 index. Trucks whose route holds exactly
        one node (dummy trucks doing nothing) are left out.
    """
    # Gather every (truck, node) pair and build the frame in one go; calling pd.concat
    # inside the loop copies the whole frame on every stop.
    rows = [
        (truck, node)
        for truck, nodes in route_dictionary.items()
        if len(nodes) != 1
        for node in nodes
    ]
    # Naming the columns keeps them present even when no truck is used
    return pd.DataFrame(rows, columns=['truck_number', 'pick_node'])

In [None]:
route_schedule = route_schedule(route_dictionary)

In [None]:
route_schedule

### Note: input of Graph must be unique distance matrix 

In [None]:
def distance_index(df,x):
    '''
    Look up the distance of one leg in a labelled distance matrix.

    param:
        df: distance matrix (DataFrame) with UNIQUE index & columns
        x: pair (current location, next stop); a tuple/list or a two-element
           pandas Series such as one row of a DataFrame.apply(axis=1)
    return:
        the matrix entry df.loc[current, next]; 0 when the pair cannot be looked up
        (e.g. the next stop is NaN on the last row of a route, or a label is unknown)
    '''
    try:
        # Read the pair by position: integer keys on a label-indexed Series are a
        # deprecated fallback in pandas, so use .iloc when it is available.
        if hasattr(x, 'iloc'):
            source, target = x.iloc[0], x.iloc[1]
        else:
            source, target = x[0], x[1]
        return df.loc[source, target]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures mean "no distance"; anything else should surface
        return 0

In [None]:
cass_zip_toy

In [None]:
route_schedule

In [None]:
# Attach the supplier attributes to every stop: pick_node is matched against the row
# index of cass_zip_toy (left join, so every stop of the schedule is kept).
route_in_weight = route_schedule.merge(cass_zip_toy,left_on='pick_node',right_index=True,how='left')
# Zip code / shipper of the following stop of the same truck (missing on each truck's last row)
route_in_weight['next_zip_code'] = route_in_weight.groupby(['truck_number'])['zip_code'].shift(-1)
route_in_weight['next_shipper_name'] = route_in_weight.groupby(['truck_number'])['shipper_name'].shift(-1)

# Leg distance from each stop to the next one, looked up in the unique distance matrix and rounded
route_in_weight['milk_run_distance'] = route_in_weight[['zip_code','next_zip_code']].apply(lambda x: round(distance_index(df_unique_distance_matrix,x)),axis=1)
# 0-based position of the stop within its truck's route
route_in_weight['stop_number'] = route_in_weight.groupby('truck_number').cumcount()

In [None]:
route_in_weight

In [None]:
# delete later .... only for test purpose
route_in_weight.to_csv(r'C:\Users\u279014\Documents\H_Drive\7.AA Models\12.Logistic_Optimization\data\route_test.csv',index=True,index_label='time_sequence')

In [None]:
route_in_weight.to_csv(r'C:\Users\u279014\Documents\H_Drive\7.AA Models\12.Logistic_Optimization\data\route_in_weight.csv',index=True,index_label='time_sequence')
# route_in_weight.to_csv(r'S:\CORP-Share\DEPT\IT\DT-AA\FY20\GPSC\UseCases\8. Logistics Route Optimization\route_in_weight.csv',index=True,index_label='time_sequence')

##  Analytical Result: Miles & Cost Saving Comparison

In [None]:
# distance matrix
df_unique_distance_matrix

In [None]:
# routing work-order
route_in_weight[['truck_number','shipper_name','zip_code','milk_run_distance','next_shipper_name','next_zip_code','ship_weight','miles']]

In [None]:
route_in_weight.shipper_state.unique()

In [None]:
total_tmc_miles = route_in_weight.miles.sum()
total_milk_miles = route_in_weight.milk_run_distance.sum()
miles_saving = (total_tmc_miles-total_milk_miles)
print('-original_miles:{0} \n-milkrun_miles:{1}\n-miles reducton:{2}'.format(total_tmc_miles,total_milk_miles,miles_saving))

##  <<<<<<  Modeling Completed

## Financial Impact >>>>>>

In [None]:
def load_data(path,file,sheet_name = None):
    """
    Load one or several sheets of an Excel workbook into a single DataFrame.

    The combined frame is also written to '<path>/tmc_feather' and the returned
    frame is the one read back from that file.

    :param path: directory that holds the workbook; the feather file is written here too.
    :param file: workbook file name inside `path`.
    :param sheet_name: forwarded to pandas.read_excel; None loads every sheet, a list
        loads the listed sheets, a single name or position loads that one sheet.
    :returns: pandas.DataFrame with the rows of all loaded sheets stacked in order and
        a fresh 0..n-1 index.
    """
    # Use the argument that was passed in, not a notebook-level global
    sheets = pd.read_excel(os.path.join(path, file), sheet_name=sheet_name)

    # read_excel returns a dict of frames for None / a list, a single frame otherwise
    if isinstance(sheets, dict):
        df = pd.concat(list(sheets.values()))
    else:
        df = sheets
    df = df.reset_index(drop=True)

    cache_path = os.path.join(path, 'tmc_feather')
    df.to_feather(cache_path)
    return feather.read_dataframe(cache_path)

In [None]:
path = r'C:\Users\u279014\Documents\H_Drive\7.AA Models\12.Logistic_Optimization\data'
file = r'TMC_freight_rate.xlsx'
sheet_names = ['Phase 1','Phase 2','Phase 3','Phase 4','Phase 5']

In [None]:
df = load_data(path=path,file=file,sheet_name=sheet_names)

In [None]:
# standardize dataframe colume names
def col_name(df):
    """
    Normalise the column names of a DataFrame to one format: surrounding whitespace
    stripped, lower case, hyphens removed, spaces replaced by underscores, and
    parentheses and double quotes removed.

    param df:
        DataFrame whose column names are strings; it is renamed IN PLACE
    return:
        the same DataFrame object, with the new column names
    """
    cleaned = df.columns.str.strip().str.lower()
    # regex=False makes every replacement literal: '(' and ')' are regex
    # metacharacters and the default of `regex` differs between pandas versions.
    for token, replacement in (('-', ''), (' ', '_'), ('(', ''), (')', ''), ('"', '')):
        cleaned = cleaned.str.replace(token, replacement, regex=False)
    df.columns = cleaned
    return df

In [None]:
""" Slice tmc """
def clean_tmc(df, sink_state = 'WI', source_states = 'IL'):
    """
    Reduce the TMC rate table to one average freight cost per (source, sink) state pair.

    parameter: 
        df: original TMC dataset; needs a 'lane' column whose first two characters are
            the source state and last two characters the sink state, plus the four
            market-rate columns listed below (after column-name normalisation)
        sink_state: destination state code, only one location allowed
        source_states: shipping state code, or a list/array of state codes
    return:
        DataFrame with columns 'source_state', 'sink_state', 'freight_cost' holding the
        mean freight_cost per state pair
    side effect:
        `df` itself is modified: columns are renamed, rows without any rate are
        dropped, and the helper columns are added
    """
    rate_cols = ['market_rate_over_quarter_decmar',
                 'market_rate_over_jan_2019mar_2020',
                 'market_rate_all_offers_jan_2019_mar_2020_no_fb',
                 'market_rate_all_offers_jan_2019_mar_2020_with_fb']
    preferred = 'market_rate_all_offers_jan_2019_mar_2020_no_fb'

    # standardize col name
    df = col_name(df)

    # drop rows where every rate column is nan
    df.dropna(how='all', subset=rate_cols, inplace=True)

    # A single state passed as a bare string would make Series.isin raise
    if isinstance(source_states, str):
        source_states = [source_states]

    # freight_cost = preferred rate, falling back to the largest of the rate columns.
    # The maximum is taken over the rate columns only, never over text columns.
    fallback = df[rate_cols].max(axis=1)
    df['freight_cost'] = df[preferred].fillna(fallback).round(2)
    df['source_state'] = df.lane.str[:2] # find source state short code
    df['sink_state'] = df.lane.str[-2:] # find sink state short code

    df = df[df.source_state.isin(source_states)] # slice only source state
    df = df[df.sink_state.str.contains(sink_state, na=False)] # slice to include destination state only
    # average duplicate lanes between the same pair of states
    df = df.groupby(['source_state','sink_state'])['freight_cost'].mean().reset_index()
    return df

In [None]:
# generate cleaned TMC dataset
source_states = cluster.shipper_state.unique()
tmc = clean_tmc(df, sink_state='WI', source_states = source_states)

In [None]:
tmc

In [None]:
# updating full truck load cost
route_in_weight['milk_run_cost'] = 0
TL_cost = np.max(tmc.freight_cost)
route_in_weight.loc[route_in_weight.groupby('truck_number').tail(1).index,'milk_run_cost'] = TL_cost
route_in_weight.to_csv(r'C:\Users\u279014\Documents\H_Drive\7.AA Models\12.Logistic_Optimization\data\route_in_weight.csv',index=True,index_label='time_sequence')

In [None]:
truck_used = len(route_in_weight.truck_number.unique())
total_tmc_billed = route_in_weight.billed_amount.sum()
total_milk_cost = round(np.max(tmc.freight_cost)*truck_used,2)
# total_milk_cost = round(float(tmc.freight_cost)*truck_used,2)
cost_saving = round((total_tmc_billed - total_milk_cost),2)
print('-original_cost:{0} \n-milkrun_cost:{1}\n-cost reducton:{2}'.format(total_tmc_billed,total_milk_cost,cost_saving))

### Add potential Oshkosh Hubs to the route

In [None]:
import sys
sys.path.insert(0, '../main')

In [None]:
import clustering_main as cm

In [None]:
def hub_dict(path, file, destination_list, route_in_weight, inbound_indicator='INBOUND'):
    """
    Collect, per supplier, the warehouse cities it already ships to.

    param:
        path: directory handed to cm.ETL_data
        file: invoice file name handed to its col_name loader
        destination_list: list of destination city names to keep
        route_in_weight: DataFrame with 'shipper_name' and 'zip_code' columns, used to
            restrict the lane table to suppliers on the routes
        inbound_indicator: str, value of inbound_outbound_indicator to keep
    return:
        (hubs, lanes): hubs is a defaultdict {shipper_name: set of destination cities}
        built from all invoices matching inbound_indicator; lanes is a DataFrame of
        unique shipper/destination pairs restricted to the routed shippers and zip codes
    """
    invoices = cm.ETL_data(path=path).col_name(file=file)
    invoices = invoices[invoices.inbound_outbound_indicator == inbound_indicator]

    # Lane table: one row per (shipper, destination city) among the requested cities
    lane_columns = ['shipper_name', 'shipper_city', 'shipper_state', 'shipper_zip',
                    'destination_city', 'destination_state', 'destination_zip']
    to_hub = invoices.destination_city.isin(destination_list)
    lanes = invoices[to_hub][lane_columns]
    lanes = lanes.drop_duplicates(subset=['shipper_name', 'destination_city'])

    # Keep only shippers and zip codes that appear on the computed routes
    routed_names = set(route_in_weight.shipper_name)
    routed_zips = set(route_in_weight.zip_code)
    lanes = lanes[lanes.shipper_name.isin(routed_names)]
    lanes = lanes[lanes.shipper_zip.isin(routed_zips)]

    # Supplier -> cities mapping, built from every filtered invoice row
    hubs_by_supplier = defaultdict(set)
    for supplier, city in zip(invoices.shipper_name, invoices.destination_city):
        if city in destination_list:
            hubs_by_supplier[supplier].add(city)
    return hubs_by_supplier, lanes

In [None]:
path = 'C:\\Users\\u279014\\Documents\\H_Drive\\7.AA Models\\12.Logistic_Optimization\\data'
file = 'Cass FY19 Invoice Detail.csv'
destination_list = ['MILWAUKEE', 'OSHKOSH', 'GREENVILLE']

In [None]:
hub_dictionary, df_hub_dictionary = hub_dict(path=path, file=file, destination_list=destination_list, route_in_weight=route_in_weight)

In [None]:
df_hub_dictionary.to_csv('hub_dictionary.csv', index=False)

In [None]:
route_in_weight

In [None]:
df_hub_dictionary[df_hub_dictionary.shipper_name.isin(set(route_in_weight.shipper_name))]