In [2]:
import pandas
import pulp
import geopandas # standard library for geographical data in Python
import numpy # standard library for numerical and array-based computing
import spopt # library for spatial optimisation

In [3]:
# Load the postcode table; its `lon`/`lat` columns are turned into point
# geometries below and its `outward` column holds the outward code (e.g. "BS1").
postcodes = pandas.read_csv("../data/uk_postcodes.csv")

# spatialize the data: build lon/lat points tagged as EPSG:4326
postcodes = geopandas.GeoDataFrame(
    postcodes,
    geometry=geopandas.points_from_xy(postcodes.lon, postcodes.lat, crs="epsg:4326"),
)

# keep only the outward codes that start with "BS"
bristol_postcodes = postcodes[postcodes.outward.str.startswith("BS")].copy()

# is the postcode in the center of the city?
# Every row kept above starts with "BS", so dropping the first two characters
# leaves the district number. (`str.lstrip("BS")` would strip any leading run
# of the *characters* "B" and "S" rather than the prefix "BS".)
is_inner_city = bristol_postcodes.outward.str[2:].astype(int) < 10

# if so, keep it around
bristol_innercity = bristol_postcodes[is_inner_city].copy()

# Tutorial

Now it is time for you to try your hand at solving $p$-Median problems. 

## Once again, with feeling

<div class="alert alert-warning">
    
Using the methods we discussed above, can you create a new PMedian problem object to locate eight depots across all Bristol post codes? Are BS6 and BS8 chosen again in this new setup? It may help to make a map using the `.explore()` method. 

</div>

In [4]:
# Bristol-wide problem: every Bristol postcode is both a demand point and a
# candidate depot site, demand is weighted by the `demand` column, and eight
# depots are to be chosen.
# NOTE(review): the geometries are EPSG:4326 lon/lat points, so any distance
# derived directly from the coordinates is in degrees - confirm this is intended.
loc8 = spopt.locate.PMedian.from_geodataframe(
    gdf_demand=bristol_postcodes,
    gdf_fac=bristol_postcodes,
    demand_col="geometry",
    facility_col="geometry",
    weights_cols="demand",
    p_facilities=8,
)

In [5]:
# One solver object, reused by every `.solve()` call in this notebook.
# msg=False asks PuLP not to print the solver log.
solver = pulp.COIN_CMD(
    msg=False,
)

In [6]:
# Solve the eight-depot problem; the call returns the problem object, which is
# what the cell displays.
loc8.solve(solver=solver)

<spopt.locate.p_median.PMedian at 0x163805790>

In [7]:
# Presumably `cli2fac` lists, per demand point, the index of the facility that
# serves it; stacking produces one integer array with a row per demand point.
allocations = numpy.stack(loc8.cli2fac, axis=0)

In [8]:
# Turn facility positions into outward codes. Positions index
# `bristol_postcodes` because that frame was passed as `gdf_fac`.
allocation_names = bristol_postcodes["outward"].values[allocations]

In [9]:
# Interactive map with each postcode coloured by the depot it is allocated to.
bristol_postcodes.assign(allocation=allocation_names.squeeze()).explore(
    column="allocation"
)

In [10]:
# Tabular view of the same allocation, grouped by serving depot.
(
    bristol_postcodes
    .assign(allocation=allocation_names.squeeze())
    .loc[:, ["outward", "geometry", "demand", "allocation"]]
    .sort_values(by="allocation")
)

Unnamed: 0,outward,geometry,demand,allocation
264,BS1,POINT (-2.59188 51.46635),119.066167,BS1
303,BS7,POINT (-2.53285 51.48866),76.470352,BS1
302,BS6,POINT (-2.54754 51.46929),104.156384,BS1
301,BS5,POINT (-2.55413 51.46291),74.42422,BS1
298,BS43,POINT (-2.59519 51.44008),99.604888,BS1
297,BS41,POINT (-2.65298 51.42691),86.246961,BS1
295,BS4,POINT (-2.56031 51.43415),102.504517,BS1
286,BS3,POINT (-2.61897 51.42531),115.962676,BS1
305,BS8,POINT (-2.61842 51.45837),116.620348,BS1
275,BS2,POINT (-2.58969 51.45792),95.765799,BS1


## Keep the old solutions around

Unfortunately, between us discovering the first solution for inner-city postcodes and your new work looking at *all* of Bristol, the depots in BS6 and BS8 were already built. Thus, we need to re-solve the problem for all of Bristol while *ensuring that* BS6 and BS8 are in the solution. We support this in `spopt` using the `predefined_facility_col` argument. This column should contain `True` when an observation *must* be selected for the solution, and `False` when it is allowed to be omitted from the solution. 

In our case, build the column:

```python
bristol_postcodes['preselected'] = bristol_postcodes.outward.isin(("BS6", "BS8"))
```

and use that as the `predefined_facility_col` argument in `spopt`.

<div class="alert alert-warning">
    
Using the methods we discussed above, can you create a new PMedian problem object to locate eight depots across all Bristol post codes, so that *BS6* and *BS8* are required to be selected as depots? How do the assignments differ between this case and your previous Bristol-wide solution? It may help to make a map using the `.explore()` method. 

</div>

In [11]:
# Flag the two depots that already exist (BS6 and BS8) so they can be forced
# into the solution.
bristol_postcodes["preselected"] = bristol_postcodes["outward"].isin(["BS6", "BS8"])

In [12]:
# Put the flagged rows first to check that the new column was built as intended.
(
    bristol_postcodes
    .sort_values(by="preselected", ascending=False)
    .loc[:, ["outward", "geometry", "demand", "preselected"]]
    .head()
)

Unnamed: 0,outward,geometry,demand,preselected
305,BS8,POINT (-2.61842 51.45837),116.620348,True
302,BS6,POINT (-2.54754 51.46929),104.156384,True
264,BS1,POINT (-2.59188 51.46635),119.066167,False
295,BS4,POINT (-2.56031 51.43415),102.504517,False
288,BS31,POINT (-2.49237 51.42438),120.839847,False


In [13]:
# Same Bristol-wide eight-depot problem, but rows where `preselected` is True
# are passed as predefined facilities.
loc8_preselected = spopt.locate.PMedian.from_geodataframe(
    gdf_demand=bristol_postcodes,
    gdf_fac=bristol_postcodes,
    demand_col="geometry",
    facility_col="geometry",
    weights_cols="demand",
    p_facilities=8,
    predefined_facility_col="preselected",
)

In [14]:
# Solve the problem with the predefined depots, reusing the shared solver.
loc8_preselected.solve(solver=solver)

<spopt.locate.p_median.PMedian at 0x1651473d0>

In [15]:
# Facility position per demand point, then the matching outward codes
# (`bristol_postcodes` was the facility frame for this problem).
allocations_forced = numpy.stack(loc8_preselected.cli2fac, axis=0)
allocation_names_forced = bristol_postcodes["outward"].values[allocations_forced]

In [16]:
# Side-by-side view of the free allocation and the allocation with BS6/BS8 forced.
(
    bristol_postcodes
    .assign(
        allocation=allocation_names.squeeze(),
        allocation_forced=allocation_names_forced.squeeze(),
    )
    .loc[:, ["outward", "demand", "allocation", "allocation_forced"]]
    .sort_values(by="allocation")
)

Unnamed: 0,outward,demand,allocation,allocation_forced
264,BS1,119.066167,BS1,BS8
303,BS7,76.470352,BS1,BS6
302,BS6,104.156384,BS1,BS6
301,BS5,74.42422,BS1,BS6
298,BS43,99.604888,BS1,BS8
297,BS41,86.246961,BS1,BS8
295,BS4,102.504517,BS1,BS6
286,BS3,115.962676,BS1,BS8
305,BS8,116.620348,BS1,BS8
275,BS2,95.765799,BS1,BS8


## Locate depots outside of the city

Land is too expensive in the city. Now, the supervisor wants to make sure that *no* facilities are selected in the city. One way to do this is to use all of the postcodes in *outer Bristol* as facilities, but keep all of the postcodes as demands. To get the postcodes for outer Bristol, we invert the selection from inner Bristol:

```python
# in pandas, ~ inverts True/False statements
bristol_outercity = bristol_postcodes[~ is_inner_city].copy()
```

<div class="alert alert-warning">
    
Using the methods we discussed above, can you create a new PMedian problem object to locate eight depots in *outer Bristol* that service *all postcodes in Bristol*? How do the assignments differ between this case and your previous Bristol-wide solution? It may help to make a map using the `.explore()` method. 

</div>

In [17]:
# Outer Bristol is every Bristol postcode that is *not* flagged as inner-city.
bristol_outercity = bristol_postcodes.loc[~is_inner_city].copy()

In [18]:
# Candidate depots come from outer Bristol only, while demand still covers
# every Bristol postcode.
loc8_outer = spopt.locate.PMedian.from_geodataframe(
    gdf_demand=bristol_postcodes,
    gdf_fac=bristol_outercity,
    demand_col="geometry",
    facility_col="geometry",
    weights_cols="demand",
    p_facilities=8,
)

In [19]:
# Solve the outer-city problem, reusing the shared solver.
loc8_outer.solve(solver=solver)

<spopt.locate.p_median.PMedian at 0x1654eded0>

In [20]:
# Facility positions refer to rows of `bristol_outercity`, the facility frame
# of this problem, so names are looked up there.
allocations_outer = numpy.stack(loc8_outer.cli2fac, axis=0)
allocation_names_outer = bristol_outercity["outward"].values[allocations_outer]

In [21]:
# Compare the unconstrained allocation with the outer-city-only allocation.
(
    bristol_postcodes
    .assign(
        allocation_free=allocation_names.squeeze(),
        allocation_outer=allocation_names_outer.squeeze(),
    )
    .loc[:, ["outward", "demand", "allocation_free", "allocation_outer"]]
    .sort_values(by="allocation_free")
)

Unnamed: 0,outward,demand,allocation_free,allocation_outer
264,BS1,119.066167,BS1,BS13
303,BS7,76.470352,BS1,BS13
302,BS6,104.156384,BS1,BS13
301,BS5,74.42422,BS1,BS13
298,BS43,99.604888,BS1,BS13
297,BS41,86.246961,BS1,BS13
295,BS4,102.504517,BS1,BS13
286,BS3,115.962676,BS1,BS13
305,BS8,116.620348,BS1,BS13
275,BS2,95.765799,BS1,BS13


Note that all of the located facilities are outside of BS1-BS9 (the inner-city postcodes are those with a district number below 10), so they're "outer-city" postal codes! 

## Challenge: Bristol is far from an isotropic plain! 

In [24]:
# Long-format table of costs; the pivot in the next cell uses its
# `origin_postcode`, `destination_postcode` and `cost` columns.
cost_table = pandas.read_csv(
    "../data/cost_table.csv",
)

In [25]:
# Reshape to a matrix with one row per destination postcode and one column per
# origin postcode, then preview the top-left corner.
cost_matrix = cost_table.pivot(
    index="destination_postcode",
    columns="origin_postcode",
    values="cost",
)
cost_matrix.iloc[:5, :5]

origin_postcode,BS10,BS11,BS12,BS13,BS14
destination_postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BS1,1077.4,672.8,473.3,286.0,771.4
BS10,0.0,682.8,447.7,652.0,1137.4
BS11,763.5,0.0,588.5,834.4,1164.3
BS12,787.4,619.6,0.0,585.1,1070.5
BS13,955.0,834.3,555.8,0.0,565.1


<div class="alert alert-warning">
    
Using the `spopt.locate.PMedian.from_cost_matrix()` function, can you create a new PMedian problem object to locate eight depots in *outer Bristol* that service all postcodes in Bristol _using travel time_ for the cost matrix? How do the assignments differ between this case and your previous Bristol-wide solution? It may help to make a map using the `.explore()` method. 

</div>

In [26]:
# Rows of `cost_matrix` are destination (demand) postcodes and columns are
# origin (candidate depot) postcodes, both in the order `pivot` produced.
# The demand weights are therefore looked up by postcode label so that weight i
# belongs to matrix row i, rather than relying on `bristol_postcodes` being
# stored in the same order. A postcode missing from `bristol_postcodes` raises
# a KeyError instead of silently misaligning the weights.
loc8_traveltime = spopt.locate.PMedian.from_cost_matrix(
    cost_matrix.values,
    bristol_postcodes.set_index("outward").loc[cost_matrix.index, "demand"].values,
    p_facilities=8,
)
                                      

In [27]:
# Solve the travel-time problem, reusing the shared solver.
loc8_traveltime.solve(solver=solver)

<spopt.locate.p_median.PMedian at 0x16534c110>

In [28]:
# Facility index per demand point for the travel-time problem.
allocations_traveltime = numpy.stack(loc8_traveltime.cli2fac)
# This problem was built from `cost_matrix.values`, so facility index j is
# column j of `cost_matrix`. Take the names from the column labels themselves
# instead of assuming `bristol_outercity` rows are in the same order.
allocation_names_traveltime = cost_matrix.columns.to_numpy()[allocations_traveltime]

In [29]:
# Compare the free allocation with the travel-time allocation and flag the
# postcodes whose serving depot is the same under both.
# NOTE(review): the allocation arrays are attached by position, which assumes
# the rows of `bristol_postcodes` are in the same order as the demand points of
# each problem - confirm for the cost-matrix-based problem.
(
    bristol_postcodes
    .assign(
        allocation_free=allocation_names.squeeze(),
        allocation_traveltime=allocation_names_traveltime.squeeze(),
    )
    .eval("same_alloc = allocation_traveltime == allocation_free")
    .loc[
        :,
        [
            "outward",
            "demand",
            "same_alloc",
            "allocation_free",
            "allocation_traveltime",
        ],
    ]
    .sort_values(by="allocation_free")
)

Unnamed: 0,outward,demand,same_alloc,allocation_free,allocation_traveltime
264,BS1,119.066167,False,BS1,BS13
303,BS7,76.470352,False,BS1,BS36
302,BS6,104.156384,False,BS1,BS13
301,BS5,74.42422,False,BS1,BS13
298,BS43,99.604888,False,BS1,BS13
297,BS41,86.246961,False,BS1,BS13
295,BS4,102.504517,False,BS1,BS13
286,BS3,115.962676,False,BS1,BS13
305,BS8,116.620348,False,BS1,BS13
275,BS2,95.765799,False,BS1,BS13


You can see that using travel time as the cost, rather than Euclidean distance, makes a huge difference to the locational decision, and thus the allocations are also very different! Only five location-allocation decisions are the same between the two problems, and the only difference is the distance metric! 