# Imports

In [1]:
from geopy.distance import great_circle
import docplex.mp
import re

import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

import numpy as np

# Definitions

In [2]:
class XPoint(object):
    """A bare 2-D point holding an ``x`` and a ``y`` coordinate."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __str__(self):
        # Rendered as "P(<x>_<y>)", both coordinates in %g notation.
        coords = (self.x, self.y)
        return "P(%g_%g)" % coords

class NamedPoint(XPoint):
    """An ``XPoint`` that additionally carries a name and prints as that name."""

    def __init__(self, name, x, y):
        super().__init__(x, y)
        self.name = name

    def __str__(self):
        # Only the name is shown; the coordinates are not part of the text form.
        return self.name

def distance(p1, p2):
    """Return the great-circle distance between two points, in kilometres.

    Each point is passed to ``great_circle`` as the pair ``(y, x)``.
    """
    start = (p1.y, p1.x)
    end = (p2.y, p2.x)
    return great_circle(start, end).km

def build_libraries_from_url(url, name_pos, lat_long_pos, timeout=30):
    """Fetch a JSON row export and build one ``NamedPoint`` per geolocated row.

    The document at ``url`` must be a JSON object whose ``"data"`` entry is a
    list of rows. In each row, ``row[name_pos]`` is the library name and
    ``row[lat_long_pos]`` is a sequence holding the latitude at index 1 and
    the longitude at index 2.

    Args:
        url: Address of the JSON export.
        name_pos: Index of the name field within a row.
        lat_long_pos: Index of the location field within a row.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``NamedPoint`` objects named ``P_<name>_<k>``, where ``k``
        numbers the kept rows starting at 1. Rows whose coordinates are
        missing or not numeric are skipped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here so the function still works when its cell is run on its own.
    import requests

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    rows = response.json()['data']

    libraries = []
    for location in rows:
        uname = location[name_pos]
        try:
            latitude = float(location[lat_long_pos][1])
            longitude = float(location[lat_long_pos][2])
        except (TypeError, ValueError, IndexError):
            # No usable coordinates in this row: leave it out.
            continue

        try:
            name = str(uname)
        except Exception:
            name = "???"

        # Rows are kept whenever both coordinates parsed. A truthiness test
        # would wrongly discard 0.0, which is a valid latitude or longitude.
        name = "P_%s_%d" % (name, len(libraries) + 1)
        libraries.append(NamedPoint(name, longitude, latitude))
    return libraries


# Web Scraping Library Locations

In [3]:
# Chicago libraries from the city's JSON export: the name is field 10 of each
# row and the location is field 16.
chicago_libraries = build_libraries_from_url(
    'https://data.cityofchicago.org/api/views/x8fc-8rcq/rows.json?accessType=DOWNLOAD',
    name_pos=10,
    lat_long_pos=16,
)

# Despite its name, this is the URL of the page that lists London libraries,
# not a collection of libraries.
london_libraries = "https://www.allthelibraries.com/list"


In [4]:
lib_names = []
hyperlinks = []

# Single pass: only the one listing page is fetched. The loop is kept so the
# progress message still carries its index.
for i in range(1, 2):

    print(f"Scraping library {i}")
    url = london_libraries

    # Bound the wait and fail loudly on an HTTP error instead of parsing an
    # error page as if it were the listing.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse content
    content = response.content
    parsed_content = BeautifulSoup(content, 'html.parser')

    # Library names: text of each <ol class="inner_text_column"> found inside
    # a <div class="text_column">.
    for para in parsed_content.find_all("div", {"class": "text_column"}):
        for para2 in para.find_all('ol', {'class': 'inner_text_column'}):
            lib_names.append(para2.get_text())

    # Link targets: anchors without an href are skipped so the list holds only
    # strings (a None entry would break later substring filtering).
    for para3 in parsed_content.find_all('a', href=True):
        hyperlinks.append(para3.get('href'))

    print(f"   ---> {len(hyperlinks)} total links")

Scraping library 1
   ---> 163 total links


# Cleaning Scraped Data

In [5]:
# Keep the links that contain the site's library-page prefix, then drop the
# first match.
filtered_hyperlinks = [
    link for link in hyperlinks
    if 'http://www.allthelibraries.com/' in link
][1:]

filtered_lib = pd.DataFrame({'filtered_hyperlinks': filtered_hyperlinks})

# The library slug is whatever follows ".com/" in the link.
url_parts = filtered_lib['filtered_hyperlinks'].str.split('.com/', expand=True)
library_name_filtered = url_parts[1]
filtered_lib['library_name_filtered'] = library_name_filtered

filtered_lib

Unnamed: 0,filtered_hyperlinks,library_name_filtered
0,http://www.allthelibraries.com/56a-Infoshop-Li...,56a-Infoshop-Library
1,http://www.allthelibraries.com/AA-School-of-Ar...,AA-School-of-Architecture-Library
2,http://www.allthelibraries.com/Aftab-Khan-Library,Aftab-Khan-Library
3,http://www.allthelibraries.com/Alexandra-Park-...,Alexandra-Park-Library
4,http://www.allthelibraries.com/Anthropology-Li...,Anthropology-Library-Research-Centre
...,...,...
70,http://www.allthelibraries.com/Vaughan-William...,Vaughan-Williams-Memorial-Library
71,http://www.allthelibraries.com/Warburg-Institu...,Warburg-Institute-Library
72,http://www.allthelibraries.com/Wellcome-Library,Wellcome-Library
73,http://www.allthelibraries.com/Westminster-Ref...,Westminster-Reference-Library


In [6]:
# Each scraped text block holds many names separated by newlines and padded
# with tabs: strip the tabs, split on newlines, and flatten to one long array.
names_raw = pd.DataFrame(lib_names, columns=['lib_names'])['lib_names']
name_grid = names_raw.str.replace('\t', '').str.split('\n', expand=True)
flat_names = name_grid.values.flatten()

# The first and the last entries of the flattened array are discarded.
df = pd.DataFrame(flat_names[1:-1], columns=['cleaned_names'])
df


Unnamed: 0,cleaned_names
0,56a Infoshop Library
1,AA School of Architecture Library
2,Abbey Wood Library
3,ACAS Information Centre
4,Acton Town Hall Library
...,...
793,World Rugby Museum Library
794,Yeading Library
795,Yiewsley Library
796,York Gardens Library


# Extracting Locations Using geopy's Nominatim Geocoder

In [7]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="Geopy Library")

# The public Nominatim service asks for at most one request per second, so the
# calls are spaced out. Errors that survive the retries are re-raised rather
# than silently turned into "not found".
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
    swallow_exceptions=False,
)

# Fallback for names that cannot be geocoded, resolved once instead of once
# per miss.
# NOTE(review): every unresolved library lands on this same point, which skews
# the distances used later - consider dropping those rows instead.
fallback_location = geocode('London')
if fallback_location is None:
    raise RuntimeError("Could not geocode the fallback location 'London'")

locations = []
latitudes = []
longitudes = []
libraries = []

for raw_name in df['cleaned_names']:
    name = str(raw_name)
    loc = geocode(name, country_codes='GB')
    if loc is None:
        loc = fallback_location

    locations.append(loc)
    latitudes.append(loc.latitude)
    longitudes.append(loc.longitude)
    libraries.append(NamedPoint(name, loc.longitude, loc.latitude))

# Assign whole columns at once: element-wise `df[col][i] = value` is chained
# assignment, which pandas does not guarantee to write through to `df`.
df["location"] = locations
df["latitude"] = latitudes
df["longitude"] = longitudes
    

# Problem Statement

In [8]:
# Optional sanity check (disabled). `df` and `libraries` are built from the London listing,

# so the message names London rather than Chicago:
# print("There are %d public libraries in London" % (len(df)))
# print("There are %d public libraries in London" % (len(libraries)))

# Number of coffee shops to open; a later cell constrains the selected sites to exactly this many.
nb_shops = 10
print("We would like to open %d coffee shops" % nb_shops)


There are 798 public libraries in Chicago
There are 798 public libraries in Chicago


# Library Locations on Map using Folium

In [10]:
import folium

# Map centre is hard-coded as [latitude, longitude].
# NOTE(review): the values look like central London - confirm.
map_osm = folium.Map(location=[51.507446, -0.127765], zoom_start=9)

# One default marker per row of df.
for lt, lg in zip(df['latitude'], df['longitude']):
    folium.Marker([lt, lg]).add_to(map_osm)
map_osm

# Building the DOCPLEX Model

In [11]:
from docplex.mp.environment import Environment
from docplex.mp.model import Model

# Create the docplex model, named "coffee shops"; it is populated in later cells.
mdl = Model("coffee shops")

In [12]:
# Distance threshold, compared against distance() (kilometres): a link at or
# beyond it is forbidden by the constraint cell that follows.
BIGNUM = 999_999_999

# Collapse repeated references to the same object. NamedPoint defines neither
# __eq__ nor __hash__, so distinct points with equal coordinates all remain.
libraries = set(libraries)

# For simplicity every library location is also a candidate coffee shop
# location, so both roles share the same collection.
coffeeshop_locations = libraries

# Decision variables
# is_coffeeshop[c]: binary, one per candidate location c.
coffeeshop_vars = mdl.binary_var_dict(coffeeshop_locations, name="is_coffeeshop")

# link[c, b]: binary, one per (candidate location c, library b) pair.
link_vars = mdl.binary_var_matrix(coffeeshop_locations, libraries, name="link")





# Adding Constraints 
(adapted from: https://dataplatform.cloud.ibm.com/exchange/public/entry/view/aceccfd155454fd9741852e12e9cce4e?context=cpdaas)

In [13]:
# Forbid every link whose two end points are BIGNUM or more apart.
for shop in coffeeshop_locations:
    for library in libraries:
        too_far = distance(shop, library) >= BIGNUM
        if too_far:
            ct_name = "ct_forbid_{0!s}_{1!s}".format(shop, library)
            mdl.add_constraint(link_vars[shop, library] == 0, ct_name)


In [14]:
# A link to a location can only be active when that location is selected:
# link[c, b] <= is_coffeeshop[c] for every pair.
mdl.add_constraints(
    link_vars[shop, library] <= coffeeshop_vars[shop]
    for library in libraries
    for shop in coffeeshop_locations
)
mdl.print_information()


Model: coffee shops
 - number of variables: 637602
   - binary=637602, integer=0, continuous=0
 - number of constraints: 636804
   - linear=636804
 - parameters: defaults
 - objective: none
 - problem type is: MILP


In [15]:
# Every library has exactly one active link.
mdl.add_constraints(
    mdl.sum(link_vars[shop, library] for shop in coffeeshop_locations) == 1
    for library in libraries
)
mdl.print_information()


Model: coffee shops
 - number of variables: 637602
   - binary=637602, integer=0, continuous=0
 - number of constraints: 637602
   - linear=637602
 - parameters: defaults
 - objective: none
 - problem type is: MILP


In [16]:
# Exactly nb_shops candidate locations are selected.
selected_count = mdl.sum(coffeeshop_vars[shop] for shop in coffeeshop_locations)
mdl.add_constraint(selected_count == nb_shops)
mdl.print_information()


Model: coffee shops
 - number of variables: 637602
   - binary=637602, integer=0, continuous=0
 - number of constraints: 637603
   - linear=637603
 - parameters: defaults
 - objective: none
 - problem type is: MILP


# Solving the Model

In [17]:
# Objective: the sum over all active links of the distance they span.
total_distance = mdl.sum(
    link_vars[shop, library] * distance(shop, library)
    for shop in coffeeshop_locations
    for library in libraries
)
mdl.minimize(total_distance)


In [18]:
# Recap of the problem size: number of candidate locations and number of shops to open.
print("# coffee shops locations = %d" % len(coffeeshop_locations))
print("# coffee shops           = %d" % nb_shops)


# coffee shops locations = 798
# coffee shops           = 10


# Solve the model
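(Requires a full, non-community edition of CPLEX: the promotional edition is limited to 1000 variables and 1000 constraints, while this model has 637,602 variables and 637,603 constraints.)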

In [22]:
# Run the solve outside of `assert`: under `python -O` assert statements are
# stripped, which would remove the solve call itself.
solution = mdl.solve()
if not solution:
    # Same exception type and message as the assert this replaces.
    raise AssertionError("!!! Solve of the model fails")

DOcplexLimitsExceeded: **** Promotional version. Problem size limits (1000 vars, 1000 consts) exceeded, model has 637602 vars, 637603 consts, CPLEX code=1016

# Plot Library and Coffee Shop Locations on Map Using Folium

In [20]:
import folium


def _is_on(var):
    """Return True when a solved binary variable is set.

    Solver values are floats that may sit a small tolerance away from 0 or 1,
    so the test is "nearer 1 than 0" rather than an exact comparison with 1.
    """
    return var.solution_value > 0.5


total_distance = mdl.objective_value
open_coffeeshops = [i for i in coffeeshop_locations if _is_on(coffeeshop_vars[i])]
# Set copy for membership tests; the list above keeps its order for printing.
open_coffeeshop_set = set(open_coffeeshops)
not_coffeeshops = [i for i in coffeeshop_locations if i not in open_coffeeshop_set]
edges = [(i, j) for j in libraries for i in coffeeshop_locations if _is_on(link_vars[i, j])]

print("Total distance = %g" % total_distance)
print("# coffee shops  = {0}".format(len(open_coffeeshops)))
for c in open_coffeeshops:
    print("new coffee shop: {0!s}".format(c))

# Displaying the solution
# Coffee shops are highlighted in red.
map_osm = folium.Map(location=[51.507446, -0.127765], zoom_start=9)
for coffeeshop in open_coffeeshops:
    lt = coffeeshop.y
    lg = coffeeshop.x
    folium.Marker([lt, lg], icon=folium.Icon(color='red', icon='info-sign')).add_to(map_osm)

# Libraries that did not become coffee shops get the default marker.
for j in libraries:
    if j not in open_coffeeshop_set:
        lt = j.y
        lg = j.x
        folium.Marker([lt, lg]).add_to(map_osm)

# One blue line per active link, from the coffee shop to its library.
for (c, j) in edges:
    coordinates = [[c.y, c.x], [j.y, j.x]]
    map_osm.add_child(folium.PolyLine(coordinates, color='blue', weight=5))

map_osm

DOcplexException: Model<coffee shops> did not solve successfully