In [2]:
!conda install -c conda-forge beautifulsoup4
from bs4 import BeautifulSoup
!conda install -c conda-forge lxml
import requests
from geopy.geocoders import Nominatim
!conda install -c conda-forge folium
import folium
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - beautifulsoup4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    beautifulsoup4-4.6.3       |           py35_0         139 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.6 MB

The following packages will be UPDATED:

    beautifulsoup4:  4.6.0-py35h442a8c9_1 --> 4.6.3-py35_0         conda-forge
    ca-certificates: 2019.1.23-0          --> 2019.6.16-hecc5488_0 conda-forge
    certifi:         2018.8.24-py35_1     --> 2018.8.24-py35_100

# Capstone Project: Recommendation of new tenant(s) to a commercial plaza
## 1. [Introduction/Business Problem](#part1)
## 2. [Data](#part2)
## 3. Methodology
## 4. Results
## 5. Discussion
## 6. Conclusion

<a id='part1'></a>
# 1. Introduction/Business Problem
## A client recently bought a commercial plaza in Lanham, Maryland. Most of the units in the plaza have already been under contract, but there is still one unit empty. He need to find a tenant to rent out the empty unit. In this data science project, I will recommend a venue (category) or a few venues (categories) for him to look into as his potential future tenant(s).

<a id='part2'></a>
# 2. Data
## The client's property is in Lanham, Maryland, which belongs to Prince George (PG) county. <br> So first, scrape all the cities/towns/Census-designated places in PG county from a Wikipedia page (https://en.wikipedia.org/wiki/Prince_George%27s_County,_Maryland#Communities)

In [3]:
source=requests.get('https://en.wikipedia.org/wiki/Prince_George%27s_County,_Maryland#Communities').text
soup=BeautifulSoup(source,'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Prince George's County, Maryland - Wikipedia
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Prince_George's_County,_Maryland","wgTitle":"Prince George's County, Maryland","wgCurRevisionId":903487116,"wgRevisionId":903487116,"wgArticleId":45454,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 maint: BOT: original-url status unknown","All articles with dead external links","Articles with dead external links from March 2018","Articles with permanently dead external links","Articles with dead external links from May 2017","Webarchive template wayback links","Use mdy dates from April 2018","Articles with short description"

In [4]:
prince_george_county=soup.find_all('li')#,class_='div-col columns column-width')
towns=[]
for item in prince_george_county:
    try:
        towns.append(item.a.text+', MD')
    except:
        pass
index_first=towns.index('Bowie, MD')
index_last=towns.index('Woodmore, MD')
prince_towns=towns[index_first:index_last]
len(prince_towns)

81

## Next, in order to utilize Foursquare location data, use Geocoder Python package to get the latitude and longitude for each city/town.

In [6]:
lat=[]
lng=[]
geolocator=Nominatim(user_agent='md_explorer')
for address in prince_towns:
    location=geolocator.geocode(address)
    try:
        lat.append(location.latitude)
        lng.append(location.longitude)
    except:
        lat.append(None)
        lng.append(None)
lat

[38.9429659,
 38.980666,
 38.8576128,
 38.9292782,
 38.9937333,
 38.9529442,
 39.0992752,
 38.9415,
 38.9698329,
 38.8962231,
 38.9939991,
 38.9392997,
 38.9431727,
 38.8853963,
 38.928167,
 38.9331669,
 38.9381668,
 38.5678986,
 38.9471169,
 38.9009452,
 38.8103511,
 38.9431668,
 38.8301133,
 38.9453888,
 38.9615103,
 38.9703884,
 38.8165274,
 38.6676168,
 39.0030666,
 38.8056605,
 38.5851203,
 38.6592847,
 39.0299081,
 38.6967833,
 None,
 39.0576091,
 38.8040027,
 38.6628956,
 38.9637218,
 38.7651145,
 38.8705297,
 38.7526153,
 38.9589741,
 38.9549372,
 38.8451131,
 38.7125305,
 38.7517812,
 38.8190021,
 38.9876104,
 39.0264985,
 38.8328909,
 38.8845568,
 39.0756446,
 38.9078897,
 38.9340002,
 38.9886035,
 38.9670551,
 38.8824538,
 38.836225,
 38.8165274,
 38.8334464,
 38.7739517,
 38.999347,
 38.9006205,
 38.78343595,
 38.8035961,
 38.8940802,
 38.919559,
 None,
 38.7567315,
 38.9678885,
 38.8417797,
 39.0698312,
 38.9376113,
 38.848724,
 38.8977194,
 38.8140024,
 38.8753903,
 39.10

## Create a pandas dataframe with columns of town, latitude, and longitude.
### - Note that the latitude and longitude are not available for some places. Just ignore them.

In [7]:
df_town=pd.DataFrame({'Town':prince_towns,'Latitude':lat,'Longitude':lng},columns=['Town','Latitude','Longitude'])
df_town.dropna(subset=['Latitude','Longitude'],inplace=True)
df_town.shape

(79, 3)

## Visualize the cities/towns on the map.

In [10]:
# create map 
address='4400 Forbes Blvd, Lanham, MD'
location=geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
map_pg_county = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for latitude, longitude, label in zip(df_town['Latitude'], df_town['Longitude'], df_town['Town']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_pg_county)  
    
map_pg_county

### - Note that there are two places (Melwood and Queen Anne) are not correcly located on the map. Manually correct them.

In [9]:
#Correct Melwood and Queen Anne's location
#df_town[df_town['Town']=='Melwood, MD']
df_town.loc[62,'Latitude']=38.801944
df_town.loc[62,'Longitude']=-76.841667
#df_town[df_town['Town']=='Queen Anne, MD']
df_town.loc[67,'Latitude']=38.898611
df_town.loc[67,'Longitude']=-76.678333

## Use Foursquare location data to get venue information in all the cities/towns in PG county

In [11]:
# The code was removed by Watson Studio for sharing.

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=3200,LIMIT=100):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        try: 
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])
        except:
            pass

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)

In [27]:
pg_venues = getNearbyVenues(names=df_town['Town'],
                                 latitudes=df_town['Latitude'],
                                 longitudes=df_town['Longitude']
                                )
pg_venues.head()

Bowie, MD
College Park, MD
District Heights, MD
Glenarden, MD
Greenbelt, MD
Hyattsville, MD
Laurel, MD
Mount Rainier, MD
New Carrollton, MD
Seat Pleasant, MD
Berwyn Heights, MD
Bladensburg, MD
Brentwood, MD
Capitol Heights, MD
Cheverly, MD
Colmar Manor, MD
Cottage City, MD
Eagle Harbor, MD
Edmonston, MD
Fairmount Heights, MD
Forest Heights, MD
Landover Hills, MD
Morningside, MD
North Brentwood, MD
Riverdale Park, MD
University Park, MD
Upper Marlboro, MD
Accokeek, MD
Adelphi, MD
Andrews AFB, MD
Aquasco, MD
Baden, MD
Beltsville, MD
Brandywine, MD
Calverton, MD
Camp Springs, MD
Cedarville, MD
Chillum, MD
Clinton, MD
Coral Hills, MD
Croom, MD
East Riverdale, MD
Fairwood, MD
Forestville, MD
Fort Washington, MD
Friendly, MD
Glassmanor, MD
Glenn Dale, MD
Hillandale, MD
Hillcrest Heights, MD
Kettering, MD
Konterra, MD
Lake Arbor, MD
Landover, MD
Langley Park, MD
Lanham, MD
Largo, MD
Marlboro Meadows, MD
Marlboro Village, MD
Marlow Heights, MD
Marlton, MD
Melwood, MD
Mitchellville, MD
National

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Bowie, MD",38.942966,-76.731234,Barnes & Noble,38.944206,-76.735447,Bookstore
1,"Bowie, MD",38.942966,-76.731234,Five Below,38.941971,-76.736021,Miscellaneous Shop
2,"Bowie, MD",38.942966,-76.731234,Five Guys,38.94359,-76.734322,Burger Joint
3,"Bowie, MD",38.942966,-76.731234,A.C. Moore Arts & Crafts,38.941518,-76.735489,Arts & Crafts Store
4,"Bowie, MD",38.942966,-76.731234,Jerry's Seafood,38.948838,-76.737431,Seafood Restaurant


In [28]:
pg_venues.shape

(5626, 7)