### Battle of the Neighborhoods Denver

#### IBM Data Science capstone

### Introduction/Business Problem
#### Since medical and recreational marijuana became a reality in Denver, the city has seen a 45% increase in the housing market since 2010. With swarms of people still moving to Denver today, my goal here is to check whether the concentration of dispensaries is driving up the cost of living in certain neighborhoods. I have data from the city of Denver on active dispensaries, the highest cost-of-living neighborhoods, and an official list of recognized neighborhoods from the city of Denver.

In [4]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Passing None removes pandas' display limit, so frames are shown with every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this function as pandas.json_normalize;
# confirm the installed pandas version still provides this import path.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import urllib3

from bs4 import BeautifulSoup

from lxml import html
import lxml.html

import csv
import html5lib

# NOTE(review): requests is already imported earlier in this cell; this second import is redundant.
import requests


#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')


Libraries imported.


### Importing the data for active medical and recreational marijuana dispensaries

In [92]:
# Read the active marijuana business licenses and expose the facility zip
# code under the shorter column name 'Zip Code' in a single chained expression.
active_biz_licenses_data = (
    pd.read_csv('marijuana_active_business_licenses.csv')
    .rename(columns={'Facility Zip Code': 'Zip Code'})
)
active_biz_licenses_data.head(n=5)

Unnamed: 0,Business File Number,License Type,Entity Name,Trade Name,Current License Status,Expiration Date,Facility Street Number,Facility Pre-Direction,Facility Street Name,Facility Street Type,Facility Unit Number,Facility City,Zip Code
0,2017-BFN-0000990,Retail Marijuana Inf Prod Mfg,AMERICAN INDUSTRY LLC,DENVER CLONE STORE,License Issued - Active,10/2/2019 12:00:00 AM,4571,N,Ivy,ST,,Denver,80216.0
1,2010-BFN-1045906,Medical Marijuana Center,"LITTLE BROWN HOUSE DISPENSARY, LLC",LITTLE BROWNHOUSE,License Issued - Active,2/16/2020 12:00:00 AM,1995,S,BROADWAY,,,DENVER,80210.0
2,2010-BFN-1048204,Medical Marijuana Center,"RIVERROCK, LLC",RIVERROCK WELLNESS,License Issued - Active,9/11/2019 12:00:00 AM,4935,N,YORK,ST,,DENVER,80216.0
3,2010-BFN-1048159,Medical Marijuana Center,"DENVER DISPENSARY, LLC",DENVER DISPENSARY,License Issued - Active,9/11/2019 12:00:00 AM,4975,N,VASQUEZ,BLVD,,DENVER,80216.0
4,2012-BFN-1063644,Med Marijuana Opt Prem Cultiv,"DENVER PATIENTS GROUP, LLC",LIVWELL,License Issued - Active,9/16/2019 12:00:00 AM,5141,N,FRANKLIN,ST,,DENVER,80216.0


### Importing the neighborhood breakdown data

In [11]:
# Zip code -> neighborhood lookup table; preview the first five rows.
denver_neighborhoods = pd.read_csv('zips_neighborhoods.csv')
denver_neighborhoods.head(n=5)

Unnamed: 0,Zip Code,Neighborhood
0,80123,Marston
1,80202,Union Station
2,80203,"Speer, Capitol Hill, North Capitol Hill"
3,80204,"Auraria, Barnum, Barnum West, West Colfax, Slo..."
4,80205,"CBD, City Park West, Five Points, Clayton, Sky..."


### Pulling data on most expensive neighborhoods in Denver

In [43]:
# Download the "richest neighborhoods in Denver" page and parse its HTML.
# The timeout keeps the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() stops the cell on an HTTP error instead
# of letting an error page be parsed as if it were the article.
response = requests.get(
    'https://www.homesnacks.net/richest-neighborhoods-in-denver-128979/',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

In [44]:
# Save the first <table> of the scraped page to cost_of_living_data.csv.
table = soup.find('table')
if table is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError("No <table> element was found in the downloaded page.")

headers = [th.text for th in table.select("tr th")]

# Rows without any <td> (such as the header row, which only holds <th>
# cells) would otherwise be written as empty lines, so they are skipped.
data_rows = []
for row in table.select("tr"):
    cells = [td.text.rstrip() for td in row.find_all("td")]
    if cells:
        data_rows.append(cells)

# newline="" is what the csv module expects for files it writes to; without
# it, platforms that translate line endings end up with blank rows.
with open("cost_of_living_data.csv", "w", newline="") as f:
    wr = csv.writer(f)
    wr.writerow(headers)
    wr.writerows(data_rows)

In [45]:
# Load the scraped table back from disk and preview the first five rows.
cost_of_living_data = pd.read_csv('cost_of_living_data.csv')
cost_of_living_data.head(n=5)

Unnamed: 0,Rank,Neighborhood,Median Household Income
0,1,Cherry Creek,"$112,844"
1,2,Stapleton,"$97,079"
2,3,Washington Park,"$88,971"
3,4,Auraria,"$82,813"
4,5,Central East Denver,"$82,551"


### Converting Denver zipcode data from text to csv

In [25]:
# Convert the tab-separated gazetteer text file into a comma-separated file.
txt_file = r"2018_Gaz_zcta_national.txt"
csv_file = r"zipcodes.csv"

# newline='' is what the csv module expects for files it writes to; without
# it, platforms that translate line endings end up with blank rows.
with open(txt_file, 'r') as infile, open(csv_file, 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    for raw_line in infile:
        line = raw_line.strip()  # drop the line ending and surrounding whitespace
        if line:                 # skip lines that are empty after stripping
            writer.writerow(line.split("\t"))

### Cleaning and importing Denver zipcode data 

#### Since this is a national list of every zip code with its latitude and longitude, the block of rows selected below still contains some zip codes that do not belong to Denver, and those had to be removed.

In [86]:
# Load the national zip code table, keep only the zip code and its
# latitude/longitude columns (positional columns 1-4 are discarded), and give
# the remaining columns readable names.
zipcode_data = pd.read_csv('zipcodes.csv')
zipcode_data = (
    zipcode_data
    .drop(zipcode_data.columns[[1, 2, 3, 4]], axis=1)
    .rename(columns={'GEOID': 'Zip Code', 'INTPTLAT': 'Latitude', 'INTPTLONG': 'Longitude'})
)

# Rows 27850-27903 of the national table are the starting selection. An
# explicit copy is taken so the clean-up below never operates on a view of
# zipcode_data, and so re-running this cell always starts from the same rows.
denver_zipcodes = zipcode_data.iloc[27850:27904].copy()

# Positional selections that are removed, in order. Every entry refers to
# positions in the frame as it stands after the previous removals.
positions_to_remove = (
    slice(1, 16),
    slice(11, 13),
    slice(22, 24),
    slice(23, 26),
    [19],
    [27],
)
for positions in positions_to_remove:
    denver_zipcodes = denver_zipcodes.drop(denver_zipcodes.index[positions])

# reset_index returns a new frame, so the result has to be assigned for the
# stored frame to carry the clean 0..n-1 index that is displayed.
denver_zipcodes = denver_zipcodes.reset_index(drop=True)
denver_zipcodes


Unnamed: 0,Zip Code,Latitude,Longitude
0,80123,39.61601,-105.069449
1,80202,39.751908,-104.997633
2,80203,39.731686,-104.98265
3,80204,39.734837,-105.020395
4,80205,39.758857,-104.962829
5,80206,39.730284,-104.952595
6,80207,39.762299,-104.916581
7,80209,39.706581,-104.96575
8,80210,39.676626,-104.962315
9,80211,39.767444,-105.019736


### Combining neighborhoods with zip/lat/long info

In [87]:
# Attach the neighborhood names to each zip code's coordinates
# (default inner join on the shared 'Zip Code' column).
denver_zip_merged = denver_zipcodes.merge(denver_neighborhoods, on='Zip Code')
denver_zip_merged

Unnamed: 0,Zip Code,Latitude,Longitude,Neighborhood
0,80123,39.61601,-105.069449,Marston
1,80202,39.751908,-104.997633,Union Station
2,80203,39.731686,-104.98265,"Speer, Capitol Hill, North Capitol Hill"
3,80204,39.734837,-105.020395,"Auraria, Barnum, Barnum West, West Colfax, Slo..."
4,80205,39.758857,-104.962829,"CBD, City Park West, Five Points, Clayton, Sky..."
5,80206,39.730284,-104.952595,"Cherry Creek, Congress Park, City Park, Cheesm..."
6,80207,39.762299,-104.916581,"North Park Hill, Northeast Park Hill"
7,80209,39.706581,-104.96575,"Belcaro, Washington Park, Washington Park West..."
8,80210,39.676626,-104.962315,"Cory - Merrill, Washington Park, Washington Pa..."
9,80211,39.767444,-105.019736,"West Highland, Sloan Lake, Berkeley, Highland,..."


### Combining location data with active business data

In [94]:
# Give every active license the coordinates and neighborhoods of its zip code
# (default inner join on the shared 'Zip Code' column).
denver_business_merged = denver_zip_merged.merge(active_biz_licenses_data, on='Zip Code')
denver_business_merged.head(n=5)

Unnamed: 0,Zip Code,Latitude,Longitude,Neighborhood,Business File Number,License Type,Entity Name,Trade Name,Current License Status,Expiration Date,Facility Street Number,Facility Pre-Direction,Facility Street Name,Facility Street Type,Facility Unit Number,Facility City
0,80123,39.61601,-105.069449,Marston,2013-BFN-1068512,Medical Marijuana Center,"RJJ SHERIDAN, LLC",NATIVE ROOTS LITTLETON,License Issued - Active,10/3/2019 12:00:00 AM,7870,W,QUINCY,AVE,,DENVER
1,80123,39.61601,-105.069449,Marston,2014-BFN-0003949,Retail Marijuana Store,"RJJ SHERIDAN, LLC",NATIVE ROOTS LITTLETON,License Issued - Active,1/21/2020 12:00:00 AM,7870,W,QUINCY,AVE,,DENVER
2,80202,39.751908,-104.997633,Union Station,2010-BFN-1045824,Medical Marijuana Center,"DJR COLORADO, LLC",,License Issued - Active,9/10/2019 12:00:00 AM,1620,,MARKET,ST,5W,DENVER
3,80202,39.751908,-104.997633,Union Station,2013-BFN-1069174,Retail Marijuana Store,"DJR COLORADO, LLC",,License Issued - Active,2/10/2020 12:00:00 AM,1620,,Market,,5W,Denver
4,80202,39.751908,-104.997633,Union Station,2013-BFN-1069254,Retail Marij Opt. Prem. Cultiv,"1617 WAZEE STREET, LLC",LODO WELLNESS CENTER,License Issued - Active,1/1/2020 12:00:00 AM,1617,,WAZEE,ST,B,DENVER


### Get total of business licenses by neighborhood

### Displaying map of Denver

In [96]:
# Center the map on Denver. The longitude must be negative (west of the prime
# meridian), matching the roughly -105 longitudes in the data; a positive
# value centers the map on the other side of the globe, away from the markers.
denver_map = folium.Map(location=[39.7392, -104.9903], zoom_start=11)

# Add one circle marker per merged license row, placed at the row's
# Latitude/Longitude and labelled with the row's Neighborhood value.
for lat, lng, label in zip(denver_business_merged['Latitude'], denver_business_merged['Longitude'], denver_business_merged['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        ).add_to(denver_map)

#display map
denver_map