### Libraries Install/Import

In [1]:
import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

import csv

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

!conda install -c anaconda beautifulsoup4 --yes
import bs4
from bs4 import BeautifulSoup

!pip install opencage
from opencage.geocoder import OpenCageGeocode

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.0.0-pyh9f0ad1d_0



Downloading and Extracting Packages
geopy-2.0.0          | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ################################

### Data Import

In [15]:
# Download the Wikipedia page that lists Houston's neighborhoods.
# NOTE: despite its name, `url` holds the page's HTML text (not the address);
# the name is kept because the parsing cell that follows reads it.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_Houston_neighborhoods',
    timeout=30,  # fail instead of hanging forever if the site is unreachable
)
response.raise_for_status()  # stop on HTTP errors rather than parsing an error page
url = response.text

### Getting data out of HTML with BeautifulSoup

In [16]:
# Parse the downloaded HTML. The parser is named explicitly so that bs4 does
# not have to guess one (which emits a warning and can give different parse
# trees depending on which parsers happen to be installed).
soup = BeautifulSoup(url, 'html.parser')

### Selecting the table with the data to be used from Wikipedia

In [17]:
# Pick the first <table> element that carries the "wikitable" CSS class.
table = soup.find('table', class_='wikitable')

### Creating and filling the lists 'neighborhood' and 'zone' with data from Wikipedia's table by looping

In [18]:
# Parallel lists filled from the scraped table: neighborhood[i] pairs with zone[i].
neighborhood = []
zone = []

rows = table.find_all('tr')

for row in rows:
    cells = row.find_all('td')

    # Cells 1 and 2 are read below, so a row needs at least three <td> cells.
    # (The previous `> 1` check let two-cell rows through and then failed on
    # cells[2] with an IndexError.) Rows with fewer cells are skipped.
    if len(cells) > 2:
        neighborhood.append(cells[1].text.strip())
        zone.append(cells[2].text.strip())

### Creating a df with 'neighborhood' data and renaming the columns

In [19]:
# Assemble the two scraped lists into one frame, one row per neighborhood.
df = pd.DataFrame({'Neighborhood': neighborhood, 'Zone': zone})
print(df.shape)
df.head(5)

(88, 2)


Unnamed: 0,Neighborhood,Zone
0,Willowbrook,Northwest
1,Greater Greenspoint,North
2,Carverdale,Northwest
3,Fairbanks / Northwest Crossing,Northwest
4,Greater Inwood,Northwest


### Checking whether there are any blank cells in the df

In [20]:
# First reduce to one flag per column, then to a single flag for the frame:
# True if at least one cell anywhere in df is NaN/None.
columns_with_missing = df.isna().any()
columns_with_missing.any()

False

### Replacing the placeholder in the Downtown row, as it does not contain location data ('—' for Downtown)

In [21]:
# Substitute the '—' placeholder with 'Downtown'.
# The old and new values must be passed as two separate arguments
# (to_replace, value). Passing them as one tuple makes pandas treat both
# strings as values *to be replaced* and leaves `value` unset, so the
# intended '—' -> 'Downtown' substitution never happens.
df = df.replace('—', 'Downtown')
print(df.shape)
df.head(5)

(88, 2)


Unnamed: 0,Neighborhood,Zone
0,Willowbrook,Northwest
1,Greater Greenspoint,North
2,Carverdale,Northwest
3,Fairbanks / Northwest Crossing,Northwest
4,Greater Inwood,Northwest


### Getting Houston's neighborhoods coordinates through geocode and adding the coordinates in the df

In [22]:
import os
from getpass import getpass

# The OpenCage API key is read from the OPENCAGE_API_KEY environment variable
# (or prompted for) so that the secret is not stored in the notebook.
# NOTE(review): the key that used to be hard-coded here has been published
# with the notebook and should be revoked.
key = os.environ.get('OPENCAGE_API_KEY') or getpass('OpenCage API key: ')
geocoder = OpenCageGeocode(key)

lat = []
lon = []

# Iterate over the column itself rather than a hard-coded range(0, 88), so the
# cell keeps working if the number of scraped neighborhoods changes.
for neighbor in df['Neighborhood']:
    address = '{}, Houston, US'.format(neighbor)
    result = geocoder.geocode(address, no_annotations="1")
    if result:
        # Use the first hit returned by the geocoder.
        geometry = result[0]['geometry']
        latitude = geometry['lat']
        longitude = geometry['lng']
    else:
        # NaN (instead of the string 'N/A') keeps both coordinate columns
        # numeric, so failed lookups can be found with isna()/dropna().
        latitude = np.nan
        longitude = np.nan

    lat.append(latitude)
    lon.append(longitude)

df['Latitude'] = lat
df['Longitude'] = lon
df.head()

Unnamed: 0,Neighborhood,Zone,Latitude,Longitude
0,Willowbrook,Northwest,29.660254,-95.456096
1,Greater Greenspoint,North,29.944719,-95.416074
2,Carverdale,Northwest,29.848687,-95.53945
3,Fairbanks / Northwest Crossing,Northwest,29.810174,-95.361049
4,Greater Inwood,Northwest,34.99427,-80.629656


### Getting Houston's coordinates

In [23]:
# Look up the coordinates of the city itself; they are used to center the map.
address = 'Houston'

geolocator = Nominatim(user_agent="hou_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on the attribute reads below.
    raise ValueError('No geocoding result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Houston {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Houston 29.7589382, -95.3676974.


### Creating a map of Houston

In [27]:
# Base map centered on the Houston coordinates looked up in the previous step.
houston_center = [latitude, longitude]
map_houston = folium.Map(location=houston_center, zoom_start=4)

### Adding markers to Houston's map with its neighborhoods and coordinates

In [25]:
# Draw one circle marker per neighborhood, labelled "<neighborhood>, <zone>".
# The zone is taken from the same row of df; formatting the global `zone`
# list (as before) put the entire list of zones into every popup.
# Loop variables are named so they do not overwrite the `lat` / `neighborhood`
# lists created by earlier cells.
for nb_lat, nb_lng, nb_name, nb_zone in zip(
        df['Latitude'], df['Longitude'], df['Neighborhood'], df['Zone']):
    label = '{}, {}'.format(nb_name, nb_zone)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [nb_lat, nb_lng],
        radius=5,
        popup=label,
        color='hotpink',
        fill=True,
        fill_color='#FFB6C1',
        fill_opacity=0.7,
        parse_html=False).add_to(map_houston)

map_houston

### Analyzing Houston's neighborhoods map

In [26]:
# The geocoder did not return the right latitude/longitude for 7 neighborhoods:
# Lake Houston, South Main, Meadowbrook, Greater Third, Acres Home,
# Greater Inwood and Clear Lake.
# The wrong data will not be considered in the analysis, so those rows are
# dropped from the df (identified by their index labels).
ROWS_WITH_WRONG_COORDINATES = [43, 34, 74, 66, 4, 5, 80]

# `index=` replaces the positional axis argument (`0`), which recent pandas
# versions no longer accept. errors='ignore' lets the cell be re-run after the
# rows are already gone instead of failing with a KeyError.
df.drop(index=ROWS_WITH_WRONG_COORDINATES, inplace=True, errors='ignore')
df.shape

(81, 4)

### Refreshing the map with the remaining neighborhoods

In [28]:
# Rebuild the map from scratch before drawing: markers cannot be "refreshed"
# by adding to the existing map, because the markers of the rows that were
# just dropped would still be on it when the notebook runs top to bottom.
map_houston = folium.Map(location=[latitude, longitude], zoom_start=4)

# One circle marker per remaining neighborhood, labelled
# "<neighborhood>, <zone>". The zone comes from the same row of df; formatting
# the global `zone` list put the entire list of zones into every popup.
for nb_lat, nb_lng, nb_name, nb_zone in zip(
        df['Latitude'], df['Longitude'], df['Neighborhood'], df['Zone']):
    label = '{}, {}'.format(nb_name, nb_zone)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [nb_lat, nb_lng],
        radius=5,
        popup=label,
        color='hotpink',
        fill=True,
        fill_color='#FFB6C1',
        fill_opacity=0.7,
        parse_html=False).add_to(map_houston)

map_houston

### Credentials for Foursquare API connection

In [29]:
import os
from getpass import getpass

# Foursquare credentials are read from the environment (FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET) or prompted for, so that no secret is stored in
# the notebook.
# NOTE(review): the values that used to be hard-coded here have been published
# with the notebook and should be revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ')

# API version date. Foursquare expects YYYYMMDD; the previous value
# '31122019' was written as DDMMYYYY.
VERSION = '20191231'

# Maximum number of venues requested per API call.
LIMIT = 100

### Fetching the venues within 500 meters of each Houston neighborhood through the Foursquare API

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=30):
    """Collect the Foursquare venues found around each given location.

    One request is sent to the Foursquare ``venues/explore`` endpoint per
    (name, latitude, longitude) triple, using the module-level CLIENT_ID,
    CLIENT_SECRET, VERSION and LIMIT settings. Each name is printed as it is
    processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to explore.
    radius : int, default 500
        Search radius passed to the API.
    timeout : float, default 30
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lti, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lti,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail fast on timeouts and HTTP errors instead
        # of hanging or tripping over a missing key in an error payload
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lti,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor (rather than assigning
    # `.columns` afterwards) also works when no venue was collected.
    near_venues = pd.DataFrame(venue_rows, columns=columns)

    return(near_venues)

### Bringing the venues data for each neighborhood and creating a new DF

In [31]:
# Explore the surroundings of every neighborhood left in df (default radius)
# and gather all returned venues into a single frame.
hou_venues = getNearbyVenues(
    df['Neighborhood'],
    df['Latitude'],
    df['Longitude'],
)

Willowbrook
Greater Greenspoint
Carverdale
Fairbanks / Northwest Crossing
Hidden Valley
Westbranch
Addicks / Park Ten
Spring Branch West
Langwood
Central Northwest (formerly Near Northwest)
Independence Heights
Lazybrook / Timbergrove
Greater Heights
Memorial
Eldridge / West Oaks
Briar Forest
Westchase
Mid-West (formerly Woodlake/Briarmeadow)
Greater Uptown
Washington Avenue Coalition / Memorial Park
Afton Oaks / River Oaks
Neartown / Montrose
Alief
Sharpstown
Gulfton
University Place
Westwood
Braeburn
Meyerland
Braeswood
Medical Center
Astrodome Area
Brays Oaks (formerly Greater Fondren S.W.)
Westbury
Willow Meadows / Willowbend
Fondren Gardens
Central Southwest
Fort Bend / Houston
IAH Airport
Kingwood
Northside / Northline
Jensen
East Little York / Homestead
Trinity / Houston Gardens
East Houston
Settegast
Northside Village
Kashmere Gardens
El Dorado / Oates Prairie
Hunterwood
Greater Fifth Ward
Denver Harbor / Port Houston
Pleasantville Area
Northshore
Clinton Park / Tri-Community
F

### Creating a new DF assigning 1 to each venue that exists in each neighborhood (get_dummies)

In [33]:
# One indicator column per venue category; the empty prefix/separator makes
# the column names equal to the plain category names.
venue_categories = hou_venues[['Venue Category']]
hou_onehot = pd.get_dummies(venue_categories, prefix="", prefix_sep="")

### Cleaning the DF hou_onehot

In [34]:
# Attach the neighborhood of each venue to its one-hot row (aligned on index).
# NOTE(review): a venue category literally named 'Neighborhood' would be
# overwritten by this column — confirm none exists in the data.
hou_onehot = hou_onehot.assign(Neighborhood=hou_venues['Neighborhood'])
print(hou_onehot.shape)
# The column is deliberately left where it is; moving it to the front was
# tried earlier and is disabled:
#hou_onehot = hou_onehot[ ['Neighborhood'] + [ col for col in hou_onehot.columns if col != 'Neighborhood' ] ]

(810, 187)


### Bringing how many Neighborhoods have at least one venue

In [35]:
# Of the 81 neighborhoods, 76 have venues.
# Averaging the one-hot rows per neighborhood gives, for every category, the
# share of that neighborhood's venues that belong to it.
per_neighborhood = hou_onehot.groupby('Neighborhood')
hou_group = per_neighborhood.mean().reset_index()
print(hou_group.shape)
hou_group.head(5)

(76, 187)


Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beach,Beer Bar,Beer Garden,Bike Trail,Bistro,Boutique,Breakfast Spot,Brewery,Burger Joint,Business Service,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Baseball Diamond,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Duty-free Shop,Electronics Store,Empanada Restaurant,Event Service,Farmers Market,Fast Food Restaurant,Financial or Legal Service,Flower Shop,Food,Food Court,Food Service,Food Truck,Football Stadium,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gay Bar,General Entertainment,Gift Shop,Grocery Store,Gun Range,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,Historic Site,History Museum,Home Service,Hot Dog Joint,Hot Spring,Hotel,Hotel Bar,Hunan Restaurant,Hunting Supply,IT Services,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Market,Martial Arts School,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Optical Shop,Other Great Outdoors,Other Repair Shop,Outdoor Supply Store,Outdoors & Recreation,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Police Station,Pool,Pub,Public Art,Record Shop,Rental Car Location,Rental Service,Restaurant,Salad 
Place,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Store,Skate Park,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Social Club,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Track,Trail,Train Station,Travel & Transport,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Water Park,Wine Bar,Yoga Studio,Zoo Exhibit
0,Addicks / Park Ten,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Afton Oaks / River Oaks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Alief,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Astrodome Area,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0
4,Braeburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Putting venues in descending order

In [36]:
def return_most_common_venues(row, top_venues):
    """Return the labels of the `top_venues` largest entries of `row`.

    The first element of `row` is skipped (it is not a venue frequency);
    the remaining entries are ordered from largest to smallest and the index
    labels of the leading `top_venues` entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:top_venues]

### Creating a new df only with the column 'Japanese Restaurant'

In [37]:
# Keep only the neighborhood name and its Japanese-restaurant share.
columns_of_interest = ['Neighborhood', 'Japanese Restaurant']
hou_res = hou_group.loc[:, columns_of_interest]
hou_res.head(5)

Unnamed: 0,Neighborhood,Japanese Restaurant
0,Addicks / Park Ten,0.0
1,Afton Oaks / River Oaks,0.0
2,Alief,0.0
3,Astrodome Area,0.0
4,Braeburn,0.0


### Bringing which neighborhoods have Japanese restaurant

In [38]:
# Neighborhoods whose Japanese-restaurant share is above the (tiny) threshold,
# i.e. where at least one such venue was returned.
has_japanese = hou_res['Japanese Restaurant'] > 0.000001
hou_japy = hou_res.loc[has_japanese]
print(hou_japy.shape)
hou_japy

(5, 2)


Unnamed: 0,Neighborhood,Japanese Restaurant
17,Eldridge / West Oaks,0.1
19,Fondren Gardens,0.041667
36,IAH Airport,0.032258
61,South Park,0.025
67,Trinity / Houston Gardens,0.047619


### Listing the neighborhoods that do not have a Japanese restaurant

In [39]:
# Neighborhoods whose Japanese-restaurant share is below the (tiny) threshold,
# i.e. where no such venue was returned.
lacks_japanese = hou_res['Japanese Restaurant'] < 0.000001
hou_japn = hou_res.loc[lacks_japanese]
print(hou_japn.shape)
hou_japn

(71, 2)


Unnamed: 0,Neighborhood,Japanese Restaurant
0,Addicks / Park Ten,0.0
1,Afton Oaks / River Oaks,0.0
2,Alief,0.0
3,Astrodome Area,0.0
4,Braeburn,0.0
5,Braeswood,0.0
6,Brays Oaks (formerly Greater Fondren S.W.),0.0
7,Briar Forest,0.0
8,Carverdale,0.0
9,Central Northwest (formerly Near Northwest),0.0


### Creating a new df with the TOP 5 neighborhoods by number of venues

In [41]:
# Number of venue rows per neighborhood, keeping the five largest.
venue_counts = hou_onehot['Neighborhood'].value_counts().head(5)

# value_counts() names its result differently across pandas versions
# (series named after the column vs. a series named 'count' with a named
# index). The names are pinned here so the frame always has the neighborhood
# names as an unnamed index and a single column called 'Neighborhood' holding
# the counts.
hou_top = venue_counts.rename('Neighborhood').rename_axis(None).to_frame()
hou_top

Unnamed: 0,Neighborhood
IAH Airport,62
Medical Center,42
Sunnyside,42
Central Southwest,42
South Park,40


### Cleaning new data frame hou_top

In [42]:
# Turn the neighborhood index into a regular column and name both columns.
# The names are assigned by position (first the former index, then the
# counts) because the names produced by value_counts()/reset_index() differ
# between pandas versions, which made the earlier name-based rename fragile.
hou_top = hou_top.reset_index()
hou_top.columns = ['Neighborhood', 'Number of Venues']
hou_top

Unnamed: 0,Neighborhood,Number of Venues
0,IAH Airport,62
1,Medical Center,42
2,Sunnyside,42
3,Central Southwest,42
4,South Park,40


### Listing the TOP 5 neighborhoods by number of venues that do not have a Japanese restaurant

In [44]:
# Keep only the top-venue neighborhoods that also appear in the list of
# neighborhoods without a Japanese restaurant (inner join on the name).
final_decision = hou_top.merge(hou_japn, on='Neighborhood', how='inner')
final_decision

Unnamed: 0,Neighborhood,Number of Venues,Japanese Restaurant
0,Medical Center,42,0.0
1,Sunnyside,42,0.0
2,Central Southwest,42,0.0
