In [52]:
import requests
from bs4 import BeautifulSoup
import re
import json
from collections import defaultdict 
import time
from pandas import DataFrame
from geopy.geocoders import Nominatim
from statistics import mean

In [2]:
class NextDoorInfo:
    """Scraper for Nextdoor's public California city and neighborhood pages.

    Intended call order: get_base_page() -> get_all_city() ->
    get_all_neighborhood() -> get_all_tags().

    Attributes:
        base_url: Index page from which the city links are collected.
        all_city_url: City page URLs gathered by get_all_city().
        all_neighborhood_url: One list of neighborhood URLs per entry of
            all_city_url, in the same order; the list is empty when no
            neighborhood links could be collected for that city.
        city_name_pattern, neighbor_name_pattern, attributes_pattern:
            Regular expressions applied to the text of a neighborhood page.
        final_result: Rows of
            [city name, city URL, neighborhood name, neighborhood URL, attributes]
            accumulated by get_all_tags().
        cursor: Index of the next city get_all_tags() will process, so an
            interrupted run can be resumed by calling get_all_tags() again.
        request_timeout: Seconds to wait for an HTTP response before giving up.
    """

    def __init__(self):
        self.base_url = 'https://nextdoor.com/find-neighborhood/ca/'
        self.all_city_url = []
        self.all_neighborhood_url = []
        self.city_name_pattern = "(?<=city: \").*(?=\",)"
        self.neighbor_name_pattern = "(?<=shortName: \").*(?=\",)"
        self.attributes_pattern = "(?<=attributes: ).*(?=,)"
        self.final_result = []
        self.cursor = 0
        # Without a timeout a stalled connection would block the crawl forever.
        self.request_timeout = 30

    def get_base_page(self):
        """Download and parse the index page at base_url.

        Stores the HTTP response in self.page and the parsed document in
        self.soup, then prints whether the request succeeded.
        """
        self.page = requests.get(self.base_url, timeout=self.request_timeout)
        self.soup = BeautifulSoup(self.page.content, 'html.parser')
        # Check the HTTP status: a parsed soup object is never None, so testing
        # the soup could not detect a failed download.
        if self.page.ok:
            print("Got Page")
        else:
            print("Failed to get page: HTTP", self.page.status_code)

    def get_all_city(self):
        """Collect the city links from the parsed index page.

        Appends the href of every element with class 'link' inside the
        element with id 'child_links' to all_city_url, then prints up to the
        first five URLs. Requires get_base_page() to have been called.
        """
        self.results = self.soup.find(id="child_links")
        self.job_elems = self.results.find_all(class_='link')
        for link in self.job_elems:
            self.all_city_url.append(link['href'])
        print("First 5 Cities")
        # Slicing tolerates pages that list fewer than five cities.
        for city_url in self.all_city_url[:5]:
            print(city_url)

    def get_all_neighborhood(self):
        """Collect the neighborhood links of every city in all_city_url.

        Appends exactly one list per city to all_neighborhood_url so both
        lists stay index-aligned. Any failure while fetching or parsing a
        city page is reported as "No Neighbor" and leaves that city's list
        with whatever links were gathered before the failure (usually none).
        """
        for city_url in self.all_city_url:
            print(city_url)
            temp_url = []

            try:
                city_page = requests.get(city_url, timeout=self.request_timeout)
                city_soup = BeautifulSoup(city_page.content, 'html.parser')
                city_results = city_soup.find(id='child_links')
                # find() returns None when the container is missing; the
                # resulting AttributeError is handled below.
                city_job_elems = city_results.find_all(class_='link')

                for link in city_job_elems:
                    temp_url.append(link['href'])

            except Exception:
                # Deliberately best-effort: one bad city must not stop the crawl.
                print("No Neighbor")

            self.all_neighborhood_url.append(temp_url)

    def get_all_tags(self):
        """Scrape every neighborhood page and append rows to final_result.

        Starts at self.cursor and advances it after each city, so calling
        the method again after an interruption resumes with the first city
        that was not completed.

        For each neighborhood a row
        [city name, city URL, neighborhood name, neighborhood URL, attributes]
        is produced; when the page cannot be parsed the three scraped fields
        are replaced by "Problem ..." markers. A city without any
        neighborhood URL contributes a single placeholder row
        ["City Name", city URL, None, None, None].
        """
        for place in range(self.cursor, len(self.all_neighborhood_url)):
            city_url = self.all_city_url[place]
            neighborhoods = self.all_neighborhood_url[place]
            result = []

            # Each entry is a list, so test for emptiness; comparing the list
            # with 0 was always true and made the placeholder branch unreachable.
            if neighborhoods:
                for neighbor in neighborhoods:
                    print(neighbor)
                    neighborhood_page = requests.get(neighbor, timeout=self.request_timeout)
                    neighborhood_soup = BeautifulSoup(neighborhood_page.content, 'html.parser')
                    # Serialise the document once instead of once per regex.
                    page_source = str(neighborhood_soup)
                    try:
                        city_name = re.search(self.city_name_pattern, page_source).group(0)
                        neighbor_name = re.search(self.neighbor_name_pattern, page_source).group(0)
                        attributes = json.loads(re.search(self.attributes_pattern, page_source).group(0))
                        sub_result = [city_name, city_url, neighbor_name, neighbor, attributes]
                    except (AttributeError, ValueError):
                        # AttributeError: a pattern did not match (re.search gave None).
                        # ValueError: the captured attributes text is not valid JSON.
                        sub_result = ["Problem City", city_url, "Problem Neighbor",
                                      neighbor, "Problem Attributes"]
                    result.append(sub_result)
            else:
                result.append(["City Name", city_url, None, None, None])

            self.final_result.extend(result)
            self.cursor += 1

    def reset_cursor_n_results(self):
        """Discard collected rows and restart get_all_tags() from the first city."""
        self.cursor = 0
        self.final_result = []
        

In [3]:
# Build the scraper, download the index page and collect the city URLs.
# NOTE: the name `test` is rebound to a plain string in a later cell.
test = NextDoorInfo()
test.get_base_page()
test.get_all_city()
print(len(test.all_city_url))  # number of city links found

Got Page
First 5 Cities
https://nextdoor.com/city/acampo--ca/
https://nextdoor.com/city/acton--ca/
https://nextdoor.com/city/adelaide--ca/
https://nextdoor.com/city/adelanto--ca/
https://nextdoor.com/city/adin--ca/
1164


In [4]:
# Visit every city page and gather its neighborhood links (one HTTP request per city).
test.get_all_neighborhood()
print(len(test.all_neighborhood_url))  # one list is appended per city URL

https://nextdoor.com/city/acampo--ca/
https://nextdoor.com/city/acton--ca/
https://nextdoor.com/city/adelaide--ca/
No Neighbor
https://nextdoor.com/city/adelanto--ca/
https://nextdoor.com/city/adin--ca/
No Neighbor
https://nextdoor.com/city/agoura-hills--ca/
https://nextdoor.com/city/agua-dulce--ca/
https://nextdoor.com/city/aguanga--ca/
https://nextdoor.com/city/ahwahnee--ca/
https://nextdoor.com/city/alameda--ca/
https://nextdoor.com/city/alamo--ca/
https://nextdoor.com/city/albany--ca/
https://nextdoor.com/city/albion--ca/
https://nextdoor.com/city/alderpoint--ca/
No Neighbor
https://nextdoor.com/city/alhambra--ca/
https://nextdoor.com/city/aliso-viejo--ca/
https://nextdoor.com/city/alleghany--ca/
No Neighbor
https://nextdoor.com/city/alpaugh--ca/
No Neighbor
https://nextdoor.com/city/alpine--ca/
https://nextdoor.com/city/alpine-meadows--ca/
https://nextdoor.com/city/alta--ca/
https://nextdoor.com/city/altadena--ca/
https://nextdoor.com/city/altaville--ca/
No Neighbor
https://nextdo

https://nextdoor.com/city/cerritos--ca/
https://nextdoor.com/city/challenge--ca/
https://nextdoor.com/city/chatsworth--ca/
https://nextdoor.com/city/cherry-valley--ca/
https://nextdoor.com/city/chico--ca/
https://nextdoor.com/city/chinese-camp--ca/
No Neighbor
https://nextdoor.com/city/chino--ca/
https://nextdoor.com/city/chino-hills--ca/
https://nextdoor.com/city/chowchilla--ca/
https://nextdoor.com/city/chualar--ca/
https://nextdoor.com/city/chula-vista--ca/
https://nextdoor.com/city/citrus-heights--ca/
https://nextdoor.com/city/city-of-industry--ca/
https://nextdoor.com/city/claremont--ca/
https://nextdoor.com/city/clarksburg--ca/
https://nextdoor.com/city/clayton--ca/
https://nextdoor.com/city/clearlake--ca/
https://nextdoor.com/city/clearlake-oaks--ca/
https://nextdoor.com/city/cloverdale--ca/
https://nextdoor.com/city/clovis--ca/
https://nextdoor.com/city/coachella--ca/
https://nextdoor.com/city/coalinga--ca/
https://nextdoor.com/city/coarsegold--ca/
https://nextdoor.com/city/col

https://nextdoor.com/city/geyserville--ca/
https://nextdoor.com/city/gilroy--ca/
https://nextdoor.com/city/glencoe--ca/
https://nextdoor.com/city/glendale--ca/
https://nextdoor.com/city/glendora--ca/
https://nextdoor.com/city/glen-ellen--ca/
https://nextdoor.com/city/glenhaven--ca/
https://nextdoor.com/city/glenn--ca/
No Neighbor
https://nextdoor.com/city/glennville--ca/
https://nextdoor.com/city/gold-river--ca/
https://nextdoor.com/city/gold-run--ca/
https://nextdoor.com/city/goleta--ca/
https://nextdoor.com/city/gonzales--ca/
https://nextdoor.com/city/goodyears-bar--ca/
No Neighbor
https://nextdoor.com/city/goshen--ca/
No Neighbor
https://nextdoor.com/city/granada-hills--ca/
https://nextdoor.com/city/grand-terrace--ca/
https://nextdoor.com/city/granite-bay--ca/
https://nextdoor.com/city/grass-valley--ca/
https://nextdoor.com/city/graton--ca/
https://nextdoor.com/city/greenbrae--ca/
https://nextdoor.com/city/greenfield--ca/
https://nextdoor.com/city/greenwood--ca/
https://nextdoor.com

https://nextdoor.com/city/lockwood--ca/
No Neighbor
https://nextdoor.com/city/lodi--ca/
https://nextdoor.com/city/loleta--ca/
https://nextdoor.com/city/loma-linda--ca/
https://nextdoor.com/city/loma-mar--ca/
https://nextdoor.com/city/lomita--ca/
https://nextdoor.com/city/lompoc--ca/
https://nextdoor.com/city/lone-pine--ca/
https://nextdoor.com/city/long-barn--ca/
https://nextdoor.com/city/long-beach--ca/
https://nextdoor.com/city/lookout--ca/
No Neighbor
https://nextdoor.com/city/loomis--ca/
https://nextdoor.com/city/los-alamitos--ca/
https://nextdoor.com/city/los-altos--ca/
https://nextdoor.com/city/los-altos-hills--ca/
https://nextdoor.com/city/los-angeles--ca/
https://nextdoor.com/city/los-banos--ca/
https://nextdoor.com/city/los-gatos--ca/
https://nextdoor.com/city/los-molinos--ca/
https://nextdoor.com/city/los-olivos--ca/
No Neighbor
https://nextdoor.com/city/los-osos--ca/
https://nextdoor.com/city/lost-hills--ca/
No Neighbor
https://nextdoor.com/city/lotus--ca/
No Neighbor
https:

https://nextdoor.com/city/palm-springs--ca/
https://nextdoor.com/city/palo-alto--ca/
https://nextdoor.com/city/palo-cedro--ca/
https://nextdoor.com/city/palomar-park--ca/
No Neighbor
https://nextdoor.com/city/palos-verdes-estates--ca/
https://nextdoor.com/city/palos-verdes-peninsula--ca/
No Neighbor
https://nextdoor.com/city/palo-verde--ca/
No Neighbor
https://nextdoor.com/city/panorama-city--ca/
https://nextdoor.com/city/paradise--ca/
https://nextdoor.com/city/paramount--ca/
https://nextdoor.com/city/parker-dam--ca/
No Neighbor
https://nextdoor.com/city/parlier--ca/
https://nextdoor.com/city/pasadena--ca/
https://nextdoor.com/city/paso-robles--ca/
https://nextdoor.com/city/patterson--ca/
https://nextdoor.com/city/pauma-valley--ca/
https://nextdoor.com/city/paynes-creek--ca/
No Neighbor
https://nextdoor.com/city/pearblossom--ca/
https://nextdoor.com/city/pebble-beach--ca/
https://nextdoor.com/city/penngrove--ca/
https://nextdoor.com/city/penn-valley--ca/
https://nextdoor.com/city/penry

https://nextdoor.com/city/scotia--ca/
No Neighbor
https://nextdoor.com/city/scott-bar--ca/
No Neighbor
https://nextdoor.com/city/scotts-valley--ca/
https://nextdoor.com/city/seal-beach--ca/
https://nextdoor.com/city/seaside--ca/
https://nextdoor.com/city/sebastopol--ca/
https://nextdoor.com/city/seeley--ca/
No Neighbor
https://nextdoor.com/city/seiad-valley--ca/
No Neighbor
https://nextdoor.com/city/selma--ca/
https://nextdoor.com/city/sequoia-national-park--ca/
No Neighbor
https://nextdoor.com/city/shafter--ca/
https://nextdoor.com/city/shandon--ca/
https://nextdoor.com/city/shasta--ca/
No Neighbor
https://nextdoor.com/city/shasta-lake--ca/
https://nextdoor.com/city/shaver-lake--ca/
https://nextdoor.com/city/sheep-ranch--ca/
https://nextdoor.com/city/shell-beach--ca/
No Neighbor
https://nextdoor.com/city/sheridan--ca/
https://nextdoor.com/city/sherman-oaks--ca/
https://nextdoor.com/city/shingle-springs--ca/
https://nextdoor.com/city/shingletown--ca/
https://nextdoor.com/city/shoshone-

https://nextdoor.com/city/winnetka--ca/
https://nextdoor.com/city/winterhaven--ca/
No Neighbor
https://nextdoor.com/city/winters--ca/
https://nextdoor.com/city/winton--ca/
https://nextdoor.com/city/wishon--ca/
https://nextdoor.com/city/witter-springs--ca/
https://nextdoor.com/city/wofford-heights--ca/
https://nextdoor.com/city/woodacre--ca/
https://nextdoor.com/city/woodbridge--ca/
https://nextdoor.com/city/woodlake--ca/
https://nextdoor.com/city/woodland--ca/
https://nextdoor.com/city/woodland-hills--ca/
https://nextdoor.com/city/woodside--ca/
https://nextdoor.com/city/woodville--ca/
No Neighbor
https://nextdoor.com/city/woody--ca/
No Neighbor
https://nextdoor.com/city/wrightwood--ca/
https://nextdoor.com/city/yorba-linda--ca/
https://nextdoor.com/city/yorkville--ca/
https://nextdoor.com/city/yosemite-national-park--ca/
No Neighbor
https://nextdoor.com/city/yountville--ca/
https://nextdoor.com/city/yreka--ca/
https://nextdoor.com/city/yuba-city--ca/
https://nextdoor.com/city/yucaipa--

In [5]:
# Sample string of four comma-separated numbers; presumably map bounds laid out
# like the `mapBounds` value parsed further down — confirm.
# NOTE(review): this rebinds `test`, which held the NextDoorInfo instance until here.
test = '38.160254,-121.31110100000001,38.195248,-121.21034700000001'

In [7]:
# Display the current value of `test`.
test

In [15]:
# `out` was never defined in any remaining cell, so this failed on a fresh kernel.
# The recorded results of this cell and the next one equal the midpoints of the
# sample string in `test`, so `out` is rebuilt from it here.
out = test.split(',')
# Midpoint of elements 0 and 2 (presumably the two latitude bounds — confirm).
mean([float(out[0]),float(out[2])])

38.177751

In [16]:
# Midpoint of elements 1 and 3 of `out` (presumably the two longitude bounds,
# mirroring the `lon` computation on `map_bound` below — confirm).
# Relies on `out` already existing in the kernel.
mean([float(out[1]),float(out[3])])

-121.26072400000001

In [25]:
# Fetch a single neighborhood page and parse its HTML for the experiments below.
neighborhood_page = requests.get('https://nextdoor.com/neighborhood/acampoacampo--acampo--ca/')
neighborhood_soup = BeautifulSoup(neighborhood_page.content, 'html.parser')

In [35]:
# Matches the text after "mapBounds: '" up to the last "'," on the same line
# (the `.*` is greedy and does not cross newlines).
pattern = "(?<=mapBounds: ').*(?=',)"

In [45]:
# Extract the mapBounds text from the page source and split it on commas.
# Raises AttributeError if the pattern is not found (re.search returns None).
map_bound = (re.search(pattern,str(neighborhood_soup)).group(0)).split(',')

In [48]:
# Midpoint of bound elements 0 and 2, used as the latitude of the neighborhood.
# Assumes the bounds are ordered lat,lon,lat,lon — TODO confirm.
lat = mean([float(map_bound[0]),float(map_bound[2])])

In [49]:
# Midpoint of bound elements 1 and 3, used as the longitude of the neighborhood.
# Assumes the bounds are ordered lat,lon,lat,lon — TODO confirm.
lon = mean([float(map_bound[1]),float(map_bound[3])])

In [85]:
# Single-column frame (column label 0) holding four comma-separated coordinate
# strings: two distinct pairs, each listed twice. Used to time batch reverse
# geocoding in a later cell.
temp = DataFrame(['38.177751,-121.26072400000001','34.109331,-116.41116349999999',
                  '38.177751,-121.26072400000001','34.109331,-116.41116349999999'])

In [56]:
# Create the Nominatim client and reverse-geocode the midpoint computed above;
# the query is the text "<lat>,<lon>".
geolocator = Nominatim(user_agent="170Project")
location = geolocator.reverse("{},{}".format(lat, lon))

In [86]:
# Time how long it takes to reverse-geocode every row of column 0.
# NOTE(review): column 0 is overwritten with the values returned by
# geolocator.reverse, so re-running this cell would feed those results back
# into reverse() instead of the original strings — presumably unintended;
# re-create `temp` before re-running.
t0 = time.time()
temp[0] = temp[0].apply(geolocator.reverse)
t1 = time.time()
print(t1-t0)  # elapsed wall-clock seconds

6.072854042053223


In [88]:
# Report the type of the geocoding result, then list each top-level key of
# its raw payload, one per line.
print(type(location))
for raw_key in location.raw:
    print(raw_key)

<class 'geopy.location.Location'>
place_id
licence
osm_type
osm_id
lat
lon
display_name
address
boundingbox
