### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import time
import re
import folium
import pandas as pd

### Constants

In [2]:
# Base URL of Boston's 311 site; listing-page and report paths are appended to it.
BOSTON_URL = "https://311.boston.gov"

### Main Page Scraper Class

In [3]:
class Scraper:
    """Scrapes Boston's 311 main page with BeautifulSoup.

    Instance fields:
        current_page: number of the last listing page fetched (starts at 1).
        report_urls: absolute report URLs, filled in by ``get_report_urls``.
        soup: parsed main page; the bodies of later pages are appended to it.
    """

    # ``get_next_page_soup`` stops requesting pages once this page is reached.
    MAX_PAGE = 20
    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.current_page = 1
        self.report_urls = []

        # Get main page soup; fail loudly on an HTTP error instead of parsing an error page.
        res = requests.get(BOSTON_URL, timeout=self.REQUEST_TIMEOUT)
        res.raise_for_status()
        self.soup = BeautifulSoup(res.text, "html5lib")

    def get_next_page_soup(self):
        """Fetches the next listing page and appends its <body> to ``self.soup``.

        Does nothing once ``MAX_PAGE`` has been reached.

        Raises:
            requests.RequestException: if the request times out or returns an HTTP error.
        """
        if self.current_page >= self.MAX_PAGE:
            return

        time.sleep(1)  # pause between page requests
        next_page = self.current_page + 1

        res = requests.get(BOSTON_URL + "/?page=" + str(next_page), timeout=self.REQUEST_TIMEOUT)
        res.raise_for_status()
        new_soup = BeautifulSoup(res.text, "html5lib")

        if new_soup.body is not None:
            self.soup.append(new_soup.body)

        # Only advance after a successful fetch so a failed request does not skip a page.
        self.current_page = next_page

    def get_report_urls(self):
        """Builds ``self.report_urls`` from the 'onclick' attribute of every '.report' element.

        The report path is the first single-quoted piece of the attribute text; it is
        prefixed with ``BOSTON_URL``. Elements without an 'onclick' attribute, or whose
        attribute contains no quoted piece, are skipped instead of raising.
        """
        urls = []
        for elem in self.soup.select(".report"):
            onclick_text = elem.get("onclick")
            if not onclick_text:
                continue

            pieces = onclick_text.split("'")
            if len(pieces) < 2:
                continue

            urls.append(BOSTON_URL + pieces[1])

        self.report_urls = urls

In [4]:
# Number of listing pages to add on top of the main page.
EXTRA_PAGES = 1

scraper = Scraper()
for _ in range(EXTRA_PAGES):
    scraper.get_next_page_soup()
scraper.get_report_urls()
scraper.report_urls

['https://311.boston.gov/reports/101002277511',
 'https://311.boston.gov/reports/101002277510',
 'https://311.boston.gov/reports/101002277509',
 'https://311.boston.gov/reports/101002277508',
 'https://311.boston.gov/reports/101002277506',
 'https://311.boston.gov/reports/101002277498',
 'https://311.boston.gov/reports/101002277505',
 'https://311.boston.gov/reports/101002277502',
 'https://311.boston.gov/reports/101002277297',
 'https://311.boston.gov/reports/101002277501',
 'https://311.boston.gov/reports/101002277500',
 'https://311.boston.gov/reports/101002277499',
 'https://311.boston.gov/reports/101002277304',
 'https://311.boston.gov/reports/101002277497',
 'https://311.boston.gov/reports/101002277495',
 'https://311.boston.gov/reports/101002277494',
 'https://311.boston.gov/reports/101002277493',
 'https://311.boston.gov/reports/101002277490',
 'https://311.boston.gov/reports/101002277491',
 'https://311.boston.gov/reports/101002277492']

### Report Class


In [5]:
class Report:
    """Gets individual report data.

    The report page is downloaded on construction. Each ``get_*`` method parses one
    field out of the page and stores it in ``self.report_dict``.
    """

    # Seconds to wait for the server before the request is abandoned.
    REQUEST_TIMEOUT = 10

    def __init__(self, report_url):
        self.report_url = report_url

        # Fail loudly on an HTTP error instead of parsing an error page.
        res = requests.get(self.report_url, timeout=self.REQUEST_TIMEOUT)
        res.raise_for_status()
        self.soup = BeautifulSoup(res.text, "html5lib")

        self.report_dict = {}

    def _labelled_text(self, label):
        """Returns the text of the <p> enclosing the <strong> whose text is ``label``.

        Raises:
            ValueError: if no such <strong>, or no enclosing <p>, is on the page.
        """
        # ``string=`` is the current name of the argument formerly called ``text=``.
        strong = self.soup.find("strong", string=label)
        paragraph = strong.find_parent("p") if strong is not None else None
        if paragraph is None:
            raise ValueError("No {!r} field found in {}".format(label, self.report_url))
        return paragraph.get_text()

    def get_id(self):
        """Get the id of the report (the last path segment of its URL)."""
        # Drop a trailing slash first so the last segment is never empty.
        self.report_dict['id'] = self.report_url.rstrip("/").split("/")[-1]

    def get_category(self):
        """Gets the category of the report (heading text before ' at ')."""
        category_text = self.soup.select(".content-head")[0].h2.get_text()
        self.report_dict['category'] = category_text.split(" at ")[0]

    def get_lat_long(self):
        """Get latitude and longitude of the report as floats.

        Raises:
            ValueError: if the coordinates field is missing or holds fewer than two decimals.
        """
        lat_long_elem_text = self._labelled_text("coordinates lat,lng: ")
        # Raw string: "\d" in a plain string literal is an invalid escape sequence.
        lat_long = re.findall(r"[+-]?\d+\.\d+", lat_long_elem_text)
        if len(lat_long) < 2:
            raise ValueError("Could not parse coordinates from {!r}".format(lat_long_elem_text))

        self.report_dict['latitude'] = float(lat_long[0])
        self.report_dict['longitude'] = float(lat_long[1])

    def get_address(self):
        """Gets the address of the report (everything after the 'address: ' label).

        Raises:
            ValueError: if the address field is missing from the page.
        """
        address_text = self._labelled_text("address: ")
        # maxsplit=1 keeps the whole remainder even if the label text recurs in it.
        self.report_dict['address'] = address_text.split("address: ", 1)[1]

In [6]:
# Download and parse every report found on the listing pages, one dict per report.
report_dicts = []
for report_url in scraper.report_urls:
    time.sleep(1)  # pause between report requests

    report = Report(report_url)
    report.get_id()
    report.get_category()
    report.get_lat_long()
    report.get_address()

    report_dicts.append(report.report_dict)


In [7]:
# One row per scraped report, with the columns in a fixed order.
report_columns = ['id', 'category', 'address', 'latitude', 'longitude']
df = pd.DataFrame(report_dicts, columns=report_columns)
df

Unnamed: 0,id,category,address,latitude,longitude
0,101002277511,Other,"106 Saratoga St, East Boston",42.376858,-71.036042
1,101002277510,Illegal Parking,Intersection Of Worthington St & Huntington Av...,42.335173,-71.101723
2,101002277509,Litter,"48 62 Brookline Ave, Boston",42.347394,-71.098166
3,101002277508,Residential Trash out Illegally,"16 Myopia Rd, Hyde Park",42.263618,-71.127081
4,101002277506,Litter,"998 Blue Hill Ave, Dorchester",42.289967,-71.089148
5,101002277498,Other,"Intersection Of N Bennet St & Salem St, Boston",42.365815,-71.055016
6,101002277505,Illegal Parking,"15 Montrose St, Roxbury",42.323603,-71.081062
7,101002277502,Schedule a Bulk Item Pickup,"Intersection Of N Bennet St & Salem St, Boston",42.365875,-71.055062
8,101002277297,Other,"36 Worcester Sq, Roxbury",42.33642,-71.073918
9,101002277501,Illegal Graffiti,Intersection Of Hawthorne Pl & William Cardina...,42.363444,-71.066525


### Folium map of all reports

In [8]:
locations = df[['latitude', 'longitude']]
categories = df['category'].tolist()
location_list = locations.values.tolist()

# Centre the map on the first report.
loc_map = folium.Map(location=location_list[0], zoom_start=12)

# One marker per report, with the report's category as its popup.
for location, category in zip(location_list, categories):
    folium.Marker(location, popup=category).add_to(loc_map)
    
loc_map


### Folium map for a user-selected category

In [9]:
# Strip surrounding whitespace so a stray space cannot defeat an exact category match.
search_category = input("Enter a category to search for: ").strip()

Enter a category to search for: Illegal Parking


In [10]:
# Keep only the rows whose category equals the one entered above.
category_matches = df['category'] == search_category
search_df = df[category_matches]
search_df

Unnamed: 0,id,category,address,latitude,longitude
1,101002277510,Illegal Parking,Intersection Of Worthington St & Huntington Av...,42.335173,-71.101723
6,101002277505,Illegal Parking,"15 Montrose St, Roxbury",42.323603,-71.081062
10,101002277500,Illegal Parking,"6 Beethoven St, Roxbury",42.315362,-71.099243
11,101002277499,Illegal Parking,"222 Foster St, Apt 1, Brighton",42.342879,-71.157976
13,101002277497,Illegal Parking,"1 Centre Ave, Dorchester",42.292879,-71.062721
14,101002277495,Illegal Parking,"131 Arlington St, Boston",42.349046,-71.069664


In [11]:
locations = search_df[['latitude', 'longitude']]
categories = search_df['category'].tolist()
location_list = locations.values.tolist()

if location_list:
    # Centre the map on the first matching report.
    map_center = location_list[0]
else:
    # No report matched the entered category: location_list[0] would raise IndexError,
    # so centre on the mean position of all reports and show a map without markers.
    print("No reports found for category {!r}.".format(search_category))
    map_center = df[['latitude', 'longitude']].mean().tolist()

loc_map = folium.Map(location=map_center, zoom_start=12)

# One marker per matching report, with the report's category as its popup.
for location, category in zip(location_list, categories):
    folium.Marker(location, popup=category).add_to(loc_map)
    
loc_map