# Policy Scraping:

## Goals:

1. Generate a pipeline to ease the burden of manually updating values for Zoning Atribute tables on Socrata from online documents.

2. Provide a resource which can easily generate insights into the content and structure of city zoning policy documents.

## Steps:

1. Retrieve previously scraped policy documents from company s3 bucket.

2. Extract information by attribute using regular expression patterns.

3. Organize found attributes into a single pandas DataFrame.

4. Use document context of found attributes to identify potential zones to which they are applicable.

In [5]:
import os
import sys
import re
import datetime
from glob import glob

import pandas as pd
import numpy as np
import nltk
from typing import Tuple
# local import
sys.path.insert(0, '/Users/okeefe/Box/USF Data Science Practicum/2020-21/Shared Files/Utility Code')

from utils_io import *#makedirs_if_not_exists, pull_df_from_redshift_sql
from policy_scraping_support_functions import *

# Step 1:

- Using the below regular expressions and appropriate ranges, scrape the information from a single raw policy doc and organize into dictionary data structure. 

In [6]:
whitelist = ['zoning', 'buildings', 'housing', 'building', 'development', 'land']

attr_regex_dict = {'max_far': [r'(?i)floor.+area.+ratio',                                         
                               r'(?i)max.+far'],
                   'max_dua': [r'(?i)dua', 
                               r'(?i)dwelling.+unit.+acre', 
                               r'(?i)lots.+acres', 
                               r'(?i)acres.+dwelling.+unit', 
                               r'(?i)dwelling.+(acre|unit)', 
                               r'(?i)unit.+acres?'],
                   'building_height' : [r'(?i)building.+height', 
                                        r'(?i)maximum.+height', 
                                        r'(?i)height.+structures?'],
                   'units_per_lot' : [r'(?i)units?.+per.+lot', 
                                      r'(?i)units?.+sq(are)*.*fe*t'],
                   'minimum_lot_sqft': [r'(?i)unit.+per.+fe*t', 
                                        r'(?i)mini?m?u?m?.+lot.+sqa?r?e?.+fe*t', 
                                        r'(?i)sq(uare)*.+fo*e*t', 
                                        r'(?i)gross.+lot.+area']
                  }

attr_range_dict = {'max_far':(0, 5),
                   'max_dua': (0, 100),
                   'building_height' : (15, 250),
                   'units_per_lot' : (0.5, 5),
                   'minimum_lot_sqft': (600, 1e8) 
                  }

In [72]:
def most_Recent_Policy(city_policies: set) -> set:
    """
    Helper function for get_policies() which only pulls most recent policies  
    """
    
    found = {}

    for policy in city_policies:
            pieces = policy.split("/")
            date = pieces[2]
            title = pieces[3]
            if title in found.keys():
                new_time = datetime.strptime(date, "%m-%d-%y")
                old_time = datetime.strptime(found[title], "%m-%d-%y")
                if new_time > old_time:
                    found[title] = date
            else:
                found[title] = date

    
    kjafshar = list(city_policies)[0].split("/")[0]
    city = list(city_policies)[0].split("/")[1]

    new_policies = []
    for title, date in found.items():
        new_policies.append("/".join([kjafshar, city, date, title]))

    return set(new_policies)



def get_policies(whitelist=None, city=None, local=True, most_recent=False) -> set:
    """
    Returns a set of all relevant subpolicy paths, using whitelist as a filtering tool for relevant policy subsections
    
    Example use cases:
        - default: all policies in the local subdirectory PATH:                       get_policies()
        - all policies in s3 subdirectories:                                          get_policies(local=False)
        - all relevant policy subsections for "berkeley" in local subdirectory PATH:  get_policies(whitelist=whitelist, city="berkeley")
        - all relevant policy subsections for "berkeley" in s3:                       get_policies(whitelist=whitelist, city="berkeley", local=False)
        - most recent scraped policies for "berkeley" in s3:                          get_policies(whitelist=whitelist, city="berkeley", local=False, most_recent=True)
    """
    
    # Gather all local policies either from local or s3 bucket
    if local:
        PATH = 'City_Policies'
        keys = glob.glob(PATH + '/**/*.txt', recursive=True)
        
    else:
        list_key_args = {'bucket': 'mtc-redshift-upload', 'Prefix':f'test_kjafshar/'}
        keys = list_s3_keys(**list_key_args)
        
    # Filter keys via whitelist
    if whitelist != None:
        keys = {match for match in keys if any(term.lower() in normalize_text(match) for term in whitelist)}
    
    # Filter for specific city if given
    if city != None:
        keys = {match for match in keys if city == match.split("/")[1]}
        
    # Most recent documents if most_recent is switched to True
    if most_recent:
        keys = most_Recent_Policy(keys)
    
    return keys

In [18]:
def organize_by_attribute(attr_regex_dict: dict, rel_policy_path: set, local=True) -> dict:
    """
    Opens the desired policies via s3 bucket path and filters by attribute.
    Optional "local" flag pulls the policy data from local directory (debugging)
    Returns:
    
    Key: attribute
    Value: tuple (policy_section: str, values: list, context: str, line number: str, fraction into policy doc: str)
    """
    relevant_info={}

    
    for attr in attr_regex_dict.keys():
        #print(attr)
        relevant_info[attr] = list()
        attr_range = attr_range_dict[attr]

        for path in rel_policy_path:
            policy_section = path.split('/')[-1].partition('.')[0]
            if local: # If local parameter is true, open from directory
                #path = 'City_Policies/' + city.lower()+ "/"+ "/".join(path.split("/")[2:])
                data = read_text(path).split('\n')
                #print(data)
             
            else:     # Otherwise, pull from s3 bucket
                data = open_file_by_key(bucket, path)
                #print(data)
            
            regex_list = attr_regex_dict[attr]
            relevant_lines = list()
            
            for exp in regex_list:
                for num, line in enumerate(data):
                    if bool(re.search(exp, line)):
                        items = find_the_values(line, attr_range[0], attr_range[1])
                        fraction = num / len(data) * 100
                        if len(items) > 0:
                            entry = items[0]
                            vals = entry[0]
                            context = entry[1]

                            relevant_lines.append((policy_section, list(vals), context, num+1, fraction))
            
            relevant_info[attr].append(relevant_lines)

    return relevant_info

In [49]:
whitelist = ['zoning', 'buildings', 'housing', 'building', 'development', 'land']

# S3
test_policies = list(get_policies(whitelist=whitelist, city='los_altos', local=False, most_recent=True))
los_altos_attributes_s3 = organize_by_attribute(attr_regex_dict, test_policies, local=False)

test_policies = list(get_policies(whitelist=whitelist, city='los_altos', local=True, most_recent=True))
los_altos_attributes_local = organize_by_attribute(attr_regex_dict, test_policies, local=True)

assert los_altos_attributes_s3 == los_altos_attributes_local

print("Consistent for both local and s3")

Consistent for both local and s3


# Step 2: 

- Use above function to organize dictionaries for each policy of a given city into a pandas dataframe

In [44]:
def build_city_dataframe(city: str, attribute: str, city_policies: set, local=True) -> pd.DataFrame:
    """
    Takes the attribute dictionary created in "organize by attribute" and convert to a readable pandas dataframe.
    """
    
    attr_contexts = organize_by_attribute(attr_regex_dict, city_policies, local)

    rows = []
    for max_far_contexts in attr_contexts[attribute]:
        for found in max_far_contexts:
            policy = found[0]
            found_vals = found[1]
            found_vals = ", ".join(str(val) for val in found[1])
            context = found[2]
            lineno = found[3]
            fraction = found[4]
            rows.append([city, attribute, found_vals, context, policy, lineno, fraction])

    df_city = pd.DataFrame(rows, columns=["City", "Attribute","Values", "Context","Policy Subsection", "Line No.", "Fraction"])
    return df_city

# Step 3:

- Concatenate dataframes representing each city's zoning attributes into a single table.

In [57]:
whitelist = ['zoning', 'buildings', 'housing', 'building', 'development', 'land']
anomalies = ['walnut', 'oakley', 'san_mateo_county', 'hercules', 'fairfield']  # Cities that weren't scraped by section
whitelist.extend(anomalies)
get_policies(whitelist=whitelist, city='walnut creek', local=True, most_recent=True)

{'City_Policies/walnut creek/02-18-20/TITLE 1. GENERAL PROVISIONS.txt'}

In [74]:
all_cities = get_cities()
df_all_cities = pd.DataFrame(columns = ["City", "Attribute","Values", "Context","Policy Subsection", "Line No.", "Fraction"])

i=1
success = len(all_cities) * len(attr_regex_dict.keys())

for city in list(all_cities):
    print(f"{city} ({i}/{len(all_cities)})")
    whitelist = ['zoning', 'buildings', 'housing', 'building', 'development', 'land']
    anomalies = ['walnut', 'oakley', 'san_mateo_county', 'hercules', 'fairfield']  # Cities that didn't get picked up by whitelist (walnut = walnut creek)
    whitelist.extend(anomalies)
    try:
        city_policies = get_policies(whitelist=whitelist, city=city, local=True, most_recent=True)
    except Exception as e:
        print(f"{city} should be added to anomalies")
        print(e)

    i += 1
    for attr in attr_regex_dict.keys():
        try:
            city_df = build_city_dataframe(city, attr, city_policies)
            df_all_cities = df_all_cities.append(city_df)
        except Exception as e:
            print("Ooops! There was another sort of error")
            print(e)
            success -= 1

print(f"Extraction Success Rate = {success / (len(all_cities) * len(attr_regex_dict.keys()))}")

contra_costa_county (1/84)
san carlos (2/84)
sonoma_county (3/84)
richmond (4/84)
portola_valley (5/84)
monte_sereno (6/84)
hillsborough (7/84)
lafayette (8/84)
sunnyvale (9/84)
benicia (10/84)
rio vista (11/84)
american canyon (12/84)
belmont (13/84)
brisbane (14/84)
windsor (15/84)
cotati (16/84)
pittsburg (17/84)
cloverdale (18/84)
union city (19/84)
redwood_city (20/84)
menlo park (21/84)
pleasant hill (22/84)
los altos hills (23/84)
mill valley (24/84)
half moon bay (25/84)
sonoma (26/84)
oakley (27/84)
marin_county (28/84)
yountville (29/84)
san_anselmo (30/84)
dublin (31/84)
east_palo_alto (32/84)
south san francisco (33/84)
livermore (34/84)
san_mateo_county (35/84)
clayton (36/84)
hercules (37/84)
walnut creek (38/84)
saratoga (39/84)
brentwood (40/84)
san_jose (41/84)
millbrae (42/84)
hayward (43/84)
foster city (44/84)
alameda (45/84)
emeryville (46/84)
el_cerrito (47/84)
san_rafael (48/84)
morgan_hill (49/84)
los_gatos (50/84)
napa (51/84)
napa_county (52/84)
vacaville (53/

### Classes/functions to help format dataframe

In [109]:
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

from typing import Sequence

def find_Information(df: pd.DataFrame, city: str, attribute: str) -> None:
    """Prints the specific city/zoning attribute values and contexts pulled from scraped data"""
    df = df[df['City'] == city]
    df = df[df['Attribute'] == attribute]
    print(f"Number of contexts found for {attribute} in {city}: {len(df)}")
    with pd.option_context('display.max_colwidth', None, 'display.max_rows', None,):
      display(df)


def filelist(root) -> Sequence[str]:
    """Return a fully-qualified list of filenames under root directory; sort names alphabetically."""
    allfiles = []
    for path, subdirs, files in os.walk(root):
        for name in files:
            allfiles.append(os.path.join(path, name))
    return sorted(allfiles)


def print_Context(df: pd.DataFrame, line_num: int, context_window: int) -> None:
    """
    Uses dataFrame above to show the context of found lines in the dataFrame
    
    TODO: Edit such that the filelist calls "get_policies instead --> consistent with previous functions"
    """
    
    policy = df.loc[line_num]['Policy Subsection']
    city = df.loc[line_num]['City']

    root = '/Users/okeefe/Box/USF Data Science Practicum/2020-21/Okeefe/Project_1_Policy_Parsing/City_Policies'
    
    key_words = [policy, city]
    # print(key_words)
    all_files = filelist(root)
    # print(all_files)
    test = [path for path in all_files if all(word in path for word in key_words)]
    

    relevant_paths = [path for path in all_files if all(word in path for word in key_words)]
    # print(relevant_paths)
    hit = df.loc[line_num]['Line No.'] - 1

    print("City:", city)
    print("Attribute:",df.loc[line_num]['Attribute'])
    print("Policy Section:", policy)
    print("Line Number: ", hit)
    print("Line: ", df.loc[line_num]['Context'])
    print()
    print("Context:")
    print()
    path = relevant_paths[0]
    with open(path, 'r') as f:
        lines = f.readlines()#.split('\n')

    for i, line in enumerate(lines):
        if hit-context_window < i < hit+context_window:
            if i == hit: 
                print(color.BOLD+f"{i}:     {line}" +color.END)
            else:
                print(f"{i}:     {line}")

In [86]:
df_all_cities

Unnamed: 0,City,Attribute,Values,Context,Policy Subsection,Line No.
0,contra_costa_county,max_far,3.0,Floor Area. The maximum floor area allowed for...,Title 8 - ZONING,5089
1,contra_costa_county,max_far,3.0,Floor Area. The maximum floor area allowed for...,Title 8 - ZONING,5246
2,contra_costa_county,max_far,"1.0, 2.0, 3.0, 4.0","In reaching a decision, the zoning administrat...",Title 8 - ZONING,11800
3,contra_costa_county,max_far,"1.0, 2.0, 3.0, 4.0","In reaching a decision, the zoning administrat...",Title 8 - ZONING,11942
0,contra_costa_county,max_dua,1.0,"""Long-term bicycle parking"" means a covered, a...",Title 8 - ZONING,997
...,...,...,...,...,...,...
25,newark,minimum_lot_sqft,1000.0,Nurseries and Garden Center 1 per 500 square f...,TITLE 17 - ZONING,4314
26,newark,minimum_lot_sqft,1000.0,Building Materials Sales and Services 1 per 50...,TITLE 17 - ZONING,4319
27,newark,minimum_lot_sqft,"1000.0, 5000.0","Industrial Uses 1 per 1,000 square feet of flo...",TITLE 17 - ZONING,4320
28,newark,minimum_lot_sqft,150000.0,"150,001+ 2 plus 1 per each additional 150,000 ...",TITLE 17 - ZONING,4512


In [88]:
df_save = df_all_cities.reset_index().drop('index', axis=1)
df_save

Unnamed: 0,City,Attribute,Values,Context,Policy Subsection,Line No.
0,contra_costa_county,max_far,3.0,Floor Area. The maximum floor area allowed for...,Title 8 - ZONING,5089
1,contra_costa_county,max_far,3.0,Floor Area. The maximum floor area allowed for...,Title 8 - ZONING,5246
2,contra_costa_county,max_far,"1.0, 2.0, 3.0, 4.0","In reaching a decision, the zoning administrat...",Title 8 - ZONING,11800
3,contra_costa_county,max_far,"1.0, 2.0, 3.0, 4.0","In reaching a decision, the zoning administrat...",Title 8 - ZONING,11942
4,contra_costa_county,max_dua,1.0,"""Long-term bicycle parking"" means a covered, a...",Title 8 - ZONING,997
...,...,...,...,...,...,...
20541,newark,minimum_lot_sqft,1000.0,Nurseries and Garden Center 1 per 500 square f...,TITLE 17 - ZONING,4314
20542,newark,minimum_lot_sqft,1000.0,Building Materials Sales and Services 1 per 50...,TITLE 17 - ZONING,4319
20543,newark,minimum_lot_sqft,"1000.0, 5000.0","Industrial Uses 1 per 1,000 square feet of flo...",TITLE 17 - ZONING,4320
20544,newark,minimum_lot_sqft,150000.0,"150,001+ 2 plus 1 per each additional 150,000 ...",TITLE 17 - ZONING,4512


In [89]:
df_save.to_csv('City_Zoning_Attributes_Final.csv')

# Use Case

- Finding the minimum lot size  in square feet for Los Altos from its city policy

In [111]:
df_all_cities = pd.read_csv('City_Zoning_Attributes_Final.csv', index_col='Unnamed: 0')

In [112]:
find_Information(df=df_all_cities, city='los_altos', attribute='minimum_lot_sqft')

Number of contexts found for minimum_lot_sqft in los_altos: 103


Unnamed: 0,City,Attribute,Values,Context,Policy Subsection,Line No.
17656,los_altos,minimum_lot_sqft,"2019.0, 750.0","R313.1 Townhouse automatic fire sprinkler systems. An automatic residential fire sprinkler system shall be installed in accordance with National Fire Protection Association's (NFPA) Standard 13D in all new townhouses and in existing townhouses, when additions are made that increase the building area to more than the allowable Fire-Flow Appendix B, Tables B105.1(1) and B105.1(2) of the 2019 California Fire Code, and/or additions exceeding fifty (50) percent of the existing living area (existing square foot calculations shall not include existing basement) and/or additions exceeding seven hundred and fifty (750) square feet. When automatic fire sprinkler systems are required by this section, all associated attached garages shall be included. Additions over fifty (50) percent and/or seven hundred and fifty (750) square feet as referenced above, shall be treated as a new structure regarding installation of fire sprinkler systems. For the purpose of this section, removal of roof framing with associated exterior walls down to, or below the subfloor/slab shall be included in the above calculations. Therefore, the following shall apply:",Title 12 - BUILDINGS AND CONSTRUCTION,86
17657,los_altos,minimum_lot_sqft,"2019.0, 750.0","R313.2 One and two-family dwellings automatic fire sprinklers systems. An automatic residential fire sprinkler system shall be installed in accordance with National Fire Protection Association's (NFPA) Standard 13D in all new one and two-family dwellings and in existing dwellings, when additions are made that increase the building area to more than the allowable Fire-Flow Appendix Tables B105.1(1) and B105.1(2) of the 2019 California Fire Code, and/or additions exceeding fifty (50) percent of the existing living area (existing square foot calculations shall not include existing basement) and/or additions exceeding seven hundred and fifty (750) square feet. When automatic fire sprinkler systems are required by this section, all associated garages shall be included. Additions over fifty (50) percent and/or seven hundred and (750) square feet as referenced above, shall be treated as a new structure regarding installation of fire sprinkler systems. For the purpose of this section, removal of roof framing with associated exterior walls down to, or below the subfloor/slab shall be included in the above calculations. Therefore, the following shall apply:",Title 12 - BUILDINGS AND CONSTRUCTION,89
17658,los_altos,minimum_lot_sqft,2500.0,"315.8.6 Automatic sprinkler system. Buildings containing fire areas used for lithium battery storage or handling shall be equipped throughout with an approved automatic sprinkler system in accordance with Section 903.3.1.1. The design of the sprinkler system within each fire area shall not be less than that required for Extra Hazard Group 2 with a minimum design area of 2,500 square feet. Where the storage arrangement is required by other provisions of this code to be provided with a higher level of sprinkler system protection, the higher level of sprinkler system protection shall be provided.",Title 12 - BUILDINGS AND CONSTRUCTION,401
17659,los_altos,minimum_lot_sqft,30000.0,"The total building area is 30,000 square feet or more;",Title 12 - BUILDINGS AND CONSTRUCTION,479
17660,los_altos,minimum_lot_sqft,5000.0,"The total basement area is 5,000 square feet or more;",Title 12 - BUILDINGS AND CONSTRUCTION,481
17661,los_altos,minimum_lot_sqft,"1000.0, 3000.0",This chapter shall not apply to existing non-habitable residential accessory structures under three (3000) thousand square feet.,Title 12 - BUILDINGS AND CONSTRUCTION,539
17662,los_altos,minimum_lot_sqft,750.0,"An approved automatic sprinkler system shall be provided throughout all existing buildings, when additions are made that exceed fifty (50) percent and/or seven hundred and fifty (750) square feet of existing floor areas (area calculations shall not include existing basement floor areas).",Title 12 - BUILDINGS AND CONSTRUCTION,543
17663,los_altos,minimum_lot_sqft,750.0,"It is the intent of the city to ensure that all new utility services and relocated existing utility services are placed underground, including additions exceeding fifty (50) percent of floor area and/or seven hundred and fifty (750) square feet or more, excluding basements. For the purpose of this section, removal of roof framing with associated exterior walls down to, or below the subfloor/slab shall be included in the above calculations. Therefore, the following shall apply:",Title 12 - BUILDINGS AND CONSTRUCTION,1608
17664,los_altos,minimum_lot_sqft,"5000.0, 3999.0","Apartments, two family units or more per building with not more than one dwelling unit for each five thousand (5,000) square feet of lot area; provided, however, if after dividing the area of the site by five thousand (5,000), a remainder of less than five thousand (5,000) square feet but more than three thousand nine hundred ninety-nine (3,999) square feet is obtained, one additional dwelling unit may be located on the site;",Title 14 - ZONING,1385
17665,los_altos,minimum_lot_sqft,"20000.0, 1000.0, 43560.0, 10000.0, 15000.0","Density. The maximum number of permitted dwelling units shall be calculated by subtracting twenty (20) percent of the gross area of the parcel and dividing the remainder by the required lot area per dwelling unit in the appropriate R1 District (ten thousand (10,000) square feet in the R1-10 District, fifteen thousand (15,000) square feet in the R1-H District, twenty thousand (20,000) square feet in the R1-20 District, and forty-three thousand five hundred sixty (43,560) square feet in the R1-40 District).",Title 14 - ZONING,4781


In [114]:
print_Context(df=df_all_cities, line_num=17659, context_window=20)

City: los_altos
Attribute: minimum_lot_sqft
Policy Section: Title 12 - BUILDINGS AND CONSTRUCTION
Line Number:  478
Line:  The total building area is 30,000 square feet or more;

Context:

459:     When there are not more than two Group R-3 or accessory Group U occupancies, the dimension may be increased to a maximum of 200 feet.

460:     3.

461:     When apparatus roads cannot be installed because of topography, waterways, nonnegotiable grades or other similar conditions, an approved alternative means of fire protection shall be provided.

462:     Section 503.2.1 is amended to read as follows:

463:     Section 503.2.1 Dimensions. Fire apparatus access roads shall have an unobstructed width of not less than 20 feet (6096 mm), exclusive of shoulders or as required by fire department access road standards, except for approved security gates in accordance with Section 503.6, and an unobstructed vertical clearance of 13 feet 6 inches (4115 mm).

464:     Exception:

465:     When there

# Zone Candidates

- Search within the "context window" for any values that potentially be zones. Append any potential values as a column

In [116]:
def print_df(df: pd.DataFrame) -> None:
    """Creates a pretty-print of the dataframe (columns not truncated)"""
    with pd.option_context('display.max_colwidth', None, 'display.max_rows', None,):
        display(df.drop(['Fraction'], axis=1))

def zone_Candidates(df: pd.DataFrame, context_window: int) -> list:
    """
    Searches in the window of lines for any zone candidates to which an attribute may be relevant.
    
    Returns: list which is then appended as a new column to the inputted dataframe
    """
    zone_column = []
    
    for row in df.index:
        line_no = df.loc[row]['Line No.']
        policy = df.loc[row]['Policy Subsection']
        city = df.loc[row]['City']

        root = '/Users/okeefe/Box/USF Data Science Practicum/2020-21/Okeefe/City_Policies'
        
        key_words = [policy, city]
        all_files = filelist(root)
        relevant_paths = [path for path in all_files if all(word in path for word in key_words)]
        
        hit = df.loc[row]['Line No.'] - 1
        
        path = relevant_paths[0]
        extract_zones = []
        
        with open(path, 'r') as f:
            lines = f.readlines()

        for i, line in enumerate(lines):
            if hit-context_window < i < hit+context_window:
                pot_zone = re.findall(r'[A-Z]+\d-*\d*[A-Z]*', line)
                extract_zones.extend(pot_zone)
        
        if len(extract_zones) > 0:
            zones_found = ', '.join(val for val in list(set(extract_zones)))
        else:
            zones_found = np.nan

        zone_column.append(zones_found)
        
    
    
    return zone_column

In [13]:
idx = np.where((df_all_cities['City']=='los_altos') & (df_all_cities['Attribute']=='minimum_lot_sqft'))
df_test = df_all_cities.loc[idx]
print_df(df_test.head(10))

Unnamed: 0,City,Attribute,Values,Context,Policy Subsection,Line No.
13381,los_altos,minimum_lot_sqft,"2019.0, 750.0","R313.1 Townhouse automatic fire sprinkler systems. An automatic residential fire sprinkler system shall be installed in accordance with National Fire Protection Association's (NFPA) Standard 13D in all new townhouses and in existing townhouses, when additions are made that increase the building area to more than the allowable Fire-Flow Appendix B, Tables B105.1(1) and B105.1(2) of the 2019 California Fire Code, and/or additions exceeding fifty (50) percent of the existing living area (existing square foot calculations shall not include existing basement) and/or additions exceeding seven hundred and fifty (750) square feet. When automatic fire sprinkler systems are required by this section, all associated attached garages shall be included. Additions over fifty (50) percent and/or seven hundred and fifty (750) square feet as referenced above, shall be treated as a new structure regarding installation of fire sprinkler systems. For the purpose of this section, removal of roof framing with associated exterior walls down to, or below the subfloor/slab shall be included in the above calculations. Therefore, the following shall apply:",Title 12 - BUILDINGS AND CONSTRUCTION,86
13382,los_altos,minimum_lot_sqft,"2019.0, 750.0","R313.2 One and two-family dwellings automatic fire sprinklers systems. An automatic residential fire sprinkler system shall be installed in accordance with National Fire Protection Association's (NFPA) Standard 13D in all new one and two-family dwellings and in existing dwellings, when additions are made that increase the building area to more than the allowable Fire-Flow Appendix Tables B105.1(1) and B105.1(2) of the 2019 California Fire Code, and/or additions exceeding fifty (50) percent of the existing living area (existing square foot calculations shall not include existing basement) and/or additions exceeding seven hundred and fifty (750) square feet. When automatic fire sprinkler systems are required by this section, all associated garages shall be included. Additions over fifty (50) percent and/or seven hundred and (750) square feet as referenced above, shall be treated as a new structure regarding installation of fire sprinkler systems. For the purpose of this section, removal of roof framing with associated exterior walls down to, or below the subfloor/slab shall be included in the above calculations. Therefore, the following shall apply:",Title 12 - BUILDINGS AND CONSTRUCTION,89
13383,los_altos,minimum_lot_sqft,2500.0,"315.8.6 Automatic sprinkler system. Buildings containing fire areas used for lithium battery storage or handling shall be equipped throughout with an approved automatic sprinkler system in accordance with Section 903.3.1.1. The design of the sprinkler system within each fire area shall not be less than that required for Extra Hazard Group 2 with a minimum design area of 2,500 square feet. Where the storage arrangement is required by other provisions of this code to be provided with a higher level of sprinkler system protection, the higher level of sprinkler system protection shall be provided.",Title 12 - BUILDINGS AND CONSTRUCTION,401
13384,los_altos,minimum_lot_sqft,30000.0,"The total building area is 30,000 square feet or more;",Title 12 - BUILDINGS AND CONSTRUCTION,479
13385,los_altos,minimum_lot_sqft,5000.0,"The total basement area is 5,000 square feet or more;",Title 12 - BUILDINGS AND CONSTRUCTION,481
13386,los_altos,minimum_lot_sqft,"1000.0, 3000.0",This chapter shall not apply to existing non-habitable residential accessory structures under three (3000) thousand square feet.,Title 12 - BUILDINGS AND CONSTRUCTION,539
13387,los_altos,minimum_lot_sqft,750.0,"An approved automatic sprinkler system shall be provided throughout all existing buildings, when additions are made that exceed fifty (50) percent and/or seven hundred and fifty (750) square feet of existing floor areas (area calculations shall not include existing basement floor areas).",Title 12 - BUILDINGS AND CONSTRUCTION,543
13388,los_altos,minimum_lot_sqft,750.0,"It is the intent of the city to ensure that all new utility services and relocated existing utility services are placed underground, including additions exceeding fifty (50) percent of floor area and/or seven hundred and fifty (750) square feet or more, excluding basements. For the purpose of this section, removal of roof framing with associated exterior walls down to, or below the subfloor/slab shall be included in the above calculations. Therefore, the following shall apply:",Title 12 - BUILDINGS AND CONSTRUCTION,1608
13389,los_altos,minimum_lot_sqft,"5000.0, 3999.0","Apartments, two family units or more per building with not more than one dwelling unit for each five thousand (5,000) square feet of lot area; provided, however, if after dividing the area of the site by five thousand (5,000), a remainder of less than five thousand (5,000) square feet but more than three thousand nine hundred ninety-nine (3,999) square feet is obtained, one additional dwelling unit may be located on the site;",Title 14 - ZONING,1385
13390,los_altos,minimum_lot_sqft,"20000.0, 1000.0, 43560.0, 10000.0, 15000.0","Density. The maximum number of permitted dwelling units shall be calculated by subtracting twenty (20) percent of the gross area of the parcel and dividing the remainder by the required lot area per dwelling unit in the appropriate R1 District (ten thousand (10,000) square feet in the R1-10 District, fifteen thousand (15,000) square feet in the R1-H District, twenty thousand (20,000) square feet in the R1-20 District, and forty-three thousand five hundred sixty (43,560) square feet in the R1-40 District).",Title 14 - ZONING,4781


In [40]:
df_test['Zone Candidates'] = zone_row

In [26]:
#df_test.to_csv('Use_Case_Presentation.csv')

# Final Dataset
- With all the work above, we can see that many zones were found, but many zones were not!

In [40]:
df = pd.read_csv('Use_Case_Presentation.csv', index_col='Unnamed: 0')

In [118]:
df_with_zones = pd.read_csv('City_Zoning_Attributes_with_Zones_Final.csv').rename(columns={"Line":"Line No."})
print_df(df_with_zones.sample(45))

Unnamed: 0.1,Unnamed: 0,City,Attribute,Values,Context,Policy Subsection,Line No.,Zone_Candidates
10099,10099,pleasant hill,units_per_lot,"1.0, 4.0","B. Maximum limits. When a regulation is expressed in terms of maximum limits, any fractional result will be rounded down to the next lower whole number. For example, if a maximum limit of one dwelling unit for every 2,500 square feet in the MRL district is applied to a 12,000 square foot site, the resulting fraction of 4.8 is rounded down to four allowed dwelling units. (Ord. 710 § 35-42.2, 1996; 1991 code § 35-42.2)",Title 18 PLANNING AND LAND USE,9313,
15518,15518,burlingame,building_height,"40.0, 24.0, 30.0","The maximum lot coverage for all buildings and structures shall be forty (40) percent, including balconies, stairs, roof overhangs extending more than twenty-four (24) inches, trellises and improvements which exceed thirty (30) inches in height above adjacent existing grade. (Ord. 1863 § 10, (2011))",Title 25 ZONING,890,
7837,7837,alameda_county,max_dua,"1.0, 20.0, 17.0",The maximum dwelling unit density shall be one per twenty (20) acres and the minimum building site area shall be seventeen (17) acres provided the following criteria are met to the satisfaction of the planning director and by the time specified in the tentative map approving the subdivision:,Title 17 - ZONING,1741,
8936,8936,sunnyvale,minimum_lot_sqft,2500.0,"2.3. The structural design of the footing is based on a specified compressive strength, f’c, no greater than 2,500 pounds per square inch (psi) (17.2 MPa), regardless of the compressive strength specified in the construction documents or used in the footing construction.",Title 16,178,
11221,11221,contra_costa_county,minimum_lot_sqft,10000.0,"4. The structure exceeds 10,000 square feet, contains more than one fire area containing exhibition and display rooms, and is separated into two or more buildings by fire walls of less than four hour fire resistance rating without openings.",Title 7 - BUILDING REGULATIONS,3012,
13681,13681,yountville,max_dua,"1.0, 5.0","4. When a density bonus has been granted, the timing for the construction of the affordable housing units which includes the following provision: for every five dwelling units constructed at least one shall meet affordability requirements.",Title 17 ZONING,2896,
16265,16265,woodside,minimum_lot_sqft,7754.0,"5.5 acres 7,754 square feet",TITLE XV - LAND USAGE,4753,
1820,1820,daly_city,max_dua,"1.0, 4.0","b. Except that there shall be no more than four dwelling units for the first 2,500 square feet of lot area; thereafter, one additional dwelling unit shall be permitted for each additional 500 square feet of lot area.",Title 17 - ZONING,716,
18096,18096,calistoga,max_dua,1.0,"9. The conversion or demolition of existing dwelling units, except under one of the following circumstances:",Title 17 ZONING,3114,
4518,4518,brisbane,minimum_lot_sqft,5000.0,"The minimum area of any lot shall be five thousand (5,000) square feet, except as otherwise provided in subsection B of this section.",Title 17 - ZONING,1899,


In [122]:
df_zones_no_nan = df_with_zones[(~df_with_zones['Zone_Candidates'].isnull())]
print_df(df_zones_no_nan.sample(15))

Unnamed: 0.1,Unnamed: 0,City,Attribute,Values,Context,Policy Subsection,Line No.,Zone_Candidates
4060,4060,napa,max_dua,"8.0, 3.0",General Plan density range: 3 to 8 units/acre,Title 17 ZONING,5270,O2003
14823,14823,milpitas,max_dua,"3.0, 6.0","Where the allowable dwelling units exceed six (6) for a single parcel or the total area to be subdivided exceeds three (3) acres, all on-site utilities are to be placed underground.","Title XI - ZONING, PLANNING AND ANNEXATION",3849,"R3, R4"
4099,4099,napa,building_height,35.0,"2. Height. All uses and structures shall be designed to prevent hazard to flight that could occur as a result of very tall structures intruding into flight areas. Height limits shall be as in the underlying zoning district, or, if height limits are not specifically assigned by the underlying district, the height limit shall be 35 feet. Any project proposing heights over the applicable height limit shall require a use permit and be referred to the ALUC prior to final approval.",Title 17 ZONING,4233,O2003
11304,11304,san_ramon,max_dua,1.0,"On developments where progressive individual grading projects or several concurrent projects are being constructed by one owner, a continuing (blanket) bond which will cover all such projects may be accepted and the amount determined by the director.","TITLE C - CONSTRUCTION, DEVELOPMENT AND LAND USE",8332,"C7-67, C7-66, C7-55, C7-52, C5-168, C7-54, C7-53, C7-65, C7-56"
15169,15169,milpitas,minimum_lot_sqft,2500.0,"1. Accessory buildings up to 2,500 square feet in area, provided that the proposed structure is not adjacent to a residential or Mobile Home Park Overlay (-MHP) district or use, and provided that building height, parking, setback, yard coverage, Floor Area Ratio, landscaping, open space and other ordinance requirements are met. The following shall also apply:","Title XI - ZONING, PLANNING AND ANNEXATION",17547,"R4, R3, R1, R2"
4083,4083,napa,max_dua,17.0,“Dwelling unit” shall have the meaning set forth in Title 17 of this code.,Title 15 BUILDINGS AND CONSTRUCTION,1437,O2018
8394,8394,marin_county,minimum_lot_sqft,1450.0,"The maximum residential density shall not exceed one unit per 1,450 square feet of lot area (30 units per acre).",Title 22 - DEVELOPMENT CODE,4278,"H1, C1"
13231,13231,los_altos,max_dua,"1.0, 6.0, 15.0","Master Unit. In addition to the signs allowed to individual dwelling units, the office or master unit of multi-family residential uses in the R3 districts are allowed one sign not to exceed fifteen (15) square feet in area and six feet in height. Such sign is subject to a sign permit.",Title 14 - ZONING,5656,R3
16696,16696,sonoma_county,max_dua,"24.0, 30.0, 7.0","Certified Twenty-Four-Hour Property Manager . All vacation rentals operating within unincorporated Sonoma County must have a certified property manager who is available twenty-four (24) hours per days, seven (7) days per week during all times that the property is rented or used on a transient basis. Certified property managers may be professional property managers, realtors, property owners, or other designated person provided that the individual has successfully completed a training course and achieved a qualifying score on a county-administered certification test. Certification shall be granted by the county and may be revoked by the county. Once certified, a property manager must continue to comply with all provisions set forth in this section, including timely reporting of all complains and their resolutions, in order to remain certified. Certified property managers must be located within a thirty-mile radius of the vacation rental and must be available to respond to complaints at all times during the rental period. Any requested change to the certified property manager for a vacation rental property shall be made through submittal of a new vacation supplemental application or similar form provided by the department, and shall include the signature of the certified property manager and the desired effective date of the change. In no case may a vacation rental operate without a current certified property manager. Operation of a vacation rental without a valid certified property manager shall be considered a violation of this section. The name and twenty-four-hour contact information of the certified property manager shall be provided to any interested party upon request.",CHAPTER 26 - SONOMA COUNTY ZONING REGULATIONS,7938,"L25, L08, L50, L02"
18884,18884,mountain_view,building_height,32.0,"A 32 sq. ft. None One sign, maximum height = 12 ft., unlighted, no closer than 15 feet to right-of-way. N/A N/A",CHAPTER 36,3582,"R4, R1, R3D, R2, R3"
