In [361]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [363]:
# Page that holds the sales-tax table scraped by the cells below.
url = 'https://laalmanac.com/government/gx11.php'

# A timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() stops the notebook on an HTTP error instead of letting an
# error page be parsed as if it were the data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# 'html' asks Beautiful Soup for an HTML parser without pinning which one.
soup = BeautifulSoup(page.text, 'html')

In [365]:
# Take the first <table> in the document; find() returns None when there is no
# match, so fail here with a clear message rather than later with an
# AttributeError on None.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the fetched page; the page layout may have changed.')

In [367]:
# Every <tr> element under the table. Despite the name these are table rows;
# the next cell skips the first one.
column_data = table.find_all(name='tr')

In [369]:
list = []
for row in column_data[1:]:
    row_data = row.find_all('td')
    individual_row_data = [data.text.strip() for data in row_data]
    list.append(individual_row_data)

In [371]:
df = pd.DataFrame(list, columns=['City', 'Tax Rate %'])

In [373]:
# Display the scraped table; at this point the rates are still text such as '9.50%'.
df

Unnamed: 0,City,Tax Rate %
0,Los Angeles County Unincorporated Areas,9.50%
1,Agoura Hills,9.50%
2,Alhambra,10.25%
3,Arcadia,10.25%
4,Artesia,9.50%
...,...,...
84,Walnut,9.50%
85,West Covina,9.50%
86,West Hollywood,10.25%
87,Westlake Village,9.50%


In [375]:
def clean_and_convert(value):
    """Convert a scraped rate such as '9.50%' or '1,234' into a number.

    Parameters
    ----------
    value : str, number or None
        Raw cell content. Strings are trimmed, a trailing '%' is dropped and
        thousands separators (',') are removed before conversion. Values that
        are not strings are passed to ``pd.to_numeric`` unchanged, so running
        the conversion cell a second time does not fail.

    Returns
    -------
    number
        The numeric value, or NaN when the input is None or cannot be parsed.
    """
    if value is None:
        return float('nan')
    if not isinstance(value, str):
        return pd.to_numeric(value, errors='coerce')
    # Trim first: otherwise trailing whitespace hides the '%' from rstrip and
    # the whole value is coerced to NaN.
    cleaned = value.strip().rstrip('%').replace(',', '').strip()
    return pd.to_numeric(cleaned, errors='coerce')  # unparseable text becomes NaN

# Convert every column except 'City' to numbers. Each column is replaced as a
# whole (rather than written through .loc) so it ends up with a float dtype,
# and Series.map is used because DataFrame.applymap is deprecated.
for column in df.columns[df.columns != 'City']:
    df[column] = df[column].map(clean_and_convert).astype(float)

  df.loc[:, df.columns != 'City'] = df.loc[:, df.columns != 'City'].applymap(clean_and_convert)


In [377]:
# Display the table again to check the rate column after the numeric conversion.
df

Unnamed: 0,City,Tax Rate %
0,Los Angeles County Unincorporated Areas,9.5
1,Agoura Hills,9.5
2,Alhambra,10.25
3,Arcadia,10.25
4,Artesia,9.5
...,...,...
84,Walnut,9.5
85,West Covina,9.5
86,West Hollywood,10.25
87,Westlake Village,9.5


In [379]:
# Place names that should all be present in the final table. Names missing from
# df['City'] are appended in a later cell with a fixed rate, and the trailing
# '†' marker is removed from the names after that.
# NOTE(review): '†' presumably flags unincorporated communities — confirm
# against wherever this list was taken from.
cities = ['Los Angeles County',
 'Acton†',
 'Agoura Hills',
 'Agua Dulce†',
 'Alhambra',
 'Alondra Park†',
 'Altadena†',
 'Arcadia',
 'Artesia',
 'Avalon',
 'Avocado Heights†',
 'Azusa',
 'Baldwin Park',
 'Bell',
 'Bell Gardens',
 'Bellflower',
 'Beverly Hills',
 'Bradbury',
 'Burbank',
 'Calabasas',
 'Carson',
 'Castaic†',
 'Cerritos',
 'Charter Oak†',
 'Citrus†',
 'Claremont',
 'Commerce',
 'Compton',
 'Covina',
 'Cudahy',
 'Culver City',
 'Del Aire†',
 'Desert View Highlands†',
 'Diamond Bar',
 'Downey',
 'Duarte',
 'East Los Angeles†',
 'East Pasadena†',
 'East Rancho Dominguez†',
 'East San Gabriel†',
 'East Whittier†',
 'El Monte',
 'El Segundo',
 'Elizabeth Lake†',
 'Florence-Graham†',
 'Gardena',
 'Glendale',
 'Glendora',
 'Green Valley†',
 'Hacienda Heights†',
 'Hasley Canyon†',
 'Hawaiian Gardens',
 'Hawthorne',
 'Hermosa Beach',
 'Hidden Hills',
 'Huntington Park',
 'Industry',
 'Inglewood',
 'Irwindale',
 'La Cañada Flintridge',
 'La Crescenta-Montrose†',
 'La Habra Heights',
 'La Mirada',
 'La Puente',
 'La Verne',
 'Ladera Heights†',
 'Lake Hughes†',
 'Lake Los Angeles†',
 'Lakewood',
 'Lancaster',
 'Lawndale',
 'Lennox†',
 'Leona Valley†',
 'Littlerock†',
 'Lomita',
 'Long Beach',
 'Los Angeles',
 'Lynwood',
 'Malibu',
 'Manhattan Beach',
 'Marina del Rey†',
 'Mayflower Village†',
 'Maywood',
 'Monrovia',
 'Montebello',
 'Monterey Park',
 'North El Monte†',
 'Norwalk',
 'Palmdale',
 'Palos Verdes Estates',
 'Paramount',
 'Pasadena',
 'Pepperdine University†',
 'Pico Rivera',
 'Pomona',
 'Quartz Hill†',
 'Rancho Palos Verdes',
 'Redondo Beach',
 'Rolling Hills',
 'Rolling Hills Estates',
 'Rose Hills†',
 'Rosemead',
 'Rowland Heights†',
 'San Dimas',
 'San Fernando',
 'San Gabriel',
 'San Marino',
 'San Pasqual†',
 'Santa Clarita',
 'Santa Fe Springs',
 'Santa Monica',
 'Sierra Madre',
 'Signal Hill',
 'South El Monte',
 'South Gate',
 'South Monrovia Island†',
 'South Pasadena',
 'South San Gabriel†',
 'South San Jose Hills†',
 'South Whittier†',
 'Stevenson Ranch†',
 'Sun Village†',
 'Temple City',
 'Topanga†',
 'Torrance',
 'Val Verde†',
 'Valinda†',
 'Vernon',
 'View Park-Windsor Hills†',
 'Vincent†',
 'Walnut',
 'Walnut Park†',
 'West Athens†',
 'West Carson†',
 'West Covina',
 'West Hollywood',
 'West Puente Valley†',
 'West Rancho Dominguez†',
 'West Whittier-Los Nietos†',
 'Westlake Village',
 'Westmont†',
 'Whittier',
 'Willowbrook†']




In [381]:
# Rate given to every listed place that the scraped table does not cover.
# NOTE(review): 9.5 equals the 'Los Angeles County Unincorporated Areas' rate in
# the output shown above — confirm it is still correct when the data is refreshed.
MISSING_CITY_TAX_RATE = 9.5

# Build the lookup once instead of scanning the whole column for every name.
known_cities = set(df['City'])
missing_cities = [city for city in cities if city not in known_cities]

# One row per missing place, all carrying the default rate.
new_entries = pd.DataFrame({'City': missing_cities, 'Tax Rate %': MISSING_CITY_TAX_RATE})

# Append only when there is something to add, so concat never sees an empty frame.
if missing_cities:
    df = pd.concat([df, new_entries], ignore_index=True)

In [383]:
# Display the table after the missing places have been appended.
df

Unnamed: 0,City,Tax Rate %
0,Los Angeles County Unincorporated Areas,9.5
1,Agoura Hills,9.5
2,Alhambra,10.25
3,Arcadia,10.25
4,Artesia,9.5
...,...,...
139,West Puente Valley†,9.5
140,West Rancho Dominguez†,9.5
141,West Whittier-Los Nietos†,9.5
142,Westmont†,9.5


In [385]:
# Strip the '†' marker from the place names (literal match, not a regex).
city_names = df['City']
df['City'] = city_names.str.replace('†', '', regex=False)


In [393]:
# Remove the county-wide "Unincorporated Areas" summary row. It is selected by
# its label instead of by position 0, so re-running this cell cannot delete a
# real city once the summary row is gone.
is_county_summary = df['City'].str.contains('Unincorporated', regex=False, na=False)
df = df[~is_county_summary].reset_index(drop=True)
df

Unnamed: 0,City,Tax Rate %
0,Agoura Hills,9.5
1,Alhambra,10.25
2,Arcadia,10.25
3,Artesia,9.5
4,Avalon,10.0
...,...,...
138,West Puente Valley,9.5
139,West Rancho Dominguez,9.5
140,West Whittier-Los Nietos,9.5
141,Westmont,9.5


In [401]:
# Write the final table to disk, leaving out the index column.
output_path = 'tax.csv'
df.to_csv(output_path, index=False)