### Web scraping Data Source

In [None]:
# pip install -U selenium

In [None]:
# pip install -U webdriver_manager

In [7]:
import pandas as pd
import os
from selenium import webdriver
from selenium.webdriver.common.by import By # used to import different ways to access data in the XML or HTML file
from selenium.webdriver.chrome.service import Service # no longer need to download a driver file, use service
from webdriver_manager.chrome import ChromeDriverManager # used to manage the Chrome driver to emulate a Chrome web browser

from selenium import webdriver
# needed to randomize time on the page and scrolling
# import time
# import random

In [8]:
# initialize the Selenium web driver (Chrome in this case)
browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# navigate to the web page using the URL

url = "https://www.laalmanac.com/population/po24la.php"

browser.get(url)
browser.maximize_window()


In [33]:
def scrape_population(browser):
    # Find the first <thead> element
    first_thead = browser.find_element(By.XPATH, "//table[1]/thead[1]")
    header_rows = first_thead.find_elements(By.XPATH, ".//tr")
    
    for header_row in header_rows:
        print(header_row.text)  # Process each header row (if needed)

    # Find all <tbody> elements
    tbody_elements = browser.find_elements(By.XPATH, "//tbody")
    print(len(tbody_elements))

    # Create empty lists to store the extracted data
    total_population = []
    american_indian_alaska_native = []
    asian = []
    african_american = []
    native_hawaiian_pacific_islander = []
    white = []
    other_race = []
    two_plus_races = []
    hispanic_latino = []

    # Loop through each <tbody> to get the rows
    for tbody in tbody_elements:
        rows = tbody.find_elements(By.XPATH, ".//tr")  # Fixed relative XPath
        
        for row in rows[::2]:  # Skip every other row
            print(row.text)
            
            # Extract data
            total_population.append(row.find_element(By.XPATH, './td[1]').text)
            american_indian_alaska_native.append(row.find_element(By.XPATH, './td[2]').text)
            asian.append(row.find_element(By.XPATH, './td[3]').text)
            african_american.append(row.find_element(By.XPATH, './td[4]').text)
            native_hawaiian_pacific_islander.append(row.find_element(By.XPATH, './td[5]').text)
            white.append(row.find_element(By.XPATH, './td[6]').text)
            other_race.append(row.find_element(By.XPATH, './td[7]').text)
            two_plus_races.append(row.find_element(By.XPATH, './td[8]').text)
            hispanic_latino.append(row.find_element(By.XPATH, './td[9]').text)

    return (
        total_population, american_indian_alaska_native, asian,
        african_american, native_hawaiian_pacific_islander, white,
        other_race, two_plus_races, hispanic_latino
    )

# Call the function and unpack results
(
    total_population, american_indian_alaska_native, asian,
    african_american, native_hawaiian_pacific_islander, white,
    other_race, two_plus_races, hispanic_latino
) = scrape_population(browser)

Neighborhood Total Population, All Races Population of One Race Alone† Two or More Races** Hispanic or Latino††
American Indian & Alaska Native Asian Black or African American Native Hawaiian & Other Pacific Islander White (Not Hispanic or Latino)‡ Some Other Race
51
City of L.A. 3,857,897 44,777 463,960 328,445 5,627 1,092,687 967,895 607,324 1,822,163
Adams-Normandie 6,791 129 394 783 63 663 2,673 944 4,783
Arleta 33,884 452 4,430 181 46 2,958 12,733 7,266 25,861
Arlington Heights 13,269 121 1,919 2,293 0 744 5,048 1,755 8,195
Arlington Park 2,896 68 32 1,169 0 122 929 95 1,508
Atwater Village 10,098 41 2,129 113 0 3,953 2,075 1,278 3,238
Baldwin Hills 25,210 216 535 13,592 67 2,122 4,224 3,546 8,433
Bel Air 7,351 3 972 242 0 5,382 109 576 370
Beverly Crest 5,662 0 301 135 0 4,668 36 405 258
Beverly Glen 3,880 0 431 45 0 2,741 84 559 447
Beverlywood 4,867 0 789 354 0 2,918 87 430 521
Boyle Heights 85,993 1,772 2,255 1,047 26 2,273 56,200 12,298 79,855
Brentwood 33,341 62 2,735 381 55

West Adams 22,914 113 618 5,664 0 2,338 6,609 4,253 13,711
West Adams Terrace/Kinney Heights/Berkely Square 9,085 35 537 3,353 0 799 1,779 1,525 3,972
West Hills 33,097 376 4,838 1,564 39 19,992 1,421 3,771 4,273
West LA 48,625 78 10,035 1,818 70 25,711 3,305 5,399 7,753
West Toluca Lake 4,215 0 484 279 0 2,114 449 751 1,239
Westchester 41,624 370 6,857 4,122 87 19,930 1,572 6,689 7,501
Western Heights/Sugar Hill/West Adams Heights 2,965 36 564 596 0 272 1,176 185 1,235
Western Wilton 4,564 0 2,375 122 0 302 749 559 1,514
Westlake & Historic Filipinotown 114,106 3,061 23,612 8,430 220 7,271 46,918 12,323 72,714
Westside Village 10,029 15 3,025 511 64 4,302 843 888 1,477
Westwood 51,900 187 13,918 2,350 170 25,775 3,021 4,597 6,855
Wilmington 52,030 540 1,635 1,697 161 1,839 12,306 7,379 46,595
Wilshire Center 60,303 949 20,745 3,619 117 5,741 18,871 6,648 28,516
Wilshire Vista 2,201 9 109 278 0 831 308 576 407
Wilshire Vista Heights 2,773 0 243 717 0 937 459 263 672
Windsor Square 2,78

In [34]:
# form a pandas dataframe
scrape_population_df = pd.DataFrame({
        "Total Population": total_population,
        "American Indian and Alaska Native": american_indian_alaska_native,
        "Asian": asian,
        "African American": african_american,
        "Native Hawaiian and Other Pacific Islander": native_hawaiian_pacific_islander,
        "White": white,
        "Some Other Race": other_race,
        "Two or More Races":two_plus_races,
        "Hispanic or Latino":hispanic_latino,
    
})


# display the DataFrame
scrape_population_df

Unnamed: 0,Total Population,American Indian and Alaska Native,Asian,African American,Native Hawaiian and Other Pacific Islander,White,Some Other Race,Two or More Races,Hispanic or Latino
0,City of L.A.,3857897,44777,463960,328445,5627,1092687,967895,607324
1,Adams-Normandie,6791,129,394,783,63,663,2673,944
2,Arleta,33884,452,4430,181,46,2958,12733,7266
3,Arlington Heights,13269,121,1919,2293,0,744,5048,1755
4,Arlington Park,2896,68,32,1169,0,122,929,95
...,...,...,...,...,...,...,...,...,...
147,Wilshire Vista,2201,9,109,278,0,831,308,576
148,Wilshire Vista Heights,2773,0,243,717,0,937,459,263
149,Windsor Square,2781,0,1246,109,0,794,100,481
150,Winnetka,75191,694,13114,2999,46,23594,20575,9824


In [36]:
total_population = scrape_population_df.to_csv('total_la_population.csv', index=False)