# Beautifying neighborhoods in St. Louis

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd



## Murals

In [2]:
# Number of murals in the three key neighborhoods.
# NICE TO HAVE - number of murals in neighborhoods adjacent to the three key neighborhoods.
#
# "??" marks a count that has not been collected yet.
# Names carry no leading/trailing whitespace so they compare equal to the
# neighborhood names scraped elsewhere in this notebook
# (e.g. "Covenant Blu-Grand Center").

murals = [
    {"neighborhood_name": "Old North St. Louis", "num_murals": "??"},
    {"neighborhood_name": "Forest Park South East", "num_murals": "??"},
    {"neighborhood_name": "Covenant Blu-Grand Center", "num_murals": "??"},
]

In [3]:
# One row per key neighborhood, with columns neighborhood_name and num_murals.
df = pd.DataFrame(murals)
df

Unnamed: 0,neighborhood_name,num_murals
0,Old North St. Louis,1
1,Forest Park South East,2
2,Covenant Blu-Grand Center,3


In [4]:
# Save the mural counts without the positional index column.
df.to_csv("murals.csv", index=False)

## Demographics

In [5]:
# !pip install selenium
# !pip install webdriver-manager

In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [7]:
# Start a Chrome session using the chromedriver path returned by
# ChromeDriverManager().install(). Selenium 4 expects that path wrapped in a
# Service object; passing it positionally is deprecated and is rejected by
# newer Selenium 4 releases.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))




  driver = webdriver.Chrome(ChromeDriverManager().install())


In [8]:
# Load the city's 2020 census-by-neighborhood index page in the Selenium browser.
driver.get("https://www.stlouis-mo.gov/government/departments/planning/research/census/data/neighborhoods/index.cfm?censusYear=2020&comparisonYear=0&measure=P0010001")

In [9]:
# Click the link in the first cell of the 19th row of the neighborhoods table.
# NOTE(review): the row is chosen by position, so it depends on the table's current
# order. Presumably this is Covenant Blu-Grand Center (the table read next shows a
# total population of 3,041) — confirm.
driver.find_element(By.XPATH, '//*[@id="DataTables_Table_0"]/tbody/tr[19]/td[1]/a').click()

In [10]:
# Parse the tables in the page the browser is currently showing and display the first one.
pd.read_html(driver.page_source, flavor="html5lib")[0]

Unnamed: 0.1,Unnamed: 0,2020
0,Total Population,3041
1,White alone,945
2,Black or African-American alone,1614
3,Asian-American alone,264
4,American Indian and Alaska Native alone,4
5,Native Hawaiian or Other Pacific Islander alone,2
6,Some Other Race alone,78
7,Two or More Races,134


In [55]:
# Neighborhood numbers used in the city's census page URLs:
# Blu Grand Center = 77
# Old North St. Louis = 63
# Forest Park South East = 39

ids = [77, 63, 39]
neighborhoods = []

# The loop variable is `neighborhood_id` rather than `id` so the built-in id()
# is not shadowed for the rest of the kernel session.
for neighborhood_id in ids:
    print("Searching for", neighborhood_id)
    response = requests.get(f"https://www.stlouis-mo.gov/government/departments/planning/research/census/data/neighborhoods/neighborhood.cfm?number={neighborhood_id}&censusYear=2020&comparisonYear=0")
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # NOTE(review): no parser is named, so BeautifulSoup uses whichever one is
    # installed; pin one once it is confirmed which parser this page was tested with.
    doc = BeautifulSoup(response.text)

    # Visit only the <tr> elements directly under the table body. Iterating the
    # <tbody> itself also yields the whitespace text nodes between rows, which
    # previously had to be filtered with a `!= -1` check that only worked by
    # accident (str.find on text nodes) and crashed on a row missing a cell.
    table_body = doc.find(id="data1View").find("tbody")
    for row in table_body.find_all("tr", recursive=False):
        header_cell = row.find("th")  # population group label
        value_cell = row.find("td")   # first data cell: the 2020 count
        if header_cell is None or value_cell is None:
            continue

        neighborhoods.append({
            "id": neighborhood_id,
            "group": header_cell.text,
            # Kept as the page's text (e.g. "3,041"); not converted to a number here.
            "pop_2020": value_cell.text.strip(),
        })

print(neighborhoods)

Searching for 77
Searching for 63
Searching for 39
[{'id': 77, 'group': 'Total Population', 'pop_2020': '3,041'}, {'id': 77, 'group': 'White alone', 'pop_2020': '945'}, {'id': 77, 'group': 'Black or African-American alone', 'pop_2020': '1,614'}, {'id': 77, 'group': 'Asian-American alone', 'pop_2020': '264'}, {'id': 77, 'group': 'American Indian and Alaska Native alone', 'pop_2020': '4'}, {'id': 77, 'group': 'Native Hawaiian or Other Pacific Islander alone', 'pop_2020': '2'}, {'id': 77, 'group': 'Some Other Race alone', 'pop_2020': '78'}, {'id': 77, 'group': 'Two or More Races', 'pop_2020': '134'}, {'id': 63, 'group': 'Total Population', 'pop_2020': '1,488'}, {'id': 63, 'group': 'White alone', 'pop_2020': '190'}, {'id': 63, 'group': 'Black or African-American alone', 'pop_2020': '1,227'}, {'id': 63, 'group': 'Asian-American alone', 'pop_2020': '1'}, {'id': 63, 'group': 'American Indian and Alaska Native alone', 'pop_2020': '1'}, {'id': 63, 'group': 'Native Hawaiian or Other Pacific Isla

In [56]:
# One row per (neighborhood id, population group) pair; written to CSV without the index.
df = pd.DataFrame(neighborhoods)
df.to_csv("neighborhood_demo.csv", index=False)

In [66]:
# Display the demographics frame (columns: id, group, pop_2020).
df

Unnamed: 0,id,group,pop_2020
0,77,Total Population,3041
1,77,White alone,945
2,77,Black or African-American alone,1614
3,77,Asian-American alone,264
4,77,American Indian and Alaska Native alone,4
5,77,Native Hawaiian or Other Pacific Islander alone,2
6,77,Some Other Race alone,78
7,77,Two or More Races,134
8,63,Total Population,1488
9,63,White alone,190


In [None]:
# TODO 
# - NEED TO TRANSPOSE THE TABLE TO ACTUALLY DO THESE CALCS

# # Blu Grand Center = 77
# (945 / 3041) * 100 # white_pop, 31%
# (1614 / 3041) * 100 # black_pop, 53%

# # Old North St. Louis = 63
# Total Population	1,488
# White alone	190
# Black or African-American alone	1,227

# (190 / 1488) * 100 # white_pop, 13% (12.8%)
# (1227 / 1488) * 100 # black_pop 82%

# # Forest Park South East = 39
# 39	Total Population	3,458
# 17	39	White alone	1,598
# 18	39	Black or African-American alone	1,249

# (1598 / 3458) * 100 # white_pop, 46%
# (1249 / 3458) * 100 # black_pop, 36%

## Population by neighborhood

In [13]:
# Fetch the 2020 census-by-neighborhood index page and reduce every table row
# to a single newline-separated string, one field per line.
response = requests.get("https://www.stlouis-mo.gov/government/departments/planning/research/census/data/neighborhoods/index.cfm?censusYear=2020&comparisonYear=0&measure=P0010001")
doc = BeautifulSoup(response.text)

table = doc.find("tbody").find_all("tr")

# A row's text has runs of blank lines between cells: collapse triple newlines
# first, then any remaining doubles, so the fields end up one newline apart.
rows = [
    row.text.strip().replace("\n\n\n", "\n").replace("\n\n", "\n")
    for row in table
]

print(rows)

['Franz Park\n2,260\n100.0%\n0.7%', 'Tiffany\n915\n100.0%\n0.3%', 'Botanical Heights\n1,196\n100.0%\n0.4%', 'Kings Oak\n167\n100.0%\n0.1%', 'Cheltenham\n1,260\n100.0%\n0.4%', 'Clayton-Tamm\n2,348\n100.0%\n0.8%', 'Forest Park South East\n3,458\n100.0%\n1.1%', 'Hi-Pointe\n2,151\n100.0%\n0.7%', 'Midtown\n6,862\n100.0%\n2.3%', 'Jeff Vanderlou\n4,209\n100.0%\n1.4%', 'Wydown Skinker\n1,121\n100.0%\n0.4%', 'Covenant Blu-Grand Center\n3,041\n100.0%\n1.0%', 'Central West End\n16,670\n100.0%\n5.5%', 'Vandeventer\n2,041\n100.0%\n0.7%', 'Visitation Park\n924\n100.0%\n0.3%', 'Lewis Place\n1,359\n100.0%\n0.5%', 'Fountain Park\n1,075\n100.0%\n0.4%', 'The Ville\n1,427\n100.0%\n0.5%', 'Academy\n2,355\n100.0%\n0.8%', 'Fairground Park\n12\n100.0%\n0.0%', 'West End\n6,846\n100.0%\n2.3%', 'Greater Ville\n4,545\n100.0%\n1.5%', 'Fairground Neighborhood\n1,157\n100.0%\n0.4%', 'Hamilton Heights\n2,187\n100.0%\n0.7%', 'Kingsway East\n2,502\n100.0%\n0.8%', 'Kingsway West\n2,604\n100.0%\n0.9%', 'Patch\n2,842\n100

In [14]:
# Single-column frame (column 0) holding one newline-separated string per table row.
df = pd.DataFrame(rows)
df

Unnamed: 0,0
0,"Franz Park\n2,260\n100.0%\n0.7%"
1,Tiffany\n915\n100.0%\n0.3%
2,"Botanical Heights\n1,196\n100.0%\n0.4%"
3,Kings Oak\n167\n100.0%\n0.1%
4,"Cheltenham\n1,260\n100.0%\n0.4%"
...,...
84,"Downtown West\n5,115\n100.0%\n1.7%"
85,Missouri Botanical Garden\n3\n100.0%\n0.0%
86,Penrose Park\n22\n100.0%\n0.0%
87,Bellefontaine/Calvary Cemetery\n0


In [15]:
# Split each row string on newlines into one column per field (columns 0..3).
# NOTE: this reads column 0 of the raw frame, so it cannot be re-run after the
# rename below without first rebuilding df from `rows`.
df = df[0].str.split("\n", expand=True)

In [16]:
# Display the split frame; rows with fewer fields are padded with missing values.
df

Unnamed: 0,0,1,2,3
0,Franz Park,2260,100.0%,0.7%
1,Tiffany,915,100.0%,0.3%
2,Botanical Heights,1196,100.0%,0.4%
3,Kings Oak,167,100.0%,0.1%
4,Cheltenham,1260,100.0%,0.4%
...,...,...,...,...
84,Downtown West,5115,100.0%,1.7%
85,Missouri Botanical Garden,3,100.0%,0.0%
86,Penrose Park,22,100.0%,0.0%
87,Bellefontaine/Calvary Cemetery,0,,


In [17]:
# Label the two columns that are kept: the neighborhood name and its 2020 population.
df = df.rename(columns={0: "neighborhood", 1: "2020"})

In [18]:
# Discard the two remaining positional columns (the percentage fields).
df = df.drop(columns=[2, 3])

In [19]:
# Save neighborhood / 2020 population pairs without the positional index column.
df.to_csv("neighborhood_pop.csv", index=False)

### TODO - CALCULATE MURALS PER CAPITA

In [20]:
# murals_in_old_north_st_louis / df[df["neighborhood"] == "Old North St. Louis"]

In [21]:
# murals_in_forest_park / df[df["neighborhood"] == "Forest Park South East"]

In [22]:
# murals_in_covenant_blu / df[df["neighborhood"] == "Covenant Blu-Grand Center"]

## Walkability

In [42]:
# Download the Walk Score page for St. Louis and parse it for the neighborhood table.
# NOTE(review): no parser is named, so BeautifulSoup uses whichever one is installed.
response = requests.get("https://www.walkscore.com/MO/St._Louis")
doc = BeautifulSoup(response.text)

In [43]:
# Body of the sortable neighborhood ranking table.
table = doc.find("table", class_="tablesorter").find("tbody")

neighborhoods = []

# Visit only the <tr> elements directly under the table body. Iterating the
# <tbody> itself also yields the whitespace text nodes between rows, which
# previously had to be filtered with a `!= -1` check that only worked by
# accident (str.find on text nodes) and crashed on a row without any <td>.
for row in table.find_all("tr", recursive=False):
    first_cell = row.find("td")  # the first cell of a data row holds the rank
    if first_cell is None:
        continue

    # Values are kept as the page's text; cast them where a number is needed.
    neighborhoods.append({
        "rank": first_cell.text,
        "name": row.find("td", class_="name").text,
        "walk": row.find("td", class_="walkscore").text,
        "transit_score": row.find("td", class_="transitscore").text,
        "bike_score": row.find("td", class_="bikescore").text,
        "population": row.find("td", class_="population").text,
    })

In [44]:
# One row per neighborhood from the Walk Score table; preview the first ten.
df = pd.DataFrame(neighborhoods)
df.head(10)

Unnamed: 0,rank,name,walk,transit_score,bike_score,population
0,1,Benton Park West,88,43,63,4264
1,2,Downtown,87,64,61,7670
2,3,Tower Grove East,85,43,76,5880
3,4,Soulard,85,39,59,3429
4,5,Gravois Park,84,44,57,5170
5,6,Benton Park,84,42,57,3436
6,7,Lafayette Square,82,51,63,2056
7,8,Forest Park Southeast,82,55,78,2911
8,9,Near Southside,81,61,59,2360
9,10,Fox Park,80,40,62,2600


In [26]:
# Save the walk/transit/bike scores without the positional index column.
df.to_csv("neighborhood_walk.csv", index=False)

### St. Louis score

In [27]:
# Citywide St. Louis scores, entered by hand.
# NOTE(review): presumably read off the Walk Score St. Louis page — confirm the source.
stl_walk, stl_transit, stl_bike = 66, 43, 58

In [53]:
# Percent change: ((v2 - v1) / v1) * 100
# Forest Park Southeast walk score (v2 = 82) relative to the citywide
# St. Louis walk score (v1 = stl_walk).
forest_park_se_walk = 82

((forest_park_se_walk - stl_walk) / stl_walk) * 100

24.242424242424242

### Median walk score

In [28]:
# The scraped scores are stored as text, so cast to int before taking the median
# (the same cast the average walk score cell applies before .mean()).
df["walk"].astype(int).median()

64.0

### Average walk score

In [29]:
# Mean walk score across all scraped neighborhoods, rounded to a whole number.
# The column is cast to int first because the scraped scores are stored as text.
round(df["walk"].astype(int).mean())

64

In [30]:
# Walk Score row for Forest Park Southeast (spelled as one word on this site).
df.loc[df["name"] == "Forest Park Southeast"]

Unnamed: 0,rank,name,walk,transit_score,bike_score,population
7,8,Forest Park Southeast,82,55,78,2911


In [31]:
# Walk Score row for Old North St. Louis.
df.loc[df["name"] == "Old North St. Louis"]

Unnamed: 0,rank,name,walk,transit_score,bike_score,population
36,37,Old North St. Louis,64,43,65,1917


In [32]:
# Walk Score row for Grand Center.
# NOTE(review): presumably the same area the city data calls
# "Covenant Blu-Grand Center" — confirm the boundaries match.
df.loc[df["name"] == "Grand Center"]

Unnamed: 0,rank,name,walk,transit_score,bike_score,population
24,25,Grand Center,70,51,66,3492


## Neighborhoods list

In [34]:
# Load the local neighborhood list (columns include NHD_NUM, NHD_NAME, ANGLE,
# NHD_NUMTXT and NHD_NUM_ST) and display it.
df = pd.read_csv("neighborhood_list.csv")
df

Unnamed: 0,NHD_NUM,NHD_NAME,ANGLE,NHD_NUMTXT,NHD_NUM_ST
0,43,Franz Park,0.0,43 Franz Park,43
1,29,Tiffany,0.0,29 Tiffany,29
2,28,Botanical Heights,0.0,28 Botanical Heights,28
3,40,Kings Oak,0.0,40 Kings Oak,40
4,41,Cheltenham,0.0,41 Cheltenham,41
...,...,...,...,...,...
83,61,Old North St. Louis,0.0,63 Old North St. Louis,63
84,61,Carr Square,0.0,61 Carr Square,61
85,61,Hyde Park,0.0,65 Hyde Park,65
86,61,St. Louis Place,0.0,60 St. Louis Place,60


In [35]:
# Keep only the neighborhood name and its number, under friendlier labels.
# NHD_NUM_ST is the column retained as "ID"; NHD_NUM, ANGLE and NHD_NUMTXT are dropped.
df = (
    df
    .rename(columns={"NHD_NAME": "name", "NHD_NUM_ST": "ID"})
    .drop(columns=["NHD_NUM", "ANGLE", "NHD_NUMTXT"])
)

In [36]:
# Show the list alphabetically by name (display only; df itself is not reordered).
df.sort_values("name", ascending=True)

Unnamed: 0,name,ID
18,Academy,51
75,Baden,74
71,Bellefontaine/Calvary Cemetery,86
44,Benton Park,22
41,Benton Park West,30
...,...,...
72,Walnut Park West,76
67,Wells Goodfellow,50
20,West End,48
34,Willmore Park,88


## Crime

In [37]:
# Collect the per-neighborhood crime page URLs from the St. Louis city crime index.
response = requests.get("https://graphics.stltoday.com/apps/crime/st-louis-city/")
doc = BeautifulSoup(response.text)

# All <li> items of the first <ul> on the page.
neighborhoods = doc.find("ul").find_all("li")
base_url = "https://graphics.stltoday.com"

# The first four items are skipped.
# NOTE(review): presumably these are not neighborhood links — confirm.
# Each remaining item's link is site-relative, so the host is prepended.
urls = [base_url + item.a.get('href') for item in neighborhoods[4:]]

# print(urls)

In [38]:
# Single-column frame of the collected crime page URLs, for a quick look.
df = pd.DataFrame(urls)
df

Unnamed: 0,0
0,https://graphics.stltoday.com/apps/crime/st-lo...
1,https://graphics.stltoday.com/apps/crime/st-lo...
2,https://graphics.stltoday.com/apps/crime/st-lo...
3,https://graphics.stltoday.com/apps/crime/st-lo...
4,https://graphics.stltoday.com/apps/crime/st-lo...
...,...
147,https://graphics.stltoday.com/apps/crime/st-lo...
148,https://graphics.stltoday.com/apps/crime/st-lo...
149,https://graphics.stltoday.com/apps/crime/st-lo...
150,https://graphics.stltoday.com/apps/crime/st-lo...


In [39]:
# URL slugs of the three key neighborhoods on the crime tracker site.
neighborhoods = ["forest-park-south-east/", "old-north-st-louis/", "covenant-blu-grand-center/"]

neighborhood_info = []

# Visit each neighborhood's page in the Selenium browser and read its headline figures.
for neighborhood in neighborhoods:
    print("Searching for", neighborhood)
    print("Visiting the search page")

    driver.get(f"https://graphics.stltoday.com/apps/crime/st-louis-city/{neighborhood}")

    neighborhood_info.append({
        # The neighborhood name is taken from the fourth <h1> on the page (index 3).
        "neighborhood": driver.find_elements(By.TAG_NAME, "h1")[3].text,
        # For each figure, the first element carrying the class is used.
        "change": driver.find_elements(By.CLASS_NAME, "crimechange")[0].text,
        "per_capita_rate": driver.find_elements(By.CLASS_NAME, "percapitarate")[0].text,
        "per_capita_rank": driver.find_elements(By.CLASS_NAME, "percapitarank")[0].text,
    })

print(neighborhood_info)

Searching for forest-park-south-east/
Visiting the search page
Searching for old-north-st-louis/
Visiting the search page
Searching for covenant-blu-grand-center/
Visiting the search page
[{'neighborhood': 'Forest Park South East', 'change': 'down 7.51%', 'per_capita_rate': '67.51', 'per_capita_rank': 'higher than 69 neighborhoods (out of 77)'}, {'neighborhood': 'Old North St. Louis', 'change': 'down 13.45%', 'per_capita_rate': '53.76', 'per_capita_rank': 'higher than 64 neighborhoods (out of 77)'}, {'neighborhood': 'Covenant Blu-Grand Center', 'change': 'down 28.5%', 'per_capita_rate': '42.95', 'per_capita_rank': 'higher than 51 neighborhoods (out of 77)'}]


In [40]:
# One row per key neighborhood with its crime change and per-capita figures (as text);
# written to CSV without the positional index column.
df = pd.DataFrame(neighborhood_info)
df.to_csv("neighborhood_crime.csv", index=False)

## NICE TO HAVE

## Adjacent Neighborhoods

In [None]:
# forest_park_southeast = ["Central West End", "Midtown", "Botanical Heights", "Southwest Garden", "The Hill", "Kings Oak"]
# covenant_blu = ["Central West End", "Midtown", "Jeff Vanderlou", "Vandeventer", "Greater Ville"]
# old_north_st_louis = ["Hyde Park", "St. Louis Place", "Carr Square", "Columbus Square", "Near North Riverfront"]