In [1]:
import requests
from bs4 import BeautifulSoup


### Property Information

In [2]:
# Property activity page for a single parcel, selected by its APN in the query string.
# NOTE(review): 'PropAtivityCases' is spelled this way in the URL that returned 200 below;
# do not "correct" the spelling without checking the site.
url = 'https://housingapp.lacity.org/ReportViolation/Pages/PropAtivityCases?APN=2012013028#divPropDetails'
# requests applies no timeout by default, so a stalled server would hang the kernel forever.
result = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of parsing an error page in the cells below.
result.raise_for_status()
result.status_code

200

In [3]:
# Parse the downloaded page and isolate the <div> that holds the property details.
src = result.content
soup = BeautifulSoup(src, 'lxml')
content = soup.find('div', attrs={'id': 'divPropDetails'})

# The first <h1> inside that <div> carries the section title.
prop_info_title = content.find('h1').text
prop_info_title

'PROPERTY INFORMATION'

In [4]:
# Show the raw HTML of the 'divPropDetails' element to inspect how labels (<dt>) and values (<dd>) are laid out.
content

<div class="card bg-info text-left card-main" id="divPropDetails" name="divPropDetails" tabindex="-1">
<div class="card-header"><h1 class="card-header-title mb-0">PROPERTY INFORMATION</h1></div>
<div class="card-body bg-white">
<dl class="row">
<div class="col-md-5">
<dt><strong class="mr-2">Assessor Parcel Number:</strong></dt>
<dd><span id="lblAPN2">2012013028</span></dd>
<dt><strong class="mr-2">Total Units (legal unit count may vary):</strong></dt>
<dd><span id="lblTotalPropUnits">20</span></dd>
<dt><strong class="mr-2">Rent Registration Number:</strong></dt>
<dd><span id="lblRSU">0295048</span></dd>
<dt><strong class="mr-2">*Census Tract:</strong></dt>
<dd><span id="lblCT">134304</span></dd>
<dt><strong class="mr-2">*Council District:</strong></dt>
<dd><span id="lblCD">3</span></dd>
</div>
<div class="col-md-7">
<dt><strong class="mr-2">Official Address:</strong></dt>
<dd><span id="lblAddress">7851 N TOPANGA CANYON BLVD, CANOGA PARK, CA 93063</span></dd>
<dt><strong class="mr-2">T

In [5]:
# First <div> whose class attribute is 'card-body bg-white'; it wraps the label/value list.
info = soup.find('div', class_='card-body bg-white')

# Every <dt> holds one field label (trailing colon included).
info_keys = [label.text for label in info.find_all('dt')]

info_keys

['Assessor Parcel Number:',
 'Total Units (legal unit count may vary):',
 'Rent Registration Number:',
 '*Census Tract:',
 '*Council District:',
 'Official Address:',
 'Total Exemption Units:',
 'Rent Office ID:',
 'Code Regional Area:',
 'Year Built:']

In [6]:
# Every <dd> holds the value that belongs to the <dt> label at the same position.
info_values = [entry.text for entry in info.find_all('dd')]

info_values

['2012013028',
 '20',
 '0295048',
 '134304',
 '3',
 '7851 N TOPANGA CANYON BLVD, CANOGA PARK, CA 93063',
 '0',
 'Valley',
 'Valley Regional Office',
 '1974']

In [7]:
# Pair each <dt> label with the <dd> value at the same position to build a label -> value mapping.
key_list = info.find_all('dt')
value_list = info.find_all('dd')

# zip() stops at the shorter list, so a count mismatch would silently pair labels with the wrong values.
assert len(key_list) == len(value_list), 'number of <dt> labels and <dd> values differ'

prop_dict = {}

for key, value in zip(key_list, value_list):
    # Drop surrounding whitespace and the trailing colon from the label; unlike slicing off the
    # last character, this leaves a label intact when it does not end in a colon.
    prop_dict[key.text.strip().rstrip(':')] = value.text

prop_dict

{'Assessor Parcel Number': '2012013028',
 'Total Units (legal unit count may vary)': '20',
 'Rent Registration Number': '0295048',
 '*Census Tract': '134304',
 '*Council District': '3',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK, CA 93063',
 'Total Exemption Units': '0',
 'Rent Office ID': 'Valley',
 'Code Regional Area': 'Valley Regional Office',
 'Year Built': '1974'}

### Property Cases

In [8]:
# Grab the property-cases table and look at its first row to learn the column layout.
table = soup.find('table', attrs={'id': 'dgPropCases2'})
table_rows = table.find_all('tr')
table_rows[0]

<tr>
<th align="center" scope="col">Action</th><th scope="col">Case Type</th><th scope="col">Case Number</th><th scope="col">Date Closed</th>
</tr>

In [9]:
# Walk the property-cases table, print every data row and collect the case numbers.
table = soup.find('table', id='dgPropCases2')
table_rows = table.find_all('tr')

# Column order in the header row: Action, Case Type, Case Number, Date Closed.
CASE_NUMBER_COL = 2
case_number = []

for tr in table_rows[1:]:
    # the first table row is the table header and therefore doesn't have td
    row = [cell.text for cell in tr.find_all('td')]
    if len(row) <= CASE_NUMBER_COL:
        # A row without enough <td> cells has no case number; indexing it would raise IndexError.
        continue
    # Strip stray whitespace because these values are later appended to a URL.
    case_number.append(row[CASE_NUMBER_COL].strip())
    print(row)

print(case_number)

['\nSelect\n', 'Complaint', '757163', '02/27/2020']
['\nSelect\n', 'Complaint', '628954', '06/19/2017']
['\nSelect\n', 'Complaint', '628953', '06/19/2017']
['\nSelect\n', 'Complaint', '628952', '08/17/2017']
['\nSelect\n', 'Complaint', '608891', '03/22/2017']
['\nSelect\n', 'Complaint', '593815', '10/25/2016']
['\nSelect\n', 'Complaint', '593105', '10/18/2016']
['\nSelect\n', 'Complaint', '572108', '06/28/2016']
['\nSelect\n', 'Property Management Training Program', '492351', '\xa0']
['\nSelect\n', 'Systematic Code Enforcement Program', '492351', '03/09/2015']
['\nSelect\n', 'Complaint', '470860', '05/06/2014']
['\nSelect\n', 'Complaint', '397669', '09/05/2012']
['\nSelect\n', 'Complaint', '342898', '04/26/2011']
['\nSelect\n', 'Systematic Code Enforcement Program', '293723', '05/20/2010']
['\nSelect\n', 'Systematic Code Enforcement Program', '100178', '12/28/2006']
['757163', '628954', '628953', '628952', '608891', '593815', '593105', '572108', '492351', '492351', '470860', '397669', 

In [15]:
# Fetch the activity report of one case and collect its label -> value pairs.
# case number in the url is referencing the case number from the property cases table above
# NOTE(review): the APN and CaseType=1 are hard-coded. case_number[1] is a 'Complaint' row in the
# table output, so CaseType=1 presumably selects complaint cases; confirm before reusing for other rows.
url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN=2012013028&CaseType=1&CaseNo=' + case_number[1]

# requests applies no timeout by default, so a stalled server would hang the kernel forever.
result = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of parsing an error page below.
result.raise_for_status()

# NOTE: from here on `src`, `soup`, `info` and `prop_dict` describe the case report page and no
# longer the property page, so re-running earlier cells after this one will not behave the same.
src = result.content
soup = BeautifulSoup(src, 'lxml')

info = soup.find('div', class_='card-body bg-white')

key_list = info.find_all('dt')
value_list = info.find_all('dd')
prop_dict = {}

for key, value in zip(key_list[:-1], value_list[:-1]):
    # the last row is empty
    # Drop surrounding whitespace and the trailing colon from the label; unlike slicing off the
    # last character, this leaves a label intact when it does not end in a colon.
    prop_dict[key.text.strip().rstrip(':')] = value.text

prop_dict

{'Assessor Parcel Number': '2012013028',
 'Council District': '3',
 'Census Tract': '134304',
 'Rent Registration Number': '0295048',
 'Historical Preservation Overlay Zone': '',
 'Total Units': '20',
 'Regional Office': 'Valley Regional Office',
 'Regional Office Contact': '(818) 756-1473',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK 93063',
 'Case Number': '628954',
 'Case Type': 'Complaints',
 'Inspector': '',
 'Case Manager': '',
 'Total Exemption Units': '0'}

### Nature of Complaint

In [16]:
# Text of the <span> with id 'lblCompNature', with surrounding whitespace removed.
nature_label = soup.find('span', attrs={'id': 'lblCompNature'})
nature_label.text.strip()

'Nature of Complaint:'

In [17]:
# Text of the <span> with id 'lblComplaintNature', returned exactly as it appears in the page.
nature_text = soup.find('span', attrs={'id': 'lblComplaintNature'})
nature_text.text

'Leaking or defective plumbing faucet or fixture, Damaged, defective or unsealed surface of plumbing fixture, Plaster/drywall wall/ceiling covering defective, deteriorated, or paint is peeling, Floor covering defective, missing, or unsafe, Windows, doors, cabinets, and frames not operable, defective, missing, and/or unsanitary, Appliance venting system requires maintenance'

### Property Activity Report

In [18]:
# Print each data row of the 'dgDisplayDates2' table as a list of cell texts.
table = soup.find('table', attrs={'id': 'dgDisplayDates2'})
table_rows = table.find_all('tr')

# Row 0 is the table header and has no <td> cells, so start from row 1.
for data_row in table_rows[1:]:
    print([cell.text for cell in data_row.find_all('td')])

['06/19/2017 07:53:00 AM', 'Complaint Closed']
['06/14/2017 02:19:00 PM', 'Complaint Received']
