In [102]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Property Information

In [103]:
# Property-activity page for a single parcel (APN). The "#divPropDetails"
# fragment is kept as in the original link.
url = 'https://housingapp.lacity.org/ReportViolation/Pages/PropAtivityCases?APN=2012013028#divPropDetails'

# Bound the wait so a stalled server cannot hang the kernel, and fail fast on
# an HTTP error instead of letting later cells parse an error page.
result = requests.get(url, timeout=30)
result.raise_for_status()
result.status_code

200

In [104]:
# Parse the raw response bytes with the lxml parser.
src = result.content
soup = BeautifulSoup(markup=src, features='lxml')

In [105]:
# The property details are laid out as <dt>/<dd> pairs inside this card.
info = soup.find('div', class_='card-body bg-white')

key_list = info.find_all('dt')
value_list = info.find_all('dd')

# Pair each <dt> label with its <dd> value; the [:-1] slice removes the
# trailing colon from the label text.
prop_info_dict = {
    label.get_text()[:-1]: datum.get_text()
    for label, datum in zip(key_list, value_list)
}

prop_info_dict

{'Assessor Parcel Number': '2012013028',
 'Total Units (legal unit count may vary)': '20',
 'Rent Registration Number': '0295048',
 '*Census Tract': '134304',
 '*Council District': '3',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK, CA 93063',
 'Total Exemption Units': '0',
 'Rent Office ID': 'Valley',
 'Code Regional Area': 'Valley Regional Office',
 'Year Built': '1974'}

### Property Cases

In [106]:
table = soup.find('table', id='dgPropCases2')
table_rows = table.find_all('tr')

# The first <tr> is the header (only <th> cells); everything after it is data.
table_header, case_rows = table_rows[0], table_rows[1:]

# Column 0 is the hyperlink button used for "select", so it is skipped in
# both the header and the data rows.
print([th.get_text() for th in table_header.find_all('th')[1:]])

case_number = []
for tr in case_rows:
    cells = [td.get_text() for td in tr.find_all('td')[1:]]
    # After dropping the select column, index 1 is the "Case Number" column.
    case_number.append(cells[1])
    print(cells)

print(case_number)




['Case Type', 'Case Number', 'Date Closed']
['Complaint', '757163', '02/27/2020']
['Complaint', '628954', '06/19/2017']
['Complaint', '628953', '06/19/2017']
['Complaint', '628952', '08/17/2017']
['Complaint', '608891', '03/22/2017']
['Complaint', '593815', '10/25/2016']
['Complaint', '593105', '10/18/2016']
['Complaint', '572108', '06/28/2016']
['Property Management Training Program', '492351', '\xa0']
['Systematic Code Enforcement Program', '492351', '03/09/2015']
['Complaint', '470860', '05/06/2014']
['Complaint', '397669', '09/05/2012']
['Complaint', '342898', '04/26/2011']
['Systematic Code Enforcement Program', '293723', '05/20/2010']
['Systematic Code Enforcement Program', '100178', '12/28/2006']
['757163', '628954', '628953', '628952', '608891', '593815', '593105', '572108', '492351', '492351', '470860', '397669', '342898', '293723', '100178']


In [107]:
# Activity report for one case of the parcel; case_number[1] is the second
# entry collected from the property-cases table.
url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN=2012013028&CaseType=1&CaseNo=' + case_number[1]

# Bound the wait so a stalled server cannot hang the kernel, and fail fast on
# an HTTP error instead of parsing an error page below.
result = requests.get(url, timeout=30)
result.raise_for_status()

# NOTE: this rebinds `soup` (and `result`/`src`) to the case-report page. The
# cells below rely on that; re-running the property-cases cell after this one
# requires re-running the property-page cells first.
src = result.content
soup = BeautifulSoup(src, 'lxml')

info = soup.find('div', class_='card-body bg-white')

key_list = info.find_all('dt')
value_list = info.find_all('dd')
prop_dict = {}

# The last <dt> on this page is empty, so it is left out of the pairing.
for key, value in zip(key_list[:-1], value_list):
    # [:-1] removes the trailing colon from the label text.
    prop_dict[key.text[:-1]] = value.text

prop_dict

{'Assessor Parcel Number': '2012013028',
 'Council District': '3',
 'Census Tract': '134304',
 'Rent Registration Number': '0295048',
 'Historical Preservation Overlay Zone': '',
 'Total Units': '20',
 'Regional Office': 'Valley Regional Office',
 'Regional Office Contact': '(818) 756-1473',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK 93063',
 'Case Number': '628954',
 'Case Type': 'Complaints',
 'Inspector': '',
 'Case Manager': '',
 'Total Exemption Units': '0'}

### Nature of Complaint

In [108]:
# Label text of the complaint section, with surrounding whitespace removed.
nop = soup.find('span', attrs={'id': 'lblCompNature'}).get_text().strip()
nop

'Nature of Complaint:'

In [109]:
# Full complaint description text, taken verbatim from its <span>.
nop_content = soup.find('span', attrs={'id': 'lblComplaintNature'}).get_text()
nop_content

'Leaking or defective plumbing faucet or fixture, Damaged, defective or unsealed surface of plumbing fixture, Plaster/drywall wall/ceiling covering defective, deteriorated, or paint is peeling, Floor covering defective, missing, or unsafe, Windows, doors, cabinets, and frames not operable, defective, missing, and/or unsanitary, Appliance venting system requires maintenance'

### Property Activity Report

In [115]:
table = soup.find('table', id='dgDisplayDates2')
table_rows = table.find_all('tr')

# The first <tr> is the header (only <th> cells); everything after it is data.
table_header, status_rows = table_rows[0], table_rows[1:]

print([th.get_text() for th in table_header.find_all('th')])

for tr in status_rows:
    print([td.get_text() for td in tr.find_all('td')])


['Date', 'Status']
['06/19/2017 07:53:00 AM', 'Complaint Closed']
['06/14/2017 02:19:00 PM', 'Complaint Received']


In [111]:
# Parcel number as stored in prop_info_dict when the property details were scraped.
prop_info_dict['Assessor Parcel Number']

'2012013028'