In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Property Information

In [2]:
url = 'https://housingapp.lacity.org/ReportViolation/Pages/PropAtivityCases?APN=2012013028#divPropDetails'
result = requests.get(url)
result.status_code

200

In [3]:
src = result.content
soup = BeautifulSoup(src, 'lxml')

In [4]:
info = soup.find('div', class_='card-body bg-white')

key_list = info.find_all('dt')
value_list = info.find_all('dd')
prop_info_dict = {}

for key, value in zip(key_list, value_list):
    prop_info_dict[key.text[:-1]] = value.text
    # this is to remove the extra colon in the key list
    
prop_info_dict

{'Assessor Parcel Number': '2012013028',
 'Total Units (legal unit count may vary)': '20',
 'Rent Registration Number': '0295048',
 '*Census Tract': '134304',
 '*Council District': '3',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK, CA 93063',
 'Total Exemption Units': '0',
 'Rent Office ID': 'Valley',
 'Code Regional Area': 'Valley Regional Office',
 'Year Built': '1974'}

### Property Cases

In [5]:
table = soup.find('table', id='dgPropCases2')
table_rows = table.find_all('tr')
case_number = []
prop_cases = []

prop_cases_header = [i.text for i in table_rows[0].find_all('th')[1:]]

for tr in table_rows[1:]:
    # the first table row is the table header and therefore doesn't have td
    td = tr.find_all('td')
    row = [i.text for i in td[1:]]
    # skipping first column as it's the hyper link buttom for select
    prop_cases.append(row)
    case_number.append(row[1])
    
prop_cases_df = pd.DataFrame(prop_cases, columns=prop_cases_header)
prop_cases_df['Assessor Parcel Number'] = prop_info_dict['Assessor Parcel Number']

In [6]:
url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN=2012013028&CaseType=1&CaseNo=' + case_number[2]

result = requests.get(url)

src = result.content
soup = BeautifulSoup(src, 'lxml')

info = soup.find('div', class_='card-body bg-white')

key_list = info.find_all('dt')
value_list = info.find_all('dd')
prop_dict = {}

for key, value in zip(key_list[:-1], value_list):
    # the last row is empty
    prop_dict[key.text[:-1]] = value.text
    # this is to remove the extra colon in the key list
    
prop_dict

{'Assessor Parcel Number': '2012013028',
 'Council District': '3',
 'Census Tract': '134304',
 'Rent Registration Number': '0295048',
 'Historical Preservation Overlay Zone': '',
 'Total Units': '20',
 'Regional Office': 'Valley Regional Office',
 'Regional Office Contact': '(818) 756-1473',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK 93063',
 'Case Number': '628953',
 'Case Type': 'Complaints',
 'Inspector': '',
 'Case Manager': '',
 'Total Exemption Units': '0'}

### Property Activity Report

In [7]:
prop_activity_report = []
noc = []
noc_header = soup.find('span', id='lblCompNature').text.strip()

for number in case_number:
    url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN=2012013028&CaseType=1&CaseNo=' + number

    result = requests.get(url)

    src = result.content
    soup = BeautifulSoup(src, 'lxml')

    table = soup.find('table', id='dgDisplayDates2')
    table_rows = table.find_all('tr')
    table_header = table_rows[0]

    for tr in table_rows[1:]:
        # the first table row is the table header and therefore doesn't have td
        td = tr.find_all('td')
        row = [i.text for i in td]
        row.append(case_number[1])
        row.append(prop_info_dict['Assessor Parcel Number'])

        prop_activity_report.append(row)
        
    noc.append(soup.find('span', id='lblComplaintNature').text)
    # This is the nature of complaint
    
prop_activity_report_header = [i.text for i in table_header.find_all('th')]
prop_activity_report_header.append('Case Number')
prop_activity_report_header.append('Assessor Parcel Number')

prop_activity_report_df = pd.DataFrame(prop_activity_report, columns=prop_activity_report_header)
prop_activity_report_df

Unnamed: 0,Date,Status,Case Number,Assessor Parcel Number
0,02/27/2020 10:50:00 AM,Complaint Closed,628954,2012013028
1,02/18/2020 04:04:00 PM,Complaint Received,628954,2012013028
2,06/19/2017 07:53:00 AM,Complaint Closed,628954,2012013028
3,06/14/2017 02:19:00 PM,Complaint Received,628954,2012013028
4,06/19/2017 07:52:00 AM,Complaint Closed,628954,2012013028
5,06/14/2017 02:12:00 PM,Complaint Received,628954,2012013028
6,08/17/2017 03:29:00 PM,Complaint Closed,628954,2012013028
7,08/17/2017 03:29:00 PM,Violations Corrected,628954,2012013028
8,08/17/2017 02:43:00 PM,All Violations Resolved Date,628954,2012013028
9,08/17/2017 02:30:00 PM,Site Visit/Compliance Inspection,628954,2012013028


In [8]:
prop_cases_df[noc_header[:-1]] = noc

In [9]:
prop_cases_df

Unnamed: 0,Case Type,Case Number,Date Closed,Assessor Parcel Number,Nature of Complaint
0,Complaint,757163,02/27/2020,2012013028,Lack of adequate flow of hot and/or cold runni...
1,Complaint,628954,06/19/2017,2012013028,Leaking or defective plumbing faucet or fixtur...
2,Complaint,628953,06/19/2017,2012013028,Leaking or defective plumbing faucet or fixtur...
3,Complaint,628952,08/17/2017,2012013028,Leaking or defective plumbing faucet or fixtur...
4,Complaint,608891,03/22/2017,2012013028,"Floor covering defective, missing, or unsafe, ..."
5,Complaint,593815,10/25/2016,2012013028,"Floor covering defective, missing, or unsafe, ..."
6,Complaint,593105,10/18/2016,2012013028,"Floor covering defective, missing, or unsafe, ..."
7,Complaint,572108,06/28/2016,2012013028,"Windows, doors, cabinets, and frames not opera..."
8,Property Management Training Program,492351,,2012013028,
9,Systematic Code Enforcement Program,492351,03/09/2015,2012013028,


In [10]:
complaints_count = len(prop_cases_df[(prop_cases_df['Case Type'] == 'Complaint')])

In [11]:
prop_info_dict['Total Number of Complaints'] = complaints_count
prop_info_dict

{'Assessor Parcel Number': '2012013028',
 'Total Units (legal unit count may vary)': '20',
 'Rent Registration Number': '0295048',
 '*Census Tract': '134304',
 '*Council District': '3',
 'Official Address': '7851 N TOPANGA CANYON BLVD, CANOGA PARK, CA 93063',
 'Total Exemption Units': '0',
 'Rent Office ID': 'Valley',
 'Code Regional Area': 'Valley Regional Office',
 'Year Built': '1974',
 'Total Number of Complaints': 11}