In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Property Information

In [2]:
def soup_from_apn(apn_num):
    # This function returns a soup object from an apn number
    
    url = 'https://housingapp.lacity.org/ReportViolation/Pages/PropAtivityCases?APN='+str(apn_num)+'#divPropDetails'
    result = requests.get(url)
    result.status_code

    src = result.content
    soup = BeautifulSoup(src, 'lxml')
    return soup

In [3]:
def create_prop_info_dict():
    # this function returns a property infomation dictionary of a specific property 

    info = soup.find('div', class_='card-body bg-white')

    key_list = info.find_all('dt')
    value_list = info.find_all('dd')

    prop_info_dict = {}
    # This property info dictionary will be referenced multiple times later to create 

    for key, value in zip(key_list, value_list):
        prop_info_dict[key.text[:-1]] = value.text
        # this is to remove the extra colon in the key list

    return prop_info_dict

### Property Cases

In [4]:
def create_prop_cases_df():
    # This function returns property cases dataframe, property case number list, and property cases list
    
    table = soup.find('table', id='dgPropCases2')
    table_rows = table.find_all('tr')
    case_number = []
    prop_cases = []

    prop_cases_header = [i.text for i in table_rows[0].find_all('th')[1:]]

    for tr in table_rows[1:]:
        # the first table row is the table header and therefore doesn't have td
        td = tr.find_all('td')
        row = [i.text for i in td[1:]]
        # skipping first column as it's the hyper link buttom for select
        prop_cases.append(row)
        case_number.append(row[1])

    prop_cases_df = pd.DataFrame(prop_cases, columns=prop_cases_header)
    prop_cases_df['Assessor Parcel Number'] = prop_info_dict['Assessor Parcel Number']
    
    return prop_cases_df, case_number, prop_cases

In [5]:
# url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN=2012013028&CaseType=1&CaseNo=' + case_number[2]
# result = requests.get(url)
# src = result.content
# soup = BeautifulSoup(src, 'lxml')
# info = soup.find('div', class_='card-body bg-white')

# key_list = info.find_all('dt')
# value_list = info.find_all('dd')
# prop_dict = {}

# for key, value in zip(key_list[:-1], value_list):
#     # the last row is empty
#     prop_dict[key.text[:-1]] = value.text
#     # this is to remove the extra colon in the key list
    
# prop_dict

### Property Activity Report

In [6]:
def create_prop_activity_report_df(apn_num):
    # This function returns prop_activity_report_df, noc, and noc_header 
    
    url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN=+'+str(apn_num)+'&CaseType=1&CaseNo=' + case_number[0]
    result = requests.get(url)
    src = result.content
    soup = BeautifulSoup(src, 'lxml')
    
    prop_activity_report = []
    noc = []
    noc_header = soup.find('span', id='lblCompNature').text.strip()

    for number in case_number:
        url = 'https://housingapp.lacity.org/ReportViolation/Pages/PublicPropertyActivityReport?APN='+str(apn_num)+'&CaseType=1&CaseNo=' + number

        result = requests.get(url)

        src = result.content
        soup = BeautifulSoup(src, 'lxml')

        table = soup.find('table', id='dgDisplayDates2')
        table_rows = table.find_all('tr')
        table_header = table_rows[0]

        for tr in table_rows[1:]:
            # the first table row is the table header and therefore doesn't have td
            td = tr.find_all('td')
            row = [i.text for i in td]
            row.append(case_number[1])
            row.append(prop_info_dict['Assessor Parcel Number'])

            prop_activity_report.append(row)

        noc.append(soup.find('span', id='lblComplaintNature').text)
        # This is the nature of complaint

    prop_activity_report_header = [i.text for i in table_header.find_all('th')]
    prop_activity_report_header.append('Case Number')
    prop_activity_report_header.append('Assessor Parcel Number')

    prop_activity_report_df = pd.DataFrame(prop_activity_report, columns=prop_activity_report_header)
    
    return prop_activity_report_df, noc, noc_header

In [7]:
soup = soup_from_apn(2012013028)
prop_info_dict = create_prop_info_dict()
prop_cases_df, case_number, prop_cases = create_prop_cases_df()

In [11]:
prop_activity_report_df, noc, noc_header = create_prop_activity_report_df(2012013028)
prop_cases_df[noc_header[:-1]] = noc
complaints_count = len(prop_cases_df[(prop_cases_df['Case Type'] == 'Complaint')])
prop_info_dict['Total Number of Complaints'] = complaints_count
prop_cases_df.head()

Unnamed: 0,Case Type,Case Number,Date Closed,Assessor Parcel Number,Nature of Complaint
0,Complaint,757163,02/27/2020,2012013028,Lack of adequate flow of hot and/or cold runni...
1,Complaint,628954,06/19/2017,2012013028,Leaking or defective plumbing faucet or fixtur...
2,Complaint,628953,06/19/2017,2012013028,Leaking or defective plumbing faucet or fixtur...
3,Complaint,628952,08/17/2017,2012013028,Leaking or defective plumbing faucet or fixtur...
4,Complaint,608891,03/22/2017,2012013028,"Floor covering defective, missing, or unsafe, ..."


In [12]:
prop_activity_report_df.head()

Unnamed: 0,Date,Status,Case Number,Assessor Parcel Number
0,02/27/2020 10:50:00 AM,Complaint Closed,628954,2012013028
1,02/18/2020 04:04:00 PM,Complaint Received,628954,2012013028
2,06/19/2017 07:53:00 AM,Complaint Closed,628954,2012013028
3,06/14/2017 02:19:00 PM,Complaint Received,628954,2012013028
4,06/19/2017 07:52:00 AM,Complaint Closed,628954,2012013028


### Complete LAHD APN Numbers

In [9]:
rso_list = pd.read_csv('RSO List/rso cpra workbook.csv')
apn_list = rso_list['APN'].to_list()

In [10]:
apn_list[:5]

[5533004021, 5503017004, 5504030100, 5467031017, 6063004043]