### Description
Author: T. Majidzadeh

Date Created: February 25, 2025

Date Updated: February 25, 2025

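Purpose: Standardize the formatting of each property-management company's building data and append everything into a single table with a simplified set of columns (`company_name`, `building_name`, `raw_address`, `author`).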

In [2]:
import pandas as pd
import numpy as np
import json
import os
import re

In [3]:
### Patrick's files:
# Each pair is (CSV file name, management company name). The two parallel
# lists used further down are derived from this single table so a file and
# its company always stay on the same row.
_patrick_sources = [
    ("air_communities_listings.csv", "Air Communities"),
    ("allied_orion_apartments.csv", "Allied Orion Group"),
    ("bell_partners_listings.csv", "Bell Partners"),
    ("bozzuto_listings.csv", "Bozzuto Management Co."),
    ("brookfield_properties_listings.csv", "Brookfield Properties Multifamily"),
    ("camden_property_listings.csv", "Camden Property Trust"),
    ("conam_management_listings.csv", "CONAM Management Corp."),
    ("dayrise_residential_listings.csv", "Dayrise Residential"),
    ("eci_group_listings.csv", "ECI Group"),
    ("equity_residential_listings.csv", "Equity Residential Services"),
    ("fpi_management_listings.csv", "FPI Management"),
    ("greystar_listings.csv", "Greystar Management Services"),
    ("morgan_properties_listings.csv", "Morgan Properties Management Co."),
    ("sherman_associates_listings.csv", "Sherman Associates"),
    ("the_related_companies_listings.csv", "The Related Companies"),
    ("windsor_properties_listings.csv", "Windsor Property Management Co."),
    ("udr_listings.csv", "UDR Inc."),
]

patrick_files = [filename for filename, _company in _patrick_sources]
patrick_names = [company for _filename, company in _patrick_sources]

In [4]:
def clean_file_patrick(file, name, data_dir=None):
    '''
    Import and clean a file gathered by Patrick.

    Parameters:
        file: CSV file name inside the data directory. The file must have
            "Title" and "Address" columns.
        name: Company name written to the "company_name" column of every row.
        data_dir: Folder containing `file`. Defaults to ../data/pm_data_clean,
            built with os.path.join so the path works on any OS.

    Returns:
        A one-element list holding a DataFrame with the columns
        company_name, building_name and raw_address. Rows without a Title
        are dropped. A list is returned so callers can extend a list of
        frames with `+=`.
    '''
    if data_dir is None:
        data_dir = os.path.join('..', 'data', 'pm_data_clean')
    listings = pd.read_csv(os.path.join(data_dir, file))
    # Work on new frames rather than mutating in place so the function has
    # no hidden state between steps.
    listings = (
        listings
        .dropna(subset=['Title'])
        .rename(columns={"Title": "building_name", "Address": "raw_address"})
        .assign(company_name=name)
    )
    return [listings[['company_name', 'building_name', 'raw_address']]]

# Clean every one of Patrick's files, stack the cleaned frames, and tag the
# combined rows with their author.
dfs = [
    cleaned
    for file, name in zip(patrick_files, patrick_names)
    for cleaned in clean_file_patrick(file, name)
]
patrick_appended = pd.concat(dfs).assign(author='Patrick')

In [5]:
# Each pair is (CSV file name, management company name); the two parallel
# lists below are derived from this table.
_tim_sources = [
    ("hsl_properties.csv", "HSL Properties"),
    ("irt_living.csv", "Independence Realty Trust"),
    ("jbg_smith_living.csv", "JBG Smith Properties"),
    ("kairoi_residential.csv", "Kairoi Management"),
    ("knightvest_residential.csv", "Knightvest Residential"),
    ("lantower_living.csv", "Lantower Luxury Living"),
    ("mid_america_apartments.csv", "Mid-America Apartments"),
    ("mission_rock_residential.csv", "Mission Rock Residential"),
    ("paradigm_companies.csv", "Paradigm Management"),
    ("prometheus_apartments.csv", "Prometheus Real Estate Group"),
    ("rose_associates.csv", "Rose Associates"),
    ("rpm_living.csv", "RPM Living"),
    ("srg_living.csv", "Sares Regis Group"),
]
tim_files = [filename for filename, _company in _tim_sources]
tim_names = [company for _filename, company in _tim_sources]

In [6]:
def clean_file_tim(file, name, data_dir=None):
    '''
    Import and clean a file gathered by Tim.

    Parameters:
        file: CSV file name inside the data directory. The file must have
            "building_name" and "raw_address" columns.
        name: Company name. Used only as a fallback: when the file has no
            "company_name" column, every row is given this value. When the
            column exists, the file's own values are kept unchanged.
        data_dir: Folder containing `file`. Defaults to ../data/pm_data_clean,
            built with os.path.join so the path works on any OS.

    Returns:
        A one-element list holding a DataFrame with the columns
        company_name, building_name and raw_address. Rows without a
        building_name are dropped.
    '''
    if data_dir is None:
        data_dir = os.path.join('..', 'data', 'pm_data_clean')
    listings = pd.read_csv(os.path.join(data_dir, file))
    listings = listings.dropna(subset=['building_name'])
    # `name` used to be ignored entirely; use it when the file cannot supply
    # the company itself instead of failing on the column selection below.
    if 'company_name' not in listings.columns:
        listings = listings.assign(company_name=name)
    return [listings[['company_name', 'building_name', 'raw_address']]]

# Clean every one of Tim's files, stack the cleaned frames, and tag the
# combined rows with their author.
dfs = [
    cleaned
    for file, name in zip(tim_files, tim_names)
    for cleaned in clean_file_tim(file, name)
]
tim_appended = pd.concat(dfs).assign(author='Tim')

In [7]:
# Each pair is (CSV file name, management company name); the two parallel
# lists below are derived from this table. Order matters: later cells
# address these files by position.
_ahmad_sources = [
    ("security_prop_appts.csv", "Security Properties Residential"),
    ("thrive communities appt.csv", "Thrive Communities"),
    ("master_weidner.csv", "Weidner Property Management"),
    ("cws_apartments.csv", "CWS Apartment Homes"),
    ("essex.csv", "Essex Property Trust"),
    ("gables.csv", "Gables Residential Services"),
    ("highmarkres.csv", "Highmark Residential"),
]
ahmad_csvs = [filename for filename, _company in _ahmad_sources]
ahmad_csvs_names = [company for _filename, company in _ahmad_sources]

In [8]:
# Load each of Ahmad's CSVs unchanged, in the same order as `ahmad_csvs`.
# os.path.join replaces the hard-coded backslash path so the notebook also
# runs on non-Windows systems.
raw_dfs = [
    pd.read_csv(os.path.join('..', 'data', 'pm_data_clean', file))
    for file in ahmad_csvs
]

In [9]:
def rename_ahmad(df, building, addr):
    '''
    Given a column name for buildings and a column name for addresses,
    maps them onto the standard names 'building_name' and 'raw_address'.

    Returns a renamed copy; `df` itself is not modified. A column name that
    is not present in `df` is simply left out of the rename.
    '''
    standard_names = {building: 'building_name', addr: 'raw_address'}
    return df.rename(columns=standard_names)

In [10]:
# Each pair is (building-name column, address column) for one of Ahmad's
# files, listed in the same order as `ahmad_csvs`. The two parallel lists
# below are derived from this table.
_ahmad_columns = [
    ("js-property-details", "prop-address"),
    ("small-margin", "small-margin (3)"),
    ("property-name", "mb-1"),
    ("Community Name", "Address"),
    ("community-card__content__property", "community-card__content__location"),
    ("MuiTypography-root", "raw_address"),
    ("name", "full-address"),
]
building_names = [building_col for building_col, _address_col in _ahmad_columns]
building_addresses = [address_col for _building_col, address_col in _ahmad_columns]

In [11]:
# raw_dfs[2] is master_weidner.csv: remove the "opens in a new tab" text
# from the property names and trim surrounding whitespace. The vectorized
# .str methods pass missing values through, whereas calling re.sub on each
# element raises a TypeError on the first non-string value.
raw_dfs[2]['property-name'] = (
    raw_dfs[2]['property-name']
    .str.replace('opens in a new tab', '', regex=False)
    .str.strip()
)
# raw_dfs[5] is gables.csv: set raw_address to an empty string for every row
# (presumably this file carries no address column -- TODO confirm).
raw_dfs[5]['raw_address'] = ""

In [12]:
# Standardize each of Ahmad's raw frames: rename its file-specific columns,
# tag it with the company name, and keep only the three shared columns.
# zip() would silently truncate if the parallel lists drifted apart, so check
# their lengths first.
assert (
    len(raw_dfs)
    == len(building_names)
    == len(building_addresses)
    == len(ahmad_csvs_names)
), "Ahmad's parallel lists must have exactly one entry per file"

ahmad_frames = []
for raw, building, addr, company in zip(
    raw_dfs, building_names, building_addresses, ahmad_csvs_names
):
    df = rename_ahmad(raw, building, addr)
    df['company_name'] = company
    ahmad_frames.append(df[['company_name', 'building_name', 'raw_address']])

# The combined frame keeps the name `dfs` because a later cell concatenates
# it with the ZRS and Simpson frames.
dfs = pd.concat(ahmad_frames)

In [13]:
# NOTE(review): the file is named "ZLS.txt" while the company is ZRS --
# confirm the file name is intended.
zrs_path = os.path.join('..', 'data', 'pm_data_clean', 'ZLS.txt')
# The context manager closes the handle even if JSON parsing fails.
with open(zrs_path) as f:
    zrs_df = pd.read_json(f)

# Zip is cast to str so it can be joined with the other text fields.
zrs_df['Zip'] = zrs_df['Zip'].astype(str)
zrs_df['State Zip'] = zrs_df[["State", "Zip"]].agg(" ".join, axis=1)
# raw_address has the form "<Address>, <City>, <State> <Zip>".
zrs_df['raw_address'] = zrs_df[["Address", "City", "State Zip"]].agg(", ".join, axis=1)
zrs_df['company_name'] = "ZRS Management LLC"
zrs_df['building_name'] = zrs_df["Name"]
zrs_df = zrs_df[["company_name", "building_name", "raw_address"]]

In [14]:
def _load_simpson_json(filename):
    '''
    Parse one JSON file from ../data/pm_data_clean and return the decoded
    object. The file is opened in a `with` block so the handle is always
    closed, and the path is built with os.path.join so it works on any OS.
    '''
    with open(os.path.join('..', 'data', 'pm_data_clean', filename)) as handle:
        return json.load(handle)

simpson_1 = _load_simpson_json("simpson_1.txt")
simpson_allcities = _load_simpson_json("simpson_allcities.txt")
simpson_allmapcities = _load_simpson_json("simpson_allmapcities.txt")

In [15]:
# Every item in each Simpson JSON object has a 'Communities' entry; turn each
# one into a frame and stack them all.
simpson_dfs = pd.concat([
    pd.DataFrame(item['Communities'])
    for jsonobj in (simpson_1, simpson_allcities, simpson_allmapcities)
    for item in jsonobj
])
# Drop rows that repeat the same community ID, name and address fields,
# keeping the first occurrence.
simpson_dfs = simpson_dfs.drop_duplicates(
    subset=["CommunityID", "Name", "Address1", "Address2", "City", "State", "Zip"]
)

In [16]:
# Assemble raw_address as "<Address1> <Address2>, <City>, <State> <Zip>" and
# reduce the Simpson data to the three shared columns.
simpson_dfs = (
    simpson_dfs
    # Helper columns first: the combined street line and the "State Zip" pair.
    .assign(**{
        "Address": lambda d: d[["Address1", "Address2"]].agg(" ".join, axis=1),
        "State Zip": lambda d: d[["State", "Zip"]].agg(" ".join, axis=1),
    })
    # Then the standardized output columns built from those helpers.
    .assign(
        raw_address=lambda d: d[["Address", "City", "State Zip"]].agg(", ".join, axis=1),
        building_name=lambda d: d['Name'],
        company_name="Simpson Property Group",
    )
    [["company_name", "building_name", "raw_address"]]
)

In [17]:
# Stack Ahmad's CSV-derived frame with the ZRS and Simpson frames and tag the
# combined rows with their author.
ahmad_appended = pd.concat([dfs, zrs_df, simpson_dfs]).assign(author='Ahmad')

In [18]:
# os.path.join replaces the hard-coded backslash path so the notebook also
# runs on non-Windows systems.
wcsmith = pd.read_csv(os.path.join('..', 'data', 'pm_data_clean', 'wcsmith_listings.csv'))
wcsmith = wcsmith.rename(columns={"propertyName": "building_name"})
wcsmith['company_name'] = "William C. Smith & Co."
# The zip is cast to str so it can be joined with the other text fields.
wcsmith["propertyZip"] = wcsmith["propertyZip"].astype(str)
wcsmith['State Zip'] = wcsmith[["propertyState", "propertyZip"]].agg(" ".join, axis=1)
# raw_address has the form "<propertyAddress>, <propertyCity>, <propertyState> <propertyZip>".
wcsmith['raw_address'] = wcsmith[["propertyAddress", "propertyCity", "State Zip"]].agg(", ".join, axis=1)
wcsmith = wcsmith[["company_name", "building_name", "raw_address"]]

In [19]:
# os.path.join replaces the hard-coded backslash path so the notebook also
# runs on non-Windows systems.
wbpc = pd.read_csv(os.path.join('..', 'data', 'pm_data_clean', 'geocoded_wpbc_data.csv'))
wbpc = wbpc.rename(columns={"propertyName": "building_name", "fullAddress": "raw_address"})
# NOTE(review): building_name is blanked for every row right after the
# rename, so any propertyName values in the file are discarded -- confirm
# this is intended.
wbpc["building_name"] = ""
wbpc["company_name"] = "Willow Bridge Property Co."
wbpc = wbpc[["company_name", "building_name", "raw_address"]]

In [20]:
# Stack the William C. Smith and Willow Bridge frames and tag the combined
# rows with their author.
chelle_appended = pd.concat([wcsmith, wbpc]).assign(author='Chelle')

In [21]:
# Stack the four contributors' frames (each already tagged with its author)
# into one table, then display its (rows, columns) shape.
all_appended = pd.concat([patrick_appended, tim_appended, ahmad_appended, chelle_appended])
all_appended.shape

(8824, 4)

In [22]:
# Write the combined table without the row index. os.path.join replaces the
# hard-coded backslash path so the output lands in ../data on any OS.
all_appended.to_csv(os.path.join('..', 'data', 'pm_data_small_appended.csv'), index=False)