In [1]:
import pandas as pd
import json, os
from time import strftime
import numpy as np

In [2]:
# Owner-name fragments that disqualify a mailing recipient. remove_bad_owners
# joins them with '|' and matches them case-insensitively against the name.
BAD_OWNERS = [
    'SENTER',
    'RAILWAY',
    'CITY',
    'GOVERNMENT',
    'GOVT',
    'STATE OF',
    'PIPE',
    'CEMETERY',
    'SCHOOL',
    'TOWER',
    'DISTRICT',
    'SYSTEM',
]

# Zoning values that the land queries below exclude via `~df.zoning.isin(...)`.
BAD_ZONING = [
    'Commercial',
    'Public Street',
    'Utility Use',
]

# Base folder for every file read or written below; paths are built as
# f'{OUTPUT_DIR}/...'.
OUTPUT_DIR = '../output/'

In [3]:
def split_address(address):
    """Break a comma-separated mailing address into its components.

    The last comma-separated field must contain exactly two
    whitespace-separated tokens (state and ZIP). The field before it is the
    town, and every earlier field is re-joined with commas to form the street.

    Returns:
        (street, town, state, zip) as stripped strings, with the ZIP cut to
        its first five characters.

    Raises:
        ValueError: if there are fewer than two comma-separated fields, or the
            last field does not split into exactly two tokens.
    """
    fields = address.split(',')
    *street_fields, town, area = fields
    state, zip_code = area.split()
    street = ','.join(street_fields).strip()
    return street, town.strip(), state.strip(), zip_code.strip()[:5]

def good_address(address_list):
    """Flag which addresses can be parsed by split_address.

    Args:
        address_list: iterable of address values (non-string entries are
            allowed; they simply count as unparseable).

    Returns:
        A list of booleans, one per input item and in the same order: True
        when split_address(address) succeeds, False when it raises.
    """
    good_flags = []
    for address in address_list:
        try:
            split_address(address)
        except Exception:
            # Any parsing failure marks the address as bad. `Exception` (not a
            # bare `except`) lets KeyboardInterrupt/SystemExit propagate so a
            # long run can still be interrupted.
            good_flags.append(False)
        else:
            good_flags.append(True)
    return good_flags

def df_to_click2mail(df):
    """Build a mailing-list frame (Name, Address, City, State, Zip) from `df`.

    Args:
        df: DataFrame with `owner_address` and `owner_name` columns. Every
            address must be parseable by split_address, which raises
            otherwise.

    Returns:
        A new DataFrame with columns ['Name', 'Address', 'City', 'State',
        'Zip'], holding only the first row for each distinct Name.
    """
    ad_columns     = ['Address', 'City', 'State', 'Zip']
    output         = pd.DataFrame(map(split_address, df.owner_address.values), columns=ad_columns)
    # Strip a dangling " &" from the end of a name. Non-string names (e.g. NaN
    # for a missing owner) are passed through unchanged instead of raising
    # AttributeError on `.endswith`.
    output['Name'] = [
        name[:-2] if isinstance(name, str) and name.endswith(' &') else name
        for name in df.owner_name
    ]
    # One row per recipient name; reassign rather than mutate in place.
    output         = output.drop_duplicates(subset='Name')

    return output[['Name', *ad_columns]]

def remove_bad_owners(df, col='Name', bad_owners=None):
    """Drop rows whose owner name matches any unwanted pattern.

    Args:
        df: DataFrame to filter.
        col: name of the string column holding the owner name.
        bad_owners: optional iterable of patterns; defaults to the
            module-level BAD_OWNERS. Patterns are joined with '|' and applied
            as one case-insensitive regular expression, so each entry matches
            anywhere inside the name.

    Returns:
        A DataFrame containing only the rows that match none of the patterns.
        Rows with a missing value in `col` never match and are therefore kept.
    """
    patterns = BAD_OWNERS if bad_owners is None else list(bad_owners)
    if not patterns:
        # An empty alternation would be the empty regex, which matches every
        # string and would wrongly discard all rows.
        return df.copy()

    bad_owners_str = '|'.join(patterns)
    # na=False keeps the mask strictly boolean; without it a missing name
    # yields NaN and the `~` below raises TypeError.
    is_bad = df[col].str.contains(bad_owners_str, case=False, na=False)
    return df[~is_bad]

**Tax penalty 1..20 acres and Wylie 1..10 acres, absentee only**

In [14]:
# Tax delinquent land with penalty

# Acreage window (inclusive) applied to land_area at the end of this cell.
min_acres, max_acres = 0.9, 20.1

df      = pd.read_json(f'{OUTPUT_DIR}/output_taylor/output_taylor.json')
# Keep only rows whose owner address split_address can parse.
df      = df[good_address(df.owner_address.values)]
# Explicit boolean mask for "has a recent penalty" (missing counts as none),
# instead of using the numeric column directly as an operand of `&`.
has_penalty = df.recent_penalty.fillna(0) != 0
df      = df[df.absentee & df.empty_land & ~df.inactive & has_penalty]
df      = df[~df.zoning.isin(BAD_ZONING)]
col_set = set(df.columns)-{'absentee', 'empty_land', 'inactive', 'land_dict', 'property_use'}
# Select the kept columns in their original order; iterating the set directly
# would give a column order that can differ from run to run.
df      = df[[c for c in df.columns if c in col_set]]
df      = df.sort_values(by='recent_penalty', ascending=False)
df      = df[(df.land_area <= max_acres) & (min_acres <= df.land_area)]

# Held for the concat with the Wylie selection.
df1 = df

In [15]:
# Small empty land in Wylie ISD

# Acreage window (inclusive) applied to land_area at the end of this cell.
min_acres, max_acres = 0.9, 10.1

df      = pd.read_json(f'{OUTPUT_DIR}/output_taylor/output_taylor.json')
# Keep only rows whose owner address split_address can parse.
df      = df[good_address(df.owner_address.values)]
df      = df[df.absentee & df.empty_land & ~df.inactive]
# Only parcels whose recent_penalty is exactly zero.
df      = df[df.recent_penalty==0.0]
df      = df[df.school=='WYLIE']
df      = df[~df.zoning.isin(BAD_ZONING)]
col_set = set(df.columns)-{'absentee', 'empty_land', 'inactive', 'land_dict', 'property_use'}
# Select the kept columns in their original order; iterating the set directly
# would give a column order that can differ from run to run.
df      = df[[c for c in df.columns if c in col_set]]
df      = df[(df.land_area <= max_acres) & (min_acres <= df.land_area)]

# Held for the concat with the tax-penalty selection.
df2 = df

In [16]:
# Stack the two selections into one frame with a fresh 0..n-1 index.
frames  = [df1, df2]
df_full = pd.concat(frames, ignore_index=True)

In [17]:
# Convert to the mailing layout, then drop unwanted owners; the trailing
# expression displays the (rows, columns) of the result.
df_mailing = df_full.pipe(df_to_click2mail).pipe(remove_bad_owners)
df_mailing.shape

(333, 5)

In [18]:
# Write the mailing list to a file named after the current local time.
timestamp = strftime("%Y%m%d_%H%M%S")
csv_path  = f'{OUTPUT_DIR}/ml_{timestamp}.csv'
df_mailing.to_csv(csv_path, index=False)

**Delinquent multi-family homes**

In [20]:
# Tax delinquent multifamily

df = pd.read_json(f'{OUTPUT_DIR}/output_taylor/output_taylor.json')
# Explicit boolean mask for "has recent delinquency" (missing counts as none),
# instead of using the numeric column directly as an operand of `&`.
is_delinquent = df.recent_delinq.fillna(0) != 0
df = df[(df.property_use=='MULTIPLE RESIDENCE') & is_delinquent]
# Full property export: every matching row, whatever its address looks like.
df.to_csv(f'{OUTPUT_DIR}/multi.csv', index = False)

# df_to_click2mail calls split_address on every row, which raises on an
# address it cannot parse. Filter with good_address first, as the other
# mailing cells do, so one malformed address does not abort the export.
df_mailing = df_to_click2mail(df[good_address(df.owner_address.values)])
df_mailing = remove_bad_owners(df_mailing)
df_mailing.to_csv(f'{OUTPUT_DIR}/ml_multi_names.csv', index=False)

**Delinquent land without penalty 1..20 acres, absentee only**

In [21]:
# Acreage window (inclusive) applied to land_area at the end of this cell.
min_acres, max_acres = 0.9, 20.1

df      = pd.read_json(f'{OUTPUT_DIR}/output_taylor/output_taylor.json')
# Keep only rows whose owner address split_address can parse.
df      = df[good_address(df.owner_address.values)]
df      = df[df.absentee & df.empty_land & ~df.inactive ]
# Delinquent (truthy recent_delinq) but without a penalty (falsy recent_penalty).
df      = df[(~df.recent_penalty.values.astype(bool)) & df.recent_delinq.values.astype(bool)]
df      = df[~df.zoning.isin(BAD_ZONING)]
col_set = set(df.columns)-{'absentee', 'empty_land', 'inactive', 'land_dict', 'property_use'}
# Select the kept columns in their original order; iterating the set directly
# would give a column order that can differ from run to run.
df      = df[[c for c in df.columns if c in col_set]]
# NOTE(review): after the filter above every remaining recent_penalty is
# falsy, so this key cannot order the rows; confirm whether sorting by
# recent_delinq was intended.
df      = df.sort_values(by='recent_penalty', ascending=False)
df      = df[(df.land_area <= max_acres) & (min_acres <= df.land_area)]

In [22]:
# Mailing layout with unwanted owners removed.
df_mailing = remove_bad_owners(df_to_click2mail(df))

# Names listed in an earlier mailing file.
# NOTE(review): no cell visible here writes ml_penalty20_wylie10.csv;
# presumably it is a renamed ml_<timestamp>.csv export. Confirm.
already_sent_names = pd.read_csv(f'{OUTPUT_DIR}/ml_penalty20_wylie10.csv')['Name']

# Keep only recipients who are not in that earlier file.
not_yet_mailed = ~df_mailing.Name.isin(already_sent_names.values)
df_mailing     = df_mailing[not_yet_mailed]

In [13]:
# Write the mailing list to a file named after the current local time.
timestamp = strftime("%Y%m%d_%H%M%S")
csv_path  = f'{OUTPUT_DIR}/ml_{timestamp}.csv'
df_mailing.to_csv(csv_path, index=False)

**Empty lots in Hawley**

In [26]:
# Inclusive acreage window for this query.
hawley_min_acres, hawley_max_acres = 0.1, 5

df      = pd.read_csv(f'{OUTPUT_DIR}/output_jones/output_jones.csv')
df      = df[df.empty_land & df.absentee]
# na=False treats a missing school as "not HAWLEY"; without it the mask
# contains NaN and boolean indexing raises.
df      = df[df.school.str.startswith('HAWLEY', na=False)]
df      = df[df.land_area.between(hawley_min_acres, hawley_max_acres)]
df      = df[df.property_use < 'E'] # residential only
df      = remove_bad_owners(df, col='owner_name')

In [36]:
# Columns written to the Hawley export, in output order.
export_columns = [
    'prop_id',
    'prop_address',
    'owner_name',
    'owner_address',
    'property_use',
    'land_area',
    'recent_penalty',
    'recent_delinq',
]
df_export = df[export_columns]
df_export.to_csv(f'{OUTPUT_DIR}/hawley_empty.csv', index=False)

**Russian speaking owners**

In [10]:
df = pd.read_csv(f'{OUTPUT_DIR}/output_taylor/output_taylor.csv')
df = df[~df.owner_name.isnull()]

# First whitespace-delimited token of the owner name ends in one of these
# suffixes (matched case-insensitively). A raw string avoids the invalid
# "\S"/"\s" escapes, and the non-capturing group avoids the pandas warning
# about match groups in str.contains; the set of matches is unchanged.
surname_pattern = r'^\S*(?:ov|ova|ev|eva|ko|ian|yan)\s'

names = np.unique(df[df.owner_name.str.contains(surname_pattern, case=False)].owner_name.values)
# Distinct first tokens of the matching names.
np.unique([name.split()[0] for name in names])

array(['ABAMISLIMOVA', 'ADRIAN', 'BABAIAN', 'BARBIAN', 'BASTIAN', 'BATKO',
       'BOTZKO', 'BRYAN', 'BYKOV', 'CAPKO', 'CHAPKO', 'CHRISTIAN',
       'CORDOVA', 'CORPIAN', 'DAMIAN', 'EV', 'GAMBLIAN', 'GORBENKO',
       'HACOPIAN', 'HARCHENKO', 'HRISTOVA', 'INDIAN', 'JULIAN',
       'KALAYDZHYAN', 'KARABEGOV', 'KO', 'KOSHELEVA', 'KRUSHEVA',
       'LANUEVA', 'LUSKO', 'LUTZKO', 'MALKUIAN', 'MARDEROSIAN', 'MBONEKO',
       'MCMILLIAN', 'MIKO', 'MILIAN', 'MUZECHENKO', 'NURMUKHAMEDOV',
       'OSKIERKO', 'PAYAN', 'PERMIAN', 'PETROVA', 'PHYSICIAN', 'PLISKO',
       'PRESBYTERIAN', 'QIAN', 'RHYAN', 'ROYKO', 'RUNYAN', 'RYAN',
       'SALAKO', 'SANDLIAN', 'SHAMLIAN', 'SHOMANSUROV', 'UNITARIAN',
       'VALORIAN', 'VENETIAN', 'VILLANUEVA', 'VIVIAN', 'VOLCKO', 'ZAKO',
       'ZELEV', 'ZELISKO'], dtype='<U13')

In [17]:
# Show every row whose owner name contains this surname.
surname_mask = df.owner_name.str.contains('SHOMANSUROV')
df[surname_mask]

Unnamed: 0,prop_id,legal_description,prop_address,owner_name,owner_address,transfer_date,absentee,empty_land,improvement_value,property_use,zoning,land_area,land_dict,recent_penalty,recent_delinq,school,inactive
3318,103507,"A0516 SUR 68 L A L E/2 OF SE/4, ACRES 2.003",1390 CR 154 TX,SHOMANSUROV BOKHODIR,"DBA MOBILE HOME CONCEPTS, 4742 DERRICK DR, ABI...",2009-09-22,True,True,0,,Residential,2.003,{'Residential': 2.003},0.0,0.0,JIM,False
14400,19153,"SUNLAKE VILLAGE ADDN SEC 2, BLOCK A, LOT 21",2913 ASPEN DR TX,SHOMANSUROV BO,"4742 DERRICK DR, ABILENE, TX 79601-6712",2015-06-04,True,True,0,,Residential,0.1928,{'Residential': 0.1928},0.0,0.0,ABILENE,False
15260,20184,"SUNLAKE VILLAGE ADDN SEC 2, BLOCK A, LOT 29",2945 ASPEN DR,SHOMANSUROV BO,"3126 SANTA MONICA DR, ABILENE, TX 79605-6733",2008-12-01,True,True,0,,Residential,0.1452,{'Residential': 0.1452},0.0,0.0,ABILENE,False
19805,25866,"WYCHWOOD PLAZA, BLOCK H, LOT 4",3126 SANTA MONICA DR,SHOMANSUROV SUSAN WADE,"3126 SANTA MONICA DR, ABILENE, TX 79605-6733",2014-05-30,False,False,139962,,Residential,0.1687,{'Residential': 0.1687},0.0,0.0,ABILENE,False
28518,36541,"H & W TOOL CO SEC 1 CONT 2, BLOCK A, LOT 10, A...",4802 DERRICK DR TX,SHOMANSUROV BAKHODIR,"4742 DERRICK DR, ABILENE, TX 79601-6712",2016-12-19,True,False,87149,,Commercial,2.3623,{'Commercial': 2.3623},0.0,0.0,EULA,False
37188,47063,"A0073 SUR 147 GRIMES COUNTY SCHOOL, TRACT 30, ...",9210 I-20 TX,SHOMANSUROV BAKHODIR,"4742 DERRICK DR, ABILENE, TX 79601-6712",2017-01-13,True,False,49263,,Commercial,0.89,{'Commercial': 0.89},0.0,0.0,MERKEL,False
42740,53666,"THE MEADOWS CONT 1, BLOCK A, LOT 1-3, ACRES 19...",,SHOMANSUROV BAKHODIR,"4742 DERRICK DR, ABILENE, TX 79601-6712",2017-11-20,True,False,163845,MANUFACTURED HOUSING PARK,Commercial,19.2715,{'Commercial': 19.2715},0.0,0.0,ABILENE,False
56608,70570,"A0317 SUR 37 ALEXANDER THOMPSON, TRACT 4 ANDER...",,SHOMANSUROV BAKHODIR,"4742 DERRICK DR, ABILENE, TX 79601-6712",2017-11-20,True,True,0,,Pasture Class 5,0.72,{'Pasture Class 5': 0.72},0.0,0.0,ABILENE,False
57858,72149,"LANEY HOMESTEAD, BLOCK A, LOT 29",CECIL,SHOMANSUROV BO,"4742 DERRICK DR, ABILENE, TX 79601-6712",2013-06-03,True,True,0,,Residential,0.2491,{'Residential': 0.2491},0.0,0.0,MERKEL,False


**Largest land owner (top 20 owners ranked by the summed `AGGR_KEY` column)**

In [12]:
# Column that is summed per owner and used for the ranking.
AGGR_KEY = 'recent_delinq'

df = pd.read_csv(f'{OUTPUT_DIR}/output_taylor/output_taylor.csv')
df = df.dropna(subset=['owner_name'])

# Per-owner total of AGGR_KEY; display the 20 largest totals.
owner_totals = df.groupby('owner_name')[[AGGR_KEY]].sum()
owner_totals.sort_values(by=AGGR_KEY, ascending=False).head(20)

Unnamed: 0_level_0,recent_delinq
owner_name,Unnamed: 1_level_1
WELLTOWER TCG NNN LANDLORD LLC,360432.14
ER PROPCO WC LLC,238649.5
ABILENE TEACHERS FEDERAL,152068.55
WISTERIA HEALTH HOLDINGS LLC,150923.31
FUNERAL DIRECTORS LIFE INS CO,142362.8
SPT IVEY ABILENE MOB LLC,141142.33
TOWER TECH SYSTEMS INC,140908.87
WHITENER FAMILY LIMITED PTSHP 1,135691.94
PAK HARRIS ENTERPRISES LTD,125956.91
PETROSMITH EQUIPMENT LP,122243.38
