In [1]:
import pandas as pd
import json, os
from time import strftime
import numpy as np

In [2]:
# Name fragments used by remove_bad_owners(): the entries are joined with '|'
# and matched case-insensitively anywhere in the owner name.
BAD_OWNERS = [
    'SENTER',
    'RAILWAY',
    'CITY',
    'GOVERNMENT',
    'GOVT',
    'STATE OF',
    'PIPE',
    'CEMETERY',
    'SCHOOL',
    'TOWER',
    'DISTRICT',
    'SYSTEM',
    'CRUZ SHERIDAN',
]

# Zoning values that the land queries in this notebook filter out via isin().
BAD_ZONING = [
    'Commercial',
    'Public Street',
    'Utility Use',
]

In [3]:
def split_address(address):
    """Split a one-line mailing address into (street, town, state, zip).

    The address is split on commas: the last piece must hold exactly two
    whitespace-separated tokens (state and zip), the piece before it is the
    town, and everything earlier is re-joined with commas as the street.
    The zip is truncated to its first five characters.

    Raises:
        ValueError: if there are fewer than two comma-separated pieces, or
            the last piece does not contain exactly two tokens.
    """
    *street_parts, town, state_and_zip = address.split(',')
    state, zip_code = state_and_zip.split()

    street = ','.join(street_parts).strip()
    five_digit_zip = zip_code.strip()[:5]
    return street, town.strip(), state.strip(), five_digit_zip

def good_address(address_list):
    """Flag which addresses can be parsed by split_address.

    Args:
        address_list: iterable of address values (typically strings).

    Returns:
        A list of booleans, one per input and in the same order: True when
        split_address(address) returns without raising, False otherwise.
    """
    def _parses(address):
        # Any ordinary failure (wrong shape, non-string value, ...) marks the
        # address as bad. `except Exception` rather than a bare `except` so
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            split_address(address)
        except Exception:
            return False
        return True

    return [_parses(address) for address in address_list]

def df_to_click2mail(df):
    """Build a mailing table (Name, Address, City, State, Zip) from a property frame.

    Reads the `owner_address` and `owner_name` columns of `df`. Each address
    is parsed with split_address, so an unparseable address raises. A trailing
    ' &' is removed from owner names, and only the first row for each Name is
    kept.
    """
    ad_columns = ['Address', 'City', 'State', 'Zip']

    parsed_rows = [split_address(address) for address in df.owner_address.values]
    output = pd.DataFrame(parsed_rows, columns=ad_columns)

    cleaned_names = []
    for name in df.owner_name:
        # Drop the dangling ampersand left on some owner names.
        cleaned_names.append(name[:-2] if name.endswith(' &') else name)
    output['Name'] = cleaned_names

    output = output.drop_duplicates(subset='Name')
    return output[['Name', *ad_columns]]

def remove_bad_owners(df, col='Name', bad_owners=None):
    """Drop rows whose owner name contains any blacklisted fragment.

    Args:
        df: frame to filter.
        col: name of the column holding the owner name.
        bad_owners: iterable of fragments to reject; defaults to the
            module-level BAD_OWNERS. Fragments are joined with '|' and passed
            to `Series.str.contains`, so they are interpreted as a regular
            expression and matched case-insensitively.

    Returns:
        A new frame without the matching rows. Rows whose name is missing
        are kept, because a missing name cannot match a fragment.
    """
    if bad_owners is None:
        bad_owners = BAD_OWNERS

    bad_owners_str = '|'.join(bad_owners)
    if not bad_owners_str:
        # An empty pattern matches every string and would drop every row.
        return df.copy()

    # na=False: without it a missing name yields NaN in the mask, and both
    # `~mask` and boolean indexing fail on a mask containing NaN.
    is_bad = df[col].str.contains(bad_owners_str, case=False, na=False)
    return df[~is_bad]

**Tax penalty 1..20 acres and Wylie 1..10 acres, absentee only**

In [14]:
# Tax delinquent land with penalty
#
# Absentee-owned, empty, active parcels that carry a recent penalty, are not
# in an excluded zoning, and lie between min_acres and max_acres (inclusive).
# The result is kept as df1.

min_acres, max_acres = 0.9, 20.1

df      = pd.read_json(f'{os.getcwd()}/output.json')
df      = df[good_address(df.owner_address.values)]
# Cast recent_penalty to bool before combining: `&` between a boolean mask and
# a numeric column is a bitwise operation, not a truth test.
# NOTE(review): assumes a non-zero recent_penalty means "has a penalty" — confirm.
df      = df[df.absentee & df.empty_land & ~df.inactive & df.recent_penalty.astype(bool)]
df      = df[~df.zoning.isin(BAD_ZONING)]
col_set = set(df.columns)-{'absentee', 'empty_land', 'inactive', 'land_dict', 'property_use'}
# Select in the frame's own column order; unpacking the set directly gives an
# arbitrary order that can differ between kernel sessions.
df      = df[[col for col in df.columns if col in col_set]]
df      = df.sort_values(by='recent_penalty', ascending=False)
df      = df[(df.land_area <= max_acres) & (min_acres <= df.land_area)]

df1 = df

In [15]:
# Small empty land in Wylie ISD
#
# Absentee-owned, empty, active parcels with no recent penalty whose school
# value is 'WYLIE', not in an excluded zoning, and between min_acres and
# max_acres (inclusive). The result is kept as df2.

min_acres, max_acres = 0.9, 10.1

df      = pd.read_json(f'{os.getcwd()}/output.json')
df      = df[good_address(df.owner_address.values)]
df      = df[df.absentee & df.empty_land & ~df.inactive]
df      = df[df.recent_penalty==0.0]
df      = df[df.school=='WYLIE']
df      = df[~df.zoning.isin(BAD_ZONING)]
col_set = set(df.columns)-{'absentee', 'empty_land', 'inactive', 'land_dict', 'property_use'}
# Select in the frame's own column order; unpacking the set directly gives an
# arbitrary order that can differ between kernel sessions.
df      = df[[col for col in df.columns if col in col_set]]
df      = df[(df.land_area <= max_acres) & (min_acres <= df.land_area)]

df2 = df

In [16]:
# Stack the two selections (df1, df2) into one frame with a fresh 0..n-1 index.
df_full = pd.concat(
    (df1, df2),
    ignore_index=True,
)

In [8]:
# Turn the combined property list into mailing rows, drop blacklisted owners,
# and display the resulting (rows, columns).
df_mailing = remove_bad_owners(df_to_click2mail(df_full))
df_mailing.shape

(332, 5)

In [86]:
# Save the mailing list into the current working directory under a
# timestamped file name, so repeated exports do not overwrite each other.
timestamp = strftime("%Y%m%d_%H%M%S")
mailing_csv_path = f'{os.getcwd()}/ml_{timestamp}.csv'
df_mailing.to_csv(mailing_csv_path, index=False)

**Delinquent multi-family homes**

In [14]:
# Tax delinquent multifamily
#
# Writes two files into the current working directory: multi.csv holds every
# 'MULTIPLE RESIDENCE' property with a recent delinquency, and
# ml_multi_names.csv holds the de-duplicated mailing rows for them.

df = pd.read_json(f'{os.getcwd()}/output.json')
# Cast recent_delinq to bool before `&` so a numeric column is treated as a
# truth value rather than combined bitwise.
df = df[(df.property_use=='MULTIPLE RESIDENCE') & df.recent_delinq.astype(bool)]
df.to_csv(f'{os.getcwd()}/multi.csv', index = False)

# df_to_click2mail parses every owner address and raises on the first one it
# cannot split, so only rows with a parseable address go to the mailing list.
df_mailing = df_to_click2mail(df[good_address(df.owner_address.values)])
df_mailing = remove_bad_owners(df_mailing)
df_mailing.to_csv(f'{os.getcwd()}/ml_multi_names.csv', index=False)

In [11]:
# Apply the owner blacklist to the full property list (the name column is
# 'owner_name' here), save it, and display the resulting (rows, columns).
df_full = remove_bad_owners(df=df_full, col='owner_name')
current_list_path = '~/temp/current_list.csv'
df_full.to_csv(current_list_path, index=False)
df_full.shape

(420, 10)

**Delinquent land without penalty 1..20 acres, absentee only**

In [11]:
# Delinquent land without penalty
#
# Absentee-owned, empty, active parcels that have a recent delinquency but no
# recent penalty, are not in an excluded zoning, and lie between min_acres
# and max_acres (inclusive).

min_acres, max_acres = 0.9, 20.1

df      = pd.read_json(f'{os.getcwd()}/output.json')
df      = df[good_address(df.owner_address.values)]
df      = df[df.absentee & df.empty_land & ~df.inactive ]
# Both columns are cast to plain boolean arrays, so any non-zero value counts
# as "present".
df      = df[(~df.recent_penalty.values.astype(bool)) & df.recent_delinq.values.astype(bool)]
df      = df[~df.zoning.isin(BAD_ZONING)]
col_set = set(df.columns)-{'absentee', 'empty_land', 'inactive', 'land_dict', 'property_use'}
# Select in the frame's own column order; unpacking the set directly gives an
# arbitrary order that can differ between kernel sessions.
df      = df[[col for col in df.columns if col in col_set]]
# NOTE(review): every remaining row has a falsy recent_penalty after the
# filter above, so this sort looks redundant — confirm before removing.
df      = df.sort_values(by='recent_penalty', ascending=False)
df      = df[(df.land_area <= max_acres) & (min_acres <= df.land_area)]

In [12]:
# Build the mailing rows for the current selection, then exclude every Name
# already listed in ml_penalty20_wylie10.csv (presumably the earlier
# penalty/Wylie mailing — confirm the file's origin).
df_mailing         = remove_bad_owners(df_to_click2mail(df))
already_sent_names = pd.read_csv(f'{os.getcwd()}/ml_penalty20_wylie10.csv')['Name']
df_mailing         = df_mailing.loc[~df_mailing['Name'].isin(already_sent_names.values)]

In [13]:
# Save the mailing list into the current working directory under a
# timestamped file name, so repeated exports do not overwrite each other.
timestamp = strftime("%Y%m%d_%H%M%S")
mailing_csv_path = f'{os.getcwd()}/ml_{timestamp}.csv'
df_mailing.to_csv(mailing_csv_path, index=False)