# Owner Analysis

Most of these functions are for the paper: 'Hiding in Housing'

In [None]:
from tqdm import tqdm
import json
import matplotlib.pyplot as plt
import numpy
import csv

### Find One Property Owners
This function is used to find owners that own only one property.

In [None]:
def find_one_prop_owners(source):
    data = json.load(open(source))
    count = 0
    for owner in tqdm(data, total=len(data)):
        if owner[1] == 1:
            count += 1
    percentage = (count / len(data)) * 100
    print('There are ', count, 'one property owners in this dataset.')
    print("That's ", percentage, '% of total owners.')

In [None]:
find_one_prop_owners('./../../data_sets/sorted_landlords.json')

### Find owner_2
This function is used to find and count properties with 2 owners.

In [None]:
def get_owner_2_count(source, output):
    owners = []
    owner2_count = 0
    with open(source, mode="r") as csv_file:
        csv_reader = csv.DictReader(csv_file)
        line_count = 0
        for row in tqdm(csv_reader, total=581456):
            if line_count == 0:
                line_count += 1
            else:
                try:
                    owners.append([row["owner_1"].strip(), row["owner_2"].strip()])
                    if (row["owner_2"].strip() == ""):
                        owner2_count += 1
                except:
                     print(row["owner_1"].strip(), "is missing a count.")
    with open(output, 'w') as file:
        file.write(json.dumps(owners))
    print('There are ', owner2_count, 'owner_2s in this dataset.')

In [None]:
get_owner_2_count('./../../data_sets/opa_properties_public.csv', './../../data_sets/owner1_owner2.json')

### Get Owners and Mailing Addresses
This function will create a list that holds both owners and the mailing address.

In [29]:
def get_owners_and_mailing_address(source, output):
    owners_with_mailing_address = []
    mailing_address_count = 0
    with open(source, mode="r") as csv_file:
        csv_reader = csv.DictReader(csv_file)
        line_count = 0
        owner_1_count = 0
        owner_2_count = 0
        mailing_address_1_count = 0
        mailing_address_2_count = 0
        mailing_care_of_count = 0
        mailing_city_state_count = 0
        mailing_street_count = 0
        mailing_zip_count = 0
        for row in tqdm(csv_reader, total=581456):
            if line_count == 0:
                line_count += 1
            else:
                line_count += 1
            
                owner_1 = row["owner_1"].strip()
                owner_2 = row["owner_2"].strip()
                mailing_address_1 = row["mailing_address_1"].strip()
                mailing_address_2 = row["mailing_address_2"].strip()
                mailing_care_of = row["mailing_care_of"].strip()
                mailing_city_state = row["mailing_city_state"].strip()
                mailing_street = row["mailing_street"].strip()
                mailing_zip = row["mailing_zip"].strip()
                mailing_info = [
                    mailing_address_1,
                    mailing_address_2,
                    mailing_care_of,
                    mailing_city_state,
                    mailing_street,
                    mailing_zip
                ]
                owners_with_mailing_address.append([owner_1, owner_2, mailing_info])
                if (owner_1 != ""):
                    owner_1_count += 1
                if (owner_2 != ""):
                    owner_2_count += 1
                if (mailing_address_1 != ""):
                    mailing_address_1_count += 1
                if (mailing_address_2 != ""):
                    mailing_address_2_count += 1
                if (mailing_care_of != ""):
                    mailing_care_of_count += 1
                if (mailing_city_state != ""):
                    mailing_city_state_count += 1
                if (mailing_street != ""):
                    mailing_street_count += 1
                if (mailing_zip != ""):
                    mailing_zip_count += 1

    with open(output, 'w') as file:
        file.write(json.dumps(owners_with_mailing_address))
    line_count -= 1 # need to account for the first line in the csv
    print('There are ', line_count, 'properties in this dataset.')
    print('There are ', owner_1_count, 'owner_1s in this dataset or ', (owner_1_count/line_count)*100, ' %.')
    print('There are ', owner_2_count, 'owner_2s in this dataset or ', (owner_2_count/line_count)*100, ' %.')
    print('There are ', mailing_address_1_count, 'mailing_address_1s in this dataset or ', (mailing_address_1_count/line_count)*100, ' %.')
    print('There are ', mailing_address_2_count, 'mailing_address_2s in this dataset or ', (mailing_address_2_count/line_count)*100, ' %.')
    print('There are ', mailing_care_of_count, 'mailing_care_ofs in this dataset or ', (mailing_care_of_count/line_count)*100, ' %.')
    print('There are ', mailing_city_state_count, 'mailing_city_states in this dataset or ', (mailing_city_state_count/line_count)*100, ' %.')
    print('There are ', mailing_street_count, 'mailing_streets in this dataset or ', (mailing_street_count/line_count)*100, ' %.')
    print('There are ', mailing_zip_count, 'mailing_zips in this dataset or ', (mailing_zip_count/line_count)*100, ' %.')
    
    

In [30]:
get_owners_and_mailing_address('./../../data_sets/opa_properties_public.csv', './../../data_sets/owners_mailing_address.json')

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 581456/581456 [00:09<00:00, 63005.73it/s]


There are  581455 properties in this dataset.
There are  581455 owner_1s in this dataset or  100.0  %.
There are  204927 owner_2s in this dataset or  35.24382798324892  %.
There are  32299 mailing_address_1s in this dataset or  5.554858071561858  %.
There are  11185 mailing_address_2s in this dataset or  1.9236226363175137  %.
There are  23098 mailing_care_ofs in this dataset or  3.972448426791411  %.
There are  220035 mailing_city_states in this dataset or  37.842137396703095  %.
There are  220037 mailing_streets in this dataset or  37.84248136141232  %.
There are  219783 mailing_zips in this dataset or  37.798797843341276  %.


### Find LLC Owners
This function will find LLC owners and count them.

In [36]:
def find_llc_owners(source, output):
    data = json.load(open(source))
    out = []
    count = 0
    property_count = 0
    for owner in tqdm(data, total=len(data)):
        if owner[0].find('LLC') != -1:
            out.append(owner)
            count += 1
            property_count += owner[1]

    json.dump(out, open(output, 'w+'))
    
    print('There are ', count, 'LLCs in this dataset or ', (count/len(data))*100, '% of owners.')
    print('LLCs own ', property_count, 'properties or ', (property_count / 581455)*100, '% of properties.') # hard number sourced from previous function

In [37]:
find_llc_owners('./../../data_sets/sorted_landlords.json', './data/llc_owner.json')

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 429983/429983 [00:00<00:00, 3038211.68it/s]


There are  16770 LLCs in this dataset or  3.90015419214248 % of owners.
LLCs own  17868 properties or  3.0729807121789308 % of properties.
